Code example #1
def cleStart():
    ## base
    from json import load as j_loads
    from os import getcwd as os_getcwd, chdir as os_chdir
    v_lst = ("p2p_packet_info.txt", "packet_info.txt", "profile.txt", "stdout",
             "syslog", "syserr", "usage.txt", "VERSION.txt", "DEV_LOG.log",
             "mob_count", "*.core")
    szPWD = os_getcwd()
    ## clear files in each path from clear.list
    with open("clear.list", "r") as fList:
        mList = j_loads(fList)
    for dic1 in mList:
        # go to the log path
        # print dic1["path"]
        os_chdir(dic1["path"])
        # clean files (fShell is the project's shell-command helper)
        fShell("cat /dev/null > PTS")
        fShell("rm -rf log/* cores/*")
        # go back to the base directory
        os_chdir(szPWD)
    ## clean other logs
    with open("start.list", "r") as fList:
        mList = j_loads(fList)
    for dic1 in mList:
        # go to the log path
        # print dic1["path"]
        os_chdir(dic1["path"])
        fShell("echo --- delete inside '%s' ---" % dic1["path"])
        fShell("rm -fv %s" % " ".join(v_lst))
        # go back to the base directory
        os_chdir(szPWD)
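cleStart also calls fShell, a shell helper defined elsewhere in the project. A minimal sketch of what it might look like (only the name comes from the snippet; the implementation is an assumption):

from os import system

def fShell(cmd):
    # hypothetical: run one shell command line and return its exit status
    return system(cmd)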
Code example #2
File: nordlys_app.py  Project: zxlzr/nordlys
def __api_request(service_label, params, index_name=None):
    """Wraps the access to the Nordlys API. It returns a 3-uple (results, total no. of results, pretty status message).

    :param service_label: a constant for the required service_label.
    :param params: request params.
    :param index_name: optional; name of index.
    :return: a list of docIDs.
    """
    results = None  # default init, it remains None if request returns error
    total = 0
    msg = ""

    url = "/".join([PROTOCOL, SERVER_HOSTNAME_API, service_label])
    if service_label == SERVICE_E_RETRIEVAL:
        url += "?q={}&model={}&start={}&1st_num_docs={}&fields_return={}".format(
            quote(params.get("q", "")),
            params.get("model", "lm"),
            params.get("start", 0),
            params.get("1st_num_docs", 100),
            params.get("fields_return", "abstract"),
        )
        url += "&num_docs={}".format(params.get("num_docs", NUM_RESULTS))

    elif service_label == SERVICE_E_LINKING:
        url += "?q={}".format(quote(params.get("q", "")))

    elif service_label == SERVICE_TTI:
        url += "?q={}&method={}&num_docs={}&start={}&index={}&field={}".format(
            quote(params.get("q", "")), params.get("method", "tc"),
            params.get("num_docs", NUM_RESULTS), params.get("start", 0),
            params.get("index", TTI_INDEX_FALLBACK_2015_10),
            params.get("field", "_id"))
    try:
        print("Service request' URL: {}".format(url))
        r = requests_get(url, timeout=REQUEST_TIMEOUT)
        print(r)
        results = j_loads(r.text)
        total = results.get("total_hits", 0)

        # Obtain postprocessed results to render, if needed
        entity_collection = (MONGO_ENTITY_COLLECTIONS[0]
                             if MONGO_ENTITY_COLLECTIONS else "dbpedia-2015-10")
        results = process_results(results,
                                  service_label,
                                  protocol=PROTOCOL,
                                  server_hostname_api=SERVER_HOSTNAME_API,
                                  entity_collection=entity_collection,
                                  request_timeout=REQUEST_TIMEOUT)

    except ConnectionError:
        msg = "We're so sorry. There was a connection error :("
    except Timeout:
        msg = "Timeout while trying to connect to the remote server, or while receiving data from it :("
    except JSONDecodeError:
        msg = "There are no results for your query :("

    return results, total, msg
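The excerpt omits the module-level imports behind the aliased names (constants such as PROTOCOL and SERVER_HOSTNAME_API come from the project's config and are not shown). Assuming the aliasing convention used throughout these examples, the imports would plausibly be:

from json import loads as j_loads, JSONDecodeError
from urllib.parse import quote
from requests import get as requests_get
from requests.exceptions import ConnectionError, Timeout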
Code example #3
File: word_count.py  Project: dzhelonkin/vcp
    def _on_full_final_result(self, _, json_str):
        Gdk.threads_enter()
        if self.keywords != "":
            #prev_text = self.phrasebuf.get_text()
            string = "----------------------------\n"
            json_data = j_loads(json_str)
            transcript = json_data["result"]["hypotheses"][0]['transcript']
            for word in self.keywords.split():
                cnt = transcript.count(word)
                substring = word + " (" + str(cnt) + ")\n"
                string += substring

            self.phrasebuf.begin_user_action()
            self.phrasebuf.insert_at_cursor(string)
            self.phrasebuf.end_user_action()
        Gdk.threads_leave()
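For reference, a minimal illustration of the JSON shape the handler parses; the payload here is made up:

from json import loads as j_loads

json_str = '{"result": {"hypotheses": [{"transcript": "alpha beta alpha"}]}}'
transcript = j_loads(json_str)["result"]["hypotheses"][0]["transcript"]
assert transcript.count("alpha") == 2  # per-word counts as computed above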
Code example #4
File: kws_gui.py  Project: ibnrafail/kaldi_kws
    def _handle_full_final_result(self, _, json_str):
        keyword = self.keyword
        json_data = j_loads(json_str)
        result = []
        kw_list = keyword.split(" ")  # the phrase split into words
        word_alignment = json_data["result"]["hypotheses"][0]["word-alignment"]

        # add the segment start time to each word's time stamp
        for i in word_alignment:
            i["start"] += json_data["segment-start"]
        # search for all occurrences of the keyword phrase
        for i, j in enumerate(word_alignment):
            # a candidate match begins wherever the first word of the phrase appears
            if j["word"] == kw_list[0]:
                kw_list_length = len(kw_list)
                window = word_alignment[i: i + kw_list_length]
                # a single-word phrase is already a confirmed match;
                # otherwise verify the remaining words of the window
                flag = True
                if kw_list_length != 1:
                    idx = 0
                    for item in kw_list:
                        if idx == len(window):
                            # the window ran past the end of the alignment
                            flag = False
                            break
                        if idx != 0 and item != window[idx]["word"]:
                            flag = False
                        idx += 1
                if flag:
                    print(window)
                    result.append(window[0]["start"])

        self.timestamps.extend(result)

        self._display_found_timestamps(result)

        self.text_widget.config(state=NORMAL)
        self.text_widget.insert(END, "{0} ".format(json_data["result"]["hypotheses"][0]["transcript"]))
        self.text_widget.highlight_pattern(r"\b{0}\b".format(self.keyword), "red", len(self.keyword))
        self.text_widget.see(END)
        self.text_widget.config(state=DISABLED)
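The matching step above can also be written as a small standalone helper; a behavior-equivalent sketch (the function name is hypothetical):

def phrase_starts(word_alignment, kw_list):
    # return the start time of every window whose words equal the phrase
    n = len(kw_list)
    starts = []
    for i in range(len(word_alignment) - n + 1):
        window = word_alignment[i:i + n]
        if [w["word"] for w in window] == kw_list:
            starts.append(window[0]["start"])
    return starts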
Code example #5
def staInit():
	global proclist
	## base
	from json import load as j_loads
	with open("start.list", "r") as fList:
		mList = j_loads(fList)
	proclist.clear()
	for dic1 in mList:
		keyCheck(proclist, dic1["serv"])
		if dic1["type"] == M2TYPE.DB:
			keyCheck(proclist[dic1["serv"]], "db", [])
			proclist[dic1["serv"]]["db"].append(dic1)
		elif dic1["type"] == M2TYPE.AUTH or dic1["type"] == M2TYPE.CORE:
			keyCheck(proclist[dic1["serv"]], "core", [])
			proclist[dic1["serv"]]["core"].append(dic1)
			if dic1["type"] == M2TYPE.CORE:
				keyCheck(proclist[dic1["serv"]], "chan", set())
				proclist[dic1["serv"]]["chan"].add(dic1["chan"])
Code example #6
def __api_request(service_label, params, index_name=None):
    """Wraps the access to the Nordlys API. It returns a 3-uple (results, total no. of results, pretty status message).

    :param service_label: a constant for the required service_label.
    :param params: request params.
    :param index_name: optional; name of index.
    :return: a list of docIDs.
    """
    results = None  # default init, it remains None if request returns error
    total = 0
    msg = ""

    url = ""
    if service_label == SERVICE_E_RETRIEVAL:
        url = "/".join([PROTOCOL, SERVER_HOSTNAME_API, "er", index_name])
        url += "?q={}&model={}&start={}&1st_num_docs={}&fields_return={}".format(
            quote(params.get("q", "")),
            params.get("model", "lm"),
            params.get("start", 0),
            params.get("1st_num_docs", 100),
            params.get("fields_return", "abstract"),
        )
        url += "&num_docs={}".format(params.get("num_docs", NUM_RESULTS))

    elif service_label == SERVICE_E_LINKING:
        url = "/".join([PROTOCOL, SERVER_HOSTNAME_API, "el"])
        url += "?q={}".format(quote(params.get("q", "")))

    elif service_label == SERVICE_TTI:

        # TODO deal with API

        url = "/".join([PROTOCOL, SERVER_HOSTNAME_API, "types"])
        url += "?q={}&method={}&num_docs={}&start={}&index={}&field={}".format(
            quote(params.get("q", "")), params.get("method", "tc"),
            params.get("num_docs", NUM_RESULTS), params.get("start", 0),
            params.get("index", TTI_INDEX_FALLBACK_2015_10),
            params.get("field", "_id"))

        # TODO working on local after tunneling Elastic

        # url = "/".join([PROTOCOL, "localhost:8080", index_name, "_search?q={}&fields=_id".format(
        #     quote(params.get("q", "")))])

        # TODO working on gustav1 directly to Elastic
        # url = "/".join([PROTOCOL, SERVER_HOSTNAME_ELASTIC, index_name, "_search?q={}&fields=_id".format(
        #     quote(params.get("q", "")))])

    try:
        print(url)
        r = requests_get(url, timeout=REQUEST_TIMEOUT)  # TODO AJAXify this?
        results = j_loads(r.text)
        # total = WWW_PAGINATION_MAX_RESULTS_ER
        total = len(results.get("results", {}))

        # Obtain postprocessed results to render, if needed
        results = process_results(results, service_label)

    except ConnectionError:
        msg = "We're so sorry. There was a connection error :("
    except Timeout:
        msg = "Timeout while trying to connect to the remote server, or while receiving data from it :("

    return results, total, msg
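A minimal call for illustration; the service constant and the params key are taken from the snippet, the query string is made up:

results, total, msg = __api_request(SERVICE_E_LINKING, {"q": "barack obama"})
if results is None:
    print(msg)  # the request failed; msg carries the user-facing error text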
Code example #7
    src = soup.select('script[src*="ProfilePageContainer.js/"]')[0]['src']
    queryHashPhoto = get_query_hash_for_get_new_photo(src)
    src = soup.select('script[src*="/Consumer.js/"]')[0]['src']
    queryHash = get_query_hash(src)
    # queryHash = '477b65a610463740ccdb83135b2014db'

    # create threads and log
    for i in range(20):
        allThread.append(Parser())
        allThread[i].start()
    Log().start()

    # parse
    script = soup.find('body').find('script')
    # strip the fixed-length '<script ...>window._sharedData = ' prefix and the
    # trailing ';</script>' to leave the bare JSON payload
    shareData = str(script)[52:-10]
    shareData = j_loads(shareData)
    userId = shareData['entry_data']['ProfilePage'][0]['logging_page_id'].split('_')[-1]
    nowJpg = shareData['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']['edges']
    lastJpg = shareData['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']['page_info']['end_cursor']
    while True:
        for infoJpg in nowJpg:
            infoJpg = infoJpg['node']
            allJpg.put(infoJpg)
        if lastJpg is None:
            break
        nowJpg, lastJpg = get_new_data(queryHashPhoto, userId, lastJpg)
        Log.end_cursor = lastJpg
    WORK = False
    for th in allThread:
        th.join()
    Log.work = False
    end = time.time()
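The fixed [52:-10] slice breaks as soon as Instagram changes the script tag; a sketch of a more defensive extraction of the same payload (an alternative, not the project's code):

text = str(script)
start = text.index('window._sharedData = ') + len('window._sharedData = ')
shareData = j_loads(text[start:text.rindex(';')])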
Code example #8
for h_id in hotel_ids[:100]:
    if str(h_id) in parsed_hotels:
        continue

    reviews = []
    reviews_raw = []
    start = 0
    items = 10

    while True:
        # headers must go by keyword: requests.get's second positional
        # argument is params (assuming r_get aliases requests.get)
        temp = r_get(source.format(h_id, start, items), headers=headers)
        if temp.status_code != 200:
            with codecs.open('errors_hotels.txt', 'a+', 'utf-8') as f:
                f.write('{} {}\n'.format(h_id, start))
                break
        # drop the fixed-width wrapper around the JSON payload
        temp = j_loads(temp.content[4:-2])
        if not temp[u'reviewDetails'][u'numberOfReviewsInThisPage']:
            break
        reviews += temp[u'reviewDetails'][u'reviewCollection']['review']
        reviews_raw.append(temp)
        start += items
        sleep(1)

    if len(reviews) > 0:
        reviews_df = pd.DataFrame(reviews)[cols_to_remain]
        reviews_df.to_csv('reviews.csv',
                          mode='a+',
                          header=False,
                          index=False,
                          encoding='utf-8')
        with codecs.open('reviews_rav.txt', 'a+', 'utf-8') as f:
            pass  # body truncated in the source listing
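Similarly, the [4:-2] slice assumes a fixed-width wrapper around the JSON body; a more defensive variant could locate the JSON object itself (a sketch; the exact wrapper depends on the API):

resp = r_get(source.format(h_id, start, items), headers=headers)
body = resp.content.decode('utf-8')
payload = j_loads(body[body.index('{'):body.rindex('}') + 1])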
Code example #9
File: service_utils.py  Project: iai-group/nordlys
def process_results(raw_results,
                    service_label,
                    protocol="http:/",
                    server_hostname_api="",
                    entity_collection="",
                    request_timeout=30):
    """Processes a list of raw results to obtain further data components.

    :param raw_results: a list of raw results (typically docIDs).
    :param service_label: a constant for the required service_label.
    :return:
    """
    results = []

    if service_label == SERVICE_E_RETRIEVAL:
        sorted_ranks = __sort_set_of_str_elems(
            raw_results.get("results", {}).keys())
        for rank in sorted_ranks:
            result_dict = raw_results.get("results", {}).get(rank, {})
            entity_id = result_dict.get("entity", "")
            if entity_id == "":
                continue

            unprefixed_doc_id = entity_id.split(":")[-1].split(">")[0]

            abstract_list = result_dict.get("fields", {}).get("abstract", [])
            abstract = abstract_list[0] if len(abstract_list) > 0 else ""

            # Entity catalog request for making entity cards and getting Freebase ID
            url = "/".join([protocol, server_hostname_api, "ec", entity_id])
            try:
                # print("\tCatalog request URL: {}".format(url))
                r = requests_get(url, timeout=request_timeout)
                catalog_results = j_loads(r.text)
            except Exception:
                catalog_results = dict()

            card_data = __obtain_card_data(catalog_results)

            # Final result dict
            result = {
                RESULT_DOC_TITLE_K: unprefixed_doc_id.replace("_", " "),
                RESULT_DOC_ID_K: entity_id,
                RESULT_DOC_SNIPPET_K: __shorten_abstract(abstract),
                RESULT_URL_DBPEDIA_K: "/".join([
                    DBPEDIA_HOSTNAME,
                    SERVICE_TO_DBPEDIA_SUBHOST[service_label],
                    unprefixed_doc_id
                ]),
                RESULT_URL_WIKIPEDIA_K: "/".join([
                    WIKIPEDIA_HOSTNAME,
                    SERVICE_TO_WIKIPEDIA_SUBHOST[service_label],
                    unprefixed_doc_id
                ]),
                RESULT_FREEBASE_ID_K: catalog_results.get("fb:<owl:sameAs>", [None])[0],
                RESULT_DOC_CARD_K: card_data
            }

            results.append(result)

    elif service_label == SERVICE_E_LINKING:
        query = raw_results.get("processed_query", "")

        linked_results = raw_results.get("results", {})

        result_counter = 0
        for result_l in sorted(linked_results,
                               key=lambda k: k['score'],
                               reverse=True):

            result_counter += 1
            entity_id = result_l['entity']
            score = result_l['score']
            unprefixed_doc_id = entity_id.split(":")[-1].split(">")[0]
            entity_url = "/".join([
                DBPEDIA_HOSTNAME, SERVICE_TO_DBPEDIA_SUBHOST[service_label],
                unprefixed_doc_id
            ])

            # Entity catalog request for getting popup picture and abstract
            url = "/".join([protocol, server_hostname_api, "ec", entity_id])
            try:
                # print("\tCatalog request URL: {}".format(url))
                r = requests_get(url, timeout=request_timeout)
                catalog_results = j_loads(r.text)
            except Exception:
                catalog_results = dict()

            # Defining result components
            picture = __get_card_picture(catalog_results)  # possibly None
            most_specific_type = __get_card_type(catalog_results)  # possibly None
            if most_specific_type:
                most_specific_type = most_specific_type.upper()
            abstract = __shorten_abstract(
                catalog_results.get("<dbo:abstract>", [""])[0], max_length=400)
            formatted_result = query.replace(
                unprefixed_doc_id.lower(),
                "<a href=\"{}\" target=\"_blank\" id=\"elLink{}\" "
                "onmouseover=\"showPop(\'elPop{}\', event);\""
                " onmouseout=\"hidePop(\'elPop{}\');\""  # NOTE: important the blank between each event handler
                ">"
                "{}</a>".format(entity_url, result_counter, result_counter,
                                result_counter, unprefixed_doc_id.lower()))

            result = {  #RESULT_LINKED_SUBSTR_K: linked_substr,
                RESULT_DOC_TITLE_K: unprefixed_doc_id.replace("_", " "),
                RESULT_DOC_ID_K: entity_id[1:-1] if len(entity_id) > 1 else "",
                RESULT_DOC_SNIPPET_K: abstract,
                RESULT_DOC_PICTURE_K: picture,
                RESULT_URL_DBPEDIA_K: entity_url,
                RESULT_DOC_SCORE_K: round(score, 6),
                RESULT_DOC_TYPE_K: most_specific_type,
                RESULT_EL_TO_SHOW_K: formatted_result
            }

            results.append(result)

    elif service_label == SERVICE_TTI:

        # TODO remap the TTI scores from the API with exp when the method is LM

        sorted_ranks = __sort_set_of_str_elems(
            raw_results.get("results", {}).keys())
        for rank in sorted_ranks[0:10]:
            result_dict = raw_results.get("results", {}).get(rank, {})
            type_id = result_dict.get("type", "")

            if __must_be_skipped(type_id):
                continue
            unprefixed_doc_id = type_id.split(":")[-1].split(">")[0]
            result = {
                RESULT_DOC_TITLE_K: __convert_from_camelcase(unprefixed_doc_id).replace("_", " "),
                RESULT_DOC_ID_K: type_id,
                RESULT_DOC_SCORE_K: round(result_dict.get("score", 0), 6),
                RESULT_URL_DBPEDIA_K: "/".join([
                    DBPEDIA_HOSTNAME,
                    SERVICE_TO_DBPEDIA_SUBHOST[service_label],
                    unprefixed_doc_id
                ])
            }

            results.append(result)

    return results
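process_results relies on private helpers the listing does not show. For example, __sort_set_of_str_elems appears to order string rank keys ("0", "1", ...) numerically; a minimal sketch under that assumption:

def __sort_set_of_str_elems(str_elems):
    # hypothetical: sort rank keys numerically so results come out in rank order
    return sorted(str_elems, key=int)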