def _generate_json_and_dispatch(salient_corpus_map:Dict, retry_count=0):
    """Index one Elasticsearch document per non-empty entry of the map.

    For every ``key -> value`` pair, ``value[1]`` is taken as the sentence
    list. Entries whose sentence list is empty are skipped. For the others a
    linked document is derived from the list, and the JSON body built from it
    is indexed under ``key`` in ``linked_doc.index_name + index_suffix``.

    Args:
        salient_corpus_map: Mapping of document id to a sequence whose second
            element is the list of sentences for that document.
        retry_count: Number of attempts already made; callers normally leave
            this at its default.

    Returns:
        None. Failures are logged, never raised.
    """
    try:
        es = es_conn.connectToES()
        for docid, value in salient_corpus_map.items():
            sentence_list = value[1]
            if len(sentence_list) == 0:
                continue

            linked_doc = _extract_linked_doc_from_list(sentence_list)
            index_name = linked_doc.index_name

            log.getLogger().debug("Dispatching: " + str(docid) + " | " + index_name)
            es.index(index=index_name + index_suffix, doc_type='_doc', id=docid, body=_generate_json(linked_doc, _convert_linkeddoclist_to_string(sentence_list)))
    except Exception as e:
        retry_count = retry_count + 1
        msg = "WARN: " + str(e)

        log.getLogger().error(msg)
        # Only errors whose text mentions "time" (e.g. timeouts) are retried,
        # at most until ten attempts have failed. A retry re-dispatches the
        # whole map from the start, including entries already indexed.
        if "time" in msg.lower() and retry_count < 10:
            _generate_json_and_dispatch(salient_corpus_map, retry_count)
예제 #2
0
def _generate_json_and_dispatch(linked_doc:merm_model.LinkedDocument):
    """Send an update for ``linked_doc`` to its Elasticsearch index.

    The document is addressed by ``linked_doc.uid`` inside
    ``linked_doc.index_name``; the response returned by the client is written
    to the debug log. Exceptions are not handled here.
    """
    client = es_conn.connectToES()
    target_index = linked_doc.index_name

    announcement = "Dispatching: " + str(linked_doc.uid) + " | " + target_index
    log.getLogger().debug(announcement)

    # NOTE(review): _generate_json() is called without arguments, so the
    # payload does not obviously depend on linked_doc -- confirm intended.
    outcome = client.update(
        index=target_index,
        doc_type='_doc',
        id=linked_doc.uid,
        body=_generate_json(),
    )
    log.getLogger().debug("Dispatched with result " + str(outcome))
예제 #3
0
def _dispatch_bulk(index_name, data_dict, retry_count=0):
    """Bulk-send ``data_dict`` to ``index_name``, retrying on "time" errors.

    Each attempt opens a connection through ``es_conn.connectToES()`` and
    hands the data to ``helpers.bulk``. Any exception is logged; the attempt
    is repeated only while the logged message contains "time" and fewer than
    ten attempts have failed. Nothing is returned and nothing is raised for
    ordinary exceptions.
    """
    while True:
        try:
            client = es_conn.connectToES()
            helpers.bulk(client, data_dict, index=index_name)
        except Exception as err:
            retry_count += 1
            warning = "WARN: " + str(err)

            log.getLogger().error(warning)
            retryable = "time" in warning.lower()
            if not retryable or retry_count >= 10:
                return
        else:
            return
예제 #4
0
def _dispatch(index_name, id, bodyjson, retry_count=0):
    """Index ``bodyjson`` under ``id`` in ``index_name``, retrying on "time" errors.

    A connection is obtained for every attempt. Exceptions are logged rather
    than raised; another attempt is made only when the logged message
    contains "time" and fewer than ten attempts have failed so far.
    """
    while True:
        try:
            client = es_conn.connectToES()
            client.index(index=index_name, id=id, body=bodyjson)
        except Exception as err:
            retry_count += 1
            warning = "WARN: " + str(err)

            log.getLogger().error(warning)
            give_up = retry_count >= 10 or "time" not in warning.lower()
            if give_up:
                return
        else:
            return
예제 #5
0
def run_post_process(package: merm_model.PipelinePackage):
    """Push ``page_views`` scores of Confluence documents to Elasticsearch.

    Nothing is updated unless ``package.any_analysis_dict["provider"]``
    contains "confluence". For each linked document that carries a
    ``"page_views"`` score, a partial update setting the ``page_views`` field
    is sent to the document's own index, addressed by ``linked_doc.uid``.

    Args:
        package: Pipeline package providing ``any_analysis_dict`` and
            ``linked_document_list``.

    Returns:
        None. Exceptions from the Elasticsearch client are not handled here.
    """
    es = es_conn.connectToES()
    if "confluence" not in package.any_analysis_dict["provider"]:
        return

    for linked_doc in package.linked_document_list:
        if "page_views" not in linked_doc.scores:
            continue

        page_view_count = linked_doc.scores["page_views"]
        # Log the same identifier that is used for the update below
        # (the original read ``linked_doc.ui`` here).
        log.getLogger().info("Updating " + str(linked_doc.uid) +
                             " page_views: " + str(page_view_count))

        es.update(index=linked_doc.index_name,
                  doc_type="_doc",
                  id=linked_doc.uid,
                  body={"doc": {
                      "page_views": page_view_count
                  }})
예제 #6
0
def _generate_json_and_dispatch(id, index_name, major, retry_count=0):
    """Set the ``majorFinal`` field of document ``id`` in ``index_name``.

    Sends a partial update, logs what was written and prints the client's
    response. Exceptions are logged rather than raised; the update is retried
    only when the logged message contains "time" and fewer than ten attempts
    have failed.

    Args:
        id: Identifier of the document to update.
        index_name: Index that holds the document.
        major: Value stored in the ``majorFinal`` field.
        retry_count: Number of attempts already made; callers normally leave
            this at its default.

    Returns:
        None.
    """
    try:
        es = es_conn.connectToES()
        response = es.update(index=index_name,
                             id=id,
                             body={"doc": {
                                 "majorFinal": major
                             }})
        # str() keeps a non-string id/major from raising TypeError here,
        # which the handler below would misreport as a failed update.
        log.getLogger().info("Updating " + str(id) + " with " + str(major))
        print('response:', response)

    except Exception as e:
        retry_count = retry_count + 1
        msg = "WARN: " + str(e)

        log.getLogger().error(msg)
        if "time" in msg.lower() and retry_count < 10:
            _generate_json_and_dispatch(id, index_name, major, retry_count)
예제 #7
0
def initiate_extraction(es_conn, package):
    """Open a connection via ``es_conn`` and return ``_extract``'s result for ``package``."""
    return _extract(es_conn.connectToES(), package)