Esempio n. 1
0
def regular_gn_start_parsing(source_id):
    """Start Google News parsing for a source and route the resulting documents.

    Calls ``gn_start_parsing`` for ``source_id``, sets the status of every
    returned document to ``GOOGLE_NEWS_INIT_STATUS``, commits, and then hands
    each document id to ``router`` with that status.

    Args:
        source_id: Identifier forwarded to ``gn_start_parsing``.
    """
    session = DBSession()
    try:
        docs = gn_start_parsing(source_id, session)
        for doc in docs:
            doc.status = GOOGLE_NEWS_INIT_STATUS
        session.commit()
        # Routing runs before the session is closed, as in the original flow
        # (doc.doc_id is read from objects obtained through this session).
        for doc in docs:
            router(doc.doc_id, GOOGLE_NEWS_INIT_STATUS)
    finally:
        # Release the session even if parsing, commit or routing raises.
        session.close()
Esempio n. 2
0
def regular_vk_start_parsing(source_id):
    """Start VK parsing for a source and route the resulting documents.

    Calls ``vk_start_parsing`` for ``source_id``, sets the status of every
    returned document to ``VK_COMPLETE_STATUS``, commits, and then hands each
    document id to ``router`` with that status.

    Args:
        source_id: Identifier forwarded to ``vk_start_parsing``.
    """
    session = DBSession()
    try:
        docs = vk_start_parsing(source_id, session)
        for doc in docs:
            doc.status = VK_COMPLETE_STATUS
        session.commit()
        # Routing runs before the session is closed, as in the original flow
        # (doc.doc_id is read from objects obtained through this session).
        for doc in docs:
            router(doc.doc_id, VK_COMPLETE_STATUS)
    finally:
        # Release the session even if parsing, commit or routing raises.
        session.close()
Esempio n. 3
0
def regular_find_full_text(doc_id, new_status):
    """Extract the full text for one document and route it onward.

    Loads the document with ``doc_id``, runs ``find_full_text`` on it, stores
    the resulting status, commits, and calls ``router`` with that status.

    Any exception raised by ``find_full_text`` is logged and replaces
    ``new_status``:

    * message ``'Empty text'`` -> ``EMPTY_TEXT``
    * ``HTTPError`` (or a subclass) -> ``SITE_PAGE_LOADING_FAILED``
    * anything else -> ``SITE_PAGE_LOADING_FAILED``

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status to store when extraction succeeds.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        try:
            find_full_text(doc)
        except Exception as err:
            # str(), not repr(): repr() includes the exception class name, so
            # it could never equal the bare message and this branch was dead.
            if str(err) == 'Empty text':
                logging.error("Пустой текст doc_id: %s", doc_id)
                new_status = EMPTY_TEXT
            elif isinstance(err, HTTPError):
                new_status = SITE_PAGE_LOADING_FAILED
                # %-style args keep logging working when doc_id is not a str.
                logging.error("Ошибка загрузки код: %s doc_id: %s",
                              err.code, doc_id)
            else:
                new_status = SITE_PAGE_LOADING_FAILED
                # Attach the traceback so the unexpected failure is diagnosable.
                logging.error("Неизвестная ошибка doc_id: %s", doc_id,
                              exc_info=True)

        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if the query or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 4
0
def regular_tomita_features(doc_id, new_status):
    """Create Tomita features for one document and route it onward.

    Loads the document with ``doc_id``, calls
    ``ner_feature.create_tomita_feature`` with the module-level ``grammars``
    and the open session, stores ``new_status``, commits, and calls ``router``.

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        # The trailing False is passed positionally; presumably it tells the
        # helper not to commit on its own - confirm against its signature.
        ner_feature.create_tomita_feature(doc, grammars, session, False)
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if feature creation or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 5
0
def regular_tomita(grammar_index, doc_id, new_status):
    """Run one Tomita grammar on a document and route it onward.

    Loads the document with ``doc_id``, calls ``run_tomita`` with
    ``grammars[grammar_index]`` and the open session, stores ``new_status``,
    commits, and calls ``router``.

    Args:
        grammar_index: Index into the module-level ``grammars`` collection.
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        # The trailing False is passed positionally; presumably it tells the
        # helper not to commit on its own - confirm against its signature.
        run_tomita(doc, grammars[grammar_index], session, False)
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if Tomita or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 6
0
def regular_lemmas(doc_id, new_status):
    """Count lemma frequencies for one document and route it onward.

    Loads the document with ``doc_id``, calls ``rb.lemmas_freq_doc`` on it,
    stores ``new_status``, commits, and calls ``router``.

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        rb.lemmas_freq_doc(doc)
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if lemma counting or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 7
0
def regular_morpho(doc_id, new_status):
    """Run morphological processing for one document and route it onward.

    Loads the document with ``doc_id``, calls ``rb.morpho_doc`` on it, stores
    ``new_status``, commits, and calls ``router``.

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        rb.morpho_doc(doc)
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if morphology or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 8
0
def regular_rubrication(doc_id, new_status):
    """Assign the rubric to one document and route it onward.

    Loads the document with ``doc_id``, sets ``rubric_ids`` to a single
    hard-coded rubric id, stores ``new_status``, commits, and calls ``router``.

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        # NOTE: the classifier call
        # rb.spot_doc_rubrics2(doc_id, rubrics_for_regular, new_status)
        # is disabled; every document receives this fixed rubric id instead.
        doc.rubric_ids = ['19848dd0-436a-11e6-beb8-9e71128cae50']
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if the update or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 9
0
def regular_entities(doc_id, new_status):
    """Convert Tomita results to markup for one document and route it onward.

    Loads the document with ``doc_id``, calls
    ``convert_tomita_result_to_markup`` with the module-level ``grammars`` and
    the open session, stores ``new_status``, commits, and calls ``router``.

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        # commit_session=False: the single commit is issued below, together
        # with the status change.
        convert_tomita_result_to_markup(doc,
                                        grammars,
                                        session=session,
                                        commit_session=False)
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if conversion or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 10
0
def regular_entities(doc_id, new_status):
    """Convert Tomita results to markup for one document and route it onward.

    Loads the document with ``doc_id``, calls
    ``convert_tomita_result_to_markup`` with the module-level ``grammars`` and
    the open session, stores ``new_status``, commits, and calls ``router``.

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        # commit_session=False: the single commit is issued below, together
        # with the status change.
        convert_tomita_result_to_markup(doc, grammars, session=session,
                                        commit_session=False)
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if conversion or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 11
0
def regular_tomita_features(doc_id, new_status):
    """Create Tomita features for one document and route it onward.

    Loads the document with ``doc_id``, calls
    ``ner_feature.create_tomita_feature`` with the module-level ``grammars``
    and the open session, stores ``new_status``, commits, and calls ``router``.

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        # The trailing False is passed positionally; presumably it tells the
        # helper not to commit on its own - confirm against its signature.
        ner_feature.create_tomita_feature(doc, grammars, session, False)
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if feature creation or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 12
0
def regular_tomita(grammar_index, doc_id, new_status):
    """Run one Tomita grammar on a document and route it onward.

    Loads the document with ``doc_id``, calls ``run_tomita`` with
    ``grammars[grammar_index]`` and the open session, stores ``new_status``,
    commits, and calls ``router``.

    Args:
        grammar_index: Index into the module-level ``grammars`` collection.
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        # The trailing False is passed positionally; presumably it tells the
        # helper not to commit on its own - confirm against its signature.
        run_tomita(doc, grammars[grammar_index], session, False)
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if Tomita or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 13
0
def regular_lemmas(doc_id, new_status):
    """Count lemma frequencies for one document and route it onward.

    Loads the document with ``doc_id``, calls ``rb.lemmas_freq_doc`` on it,
    stores ``new_status``, commits, and calls ``router``.

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        rb.lemmas_freq_doc(doc)
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if lemma counting or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 14
0
def regular_morpho(doc_id, new_status):
    """Run morphological processing for one document and route it onward.

    Loads the document with ``doc_id``, calls ``rb.morpho_doc`` on it, stores
    ``new_status``, commits, and calls ``router``.

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        rb.morpho_doc(doc)
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if morphology or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 15
0
def regular_rubrication(doc_id, new_status):
    """Assign the rubric to one document and route it onward.

    Loads the document with ``doc_id``, sets ``rubric_ids`` to a single
    hard-coded rubric id, stores ``new_status``, commits, and calls ``router``.

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status stored on the document and passed to ``router``.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        # NOTE: the classifier call
        # rb.spot_doc_rubrics2(doc_id, rubrics_for_regular, new_status)
        # is disabled; every document receives this fixed rubric id instead.
        doc.rubric_ids = ['19848dd0-436a-11e6-beb8-9e71128cae50']
        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if the update or the commit raises.
        session.close()
    router(doc_id, new_status)
Esempio n. 16
0
def regular_gn_start_parsing(source_id):
    """Start Google News parsing for a source and route the resulting documents.

    Calls ``gn_start_parsing`` for ``source_id``, sets the status of every
    returned document to ``GOOGLE_NEWS_INIT_STATUS``, commits, and then hands
    each document id to ``router`` with that status.

    Args:
        source_id: Identifier forwarded to ``gn_start_parsing``.
    """
    session = DBSession()
    try:
        docs = gn_start_parsing(source_id, session)
        for doc in docs:
            doc.status = GOOGLE_NEWS_INIT_STATUS
        session.commit()
        # Routing runs before the session is closed, as in the original flow
        # (doc.doc_id is read from objects obtained through this session).
        for doc in docs:
            router(doc.doc_id, GOOGLE_NEWS_INIT_STATUS)
    finally:
        # Release the session even if parsing, commit or routing raises.
        session.close()
Esempio n. 17
0
def regular_vk_start_parsing(source_id):
    """Start VK parsing for a source and route the resulting documents.

    Calls ``vk_start_parsing`` for ``source_id``, sets the status of every
    returned document to ``VK_COMPLETE_STATUS``, commits, and then hands each
    document id to ``router`` with that status.

    Args:
        source_id: Identifier forwarded to ``vk_start_parsing``.
    """
    session = DBSession()
    try:
        docs = vk_start_parsing(source_id, session)
        for doc in docs:
            doc.status = VK_COMPLETE_STATUS
        session.commit()
        # Routing runs before the session is closed, as in the original flow
        # (doc.doc_id is read from objects obtained through this session).
        for doc in docs:
            router(doc.doc_id, VK_COMPLETE_STATUS)
    finally:
        # Release the session even if parsing, commit or routing raises.
        session.close()
Esempio n. 18
0
def regular_find_full_text(doc_id, new_status):
    """Extract the full text for one document and route it onward.

    Loads the document with ``doc_id``, runs ``find_full_text`` on it, stores
    the resulting status, commits, and calls ``router`` with that status.

    Any exception raised by ``find_full_text`` is logged and replaces
    ``new_status``:

    * message ``'Empty text'`` -> ``EMPTY_TEXT``
    * ``HTTPError`` (or a subclass) -> ``SITE_PAGE_LOADING_FAILED``
    * anything else -> ``SITE_PAGE_LOADING_FAILED``

    Args:
        doc_id: Identifier of the document to process.
        new_status: Status to store when extraction succeeds.
    """
    session = DBSession()
    try:
        doc = session.query(Document).filter_by(doc_id=doc_id).first()
        try:
            find_full_text(doc)
        except Exception as err:
            # str(), not repr(): repr() includes the exception class name, so
            # it could never equal the bare message and this branch was dead.
            if str(err) == 'Empty text':
                logging.error("Пустой текст doc_id: %s", doc_id)
                new_status = EMPTY_TEXT
            elif isinstance(err, HTTPError):
                new_status = SITE_PAGE_LOADING_FAILED
                # %-style args keep logging working when doc_id is not a str.
                logging.error("Ошибка загрузки код: %s doc_id: %s",
                              err.code, doc_id)
            else:
                new_status = SITE_PAGE_LOADING_FAILED
                # Attach the traceback so the unexpected failure is diagnosable.
                logging.error("Неизвестная ошибка doc_id: %s", doc_id,
                              exc_info=True)

        doc.status = new_status
        session.commit()
    finally:
        # Release the session even if the query or the commit raises.
        session.close()
    router(doc_id, new_status)