Exemple #1
0
def begin_parse(parameters):
    """Parse every active site and store the converted results.

    For each name returned by the active-sites query, the site's name and
    link are looked up again, the site is parsed once per sort mode, and
    every converted result is handed to ``db_load.save_to_db``.

    Args:
        parameters: mapping of run settings. ``parameters['test']`` is read
            here; the whole mapping is also forwarded to
            ``parserUtill.make_parse``.
    """
    active_rows = db_utills.select_query(
        "SELECT NAME from sites s where s.active=True;")
    for row in active_rows:
        lookup = "SELECT name, link FROM sites where upper(NAME)=upper('%s')" % row[0]
        site = db_utills.select_query(lookup)
        # Only the first matching row is used.
        name = site[0][0]
        link = site[0][1]
        print("Парсинг: " + name)

        # A test run uses a single sort mode; the value '0' selects all four.
        sort_modes = 4 if parameters['test'] == '0' else 1

        # Parse once for every sort mode.
        for sort_index in range(sort_modes):
            parsed = parserUtill.make_parse(name, link, parameters, sort_index)
            db_load.save_to_db(result_conversion.convert_json(parsed))

        # NOTE(review): this runs after every site, so the session is marked
        # 'done' as soon as the first site finishes - confirm that is intended.
        done_query = (
            "UPDATE parse_sessions SET STATUS='done' where SESSION_ID=%s"
            % SESSION_ID)
        db_utills.update_query(done_query)

        print("Парсинг " + name + " закончен")
Exemple #2
0
def save_to_db(data):
    """Insert every item of ``data`` into the ``results`` table.

    One INSERT statement is issued per item through
    ``db_utills.update_query``. Progress messages are printed before and
    after the loop.

    Args:
        data: iterable of mappings. The keys of each mapping are used as
            column names and its values as the values of the inserted row.
            Empty mappings are skipped because they have nothing to insert.
    """
    print("Сохранение в БД...")
    for item in data:
        if not item:
            # An INSERT without columns would be an invalid statement.
            continue
        db_utills.update_query(_build_insert_query(item))
    print("Результат сохранен в БД")


def _sql_literal(value):
    """Render one Python value as an SQL literal."""
    if value is None:
        return "NULL"
    if isinstance(value, (bool, int, float)):
        # repr() of these types is rendered exactly as before.
        return repr(value)
    # Standard SQL escapes a single quote inside a string by doubling it.
    return "'%s'" % str(value).replace("'", "''")


def _build_insert_query(item):
    """Build the INSERT statement for one non-empty result mapping."""
    # Formatting Python tuples straight into the statement relied on repr():
    # a one-element tuple gained a trailing comma, None was emitted as the
    # bare word None, and quotes/newlines were escaped the Python way.
    # NOTE(review): if db_utills.update_query accepts bound parameters,
    # prefer those over literal rendering - its signature is not visible here.
    columns = ", ".join(
        '"%s"' % str(key).replace('"', '""') for key in item)
    values = ", ".join(_sql_literal(value) for value in item.values())
    return "INSERT INTO results (%s) values (%s);" % (columns, values)
Exemple #3
0
def save_results(tower_docs, cos_matrix):
    """Save the final data frames as CSV and mark the analysis session done.

    Each frame is written twice: into the per-session history directory and
    into the ``current_result`` directory. Afterwards the analysis result is
    loaded through ``db_load.load_analyse_result`` and the session row in
    ``analyse_sessions`` is set to 'done'.

    Args:
        tower_docs: object with a ``to_csv`` method, saved as
            ``tf_idf_words.csv``.
        cos_matrix: object with a ``to_csv`` method, saved as
            ``cos_matrix.csv``.
    """
    print('saving_results...')
    history_dir = "analyse_result/history/" + SESSION_ID
    current_dir = "analyse_result/current_result"
    os.makedirs(history_dir, exist_ok=True)
    os.makedirs(current_dir, exist_ok=True)

    outputs = (
        (tower_docs, "tf_idf_words.csv"),
        (cos_matrix, "cos_matrix.csv"),
    )
    for frame, file_name in outputs:
        # History copy first, then the copy holding the latest result.
        frame.to_csv(history_dir + "/" + file_name)
        frame.to_csv(current_dir + "/" + file_name)

    db_load.load_analyse_result(SESSION_ID)
    done_query = (
        "UPDATE analyse_sessions SET STATUS='done' where SESSION_ID=%s"
        % SESSION_ID)
    db_utills.update_query(done_query)
Exemple #4
0
 def run(self):
     """Start a new session and run the algorithm inside it.

     The global ``SESSION_ID`` is set to the current timestamp, the database
     and the session are initialised, and ``run_algorithm`` is executed. An
     exception is not propagated: its traceback is printed and an error
     status is recorded. Finally, the ``analyse_sessions`` row is set to
     'error' unless its status is 'done'.
     """
     global SESSION_ID
     SESSION_ID = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
     db_utills.init_db()
     init_session()
     try:
         run_algorithm()
     except Exception:
         # NOTE(review): this marks parse_sessions, while the check below
         # works on analyse_sessions - confirm the table is intended.
         parse_failed = (
             "UPDATE parse_sessions SET STATUS='error' where SESSION_ID=%s"
             % SESSION_ID)
         db_utills.update_query(parse_failed)
         traceback.print_exc()

     status_query = (
         "SELECT status FROM analyse_sessions where session_id=%s"
         % SESSION_ID)
     status = db_utills.select_one_item(status_query)
     if status != 'done':
         # Anything other than 'done' at this point counts as a failure.
         analyse_failed = (
             "UPDATE analyse_sessions SET STATUS='error' where SESSION_ID=%s"
             % SESSION_ID)
         db_utills.update_query(analyse_failed)
Exemple #5
0
 def run(self):
     """Start a new session and parse the sites inside it.

     The global ``SESSION_ID`` is set to the current timestamp, then the
     database, the session, the parameters and the directories are
     initialised before ``begin_parse`` is called. An exception is not
     propagated: the session is marked 'error' and the traceback is printed.
     Finally, the ``parse_sessions`` row is set to 'error' unless its status
     is 'done'.
     """
     global SESSION_ID
     SESSION_ID = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
     db_utills.init_db()
     init_session()
     parameters = init_params()
     init_directories(parameters)

     # The same statement marks the session as failed in both places below.
     mark_failed = (
         "UPDATE parse_sessions SET STATUS='error' where SESSION_ID=%s"
         % SESSION_ID)

     print("---------------")
     print("parsing started")
     try:
         begin_parse(parameters)
     except Exception:
         db_utills.update_query(mark_failed)
         traceback.print_exc()
     print("parsing finished")

     status_query = (
         "SELECT status FROM parse_sessions where session_id=%s"
         % SESSION_ID)
     status = db_utills.select_one_item(status_query)
     if status != 'done':
         # Anything other than 'done' at this point counts as a failure.
         db_utills.update_query(mark_failed)
Exemple #6
0
def init_session():
    """Insert a row for the current session into ``parse_sessions``.

    The row holds the global ``SESSION_ID``, the current date and time
    formatted as ``YYYY-MM-DD HH:MM``, and the status 'work'.

    Returns:
        Whatever ``db_utills.update_query`` returns for the INSERT.
    """
    created_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    row_values = (SESSION_ID, created_at, 'work')
    statement = (
        "INSERT INTO parse_sessions (session_id, date_create, status) "
        "values (%s, '%s', '%s');") % row_values
    outcome = db_utills.update_query(query=statement)
    print('session inited')
    return outcome