def begin_parse(parameters):
    """Parse every active site and store the converted results.

    The names of all active sites are read first. For each one, the site's
    name and link are looked up again (case-insensitively by name), then
    ``parserUtill.make_parse`` is called once per pass and every converted
    result is handed to ``db_load.save_to_db``.

    Args:
        parameters: Mapping of run parameters. ``parameters['test']`` is
            compared with the string ``'0'``: when equal, four passes are
            made per site, otherwise only one. The mapping is also forwarded
            unchanged to ``parserUtill.make_parse``.

    Raises:
        IndexError: If the per-site lookup returns no rows.
    """
    active_sites = db_utills.select_query(
        "SELECT NAME from sites s where s.active=True;")
    for row in active_sites:
        # Double any single quote so the name cannot terminate the SQL
        # string literal it is embedded in.
        escaped_name = str(row[0]).replace("'", "''")
        site = db_utills.select_query(
            "SELECT name, link FROM sites where upper(NAME)=upper('%s')"
            % escaped_name)
        first_match = site[0]
        name = first_match[0]
        link = first_match[1]
        print("Парсинг: " + name)

        # A test run handles a single sort type; a normal run handles four.
        passes = 4 if parameters['test'] == '0' else 1

        # Parse once per sort type; the pass index is forwarded to the parser.
        for count in range(passes):
            parsed = parserUtill.make_parse(name, link, parameters, count)
            db_load.save_to_db(result_conversion.convert_json(parsed))

        # NOTE(review): the session is flagged 'done' after each site rather
        # than once after all sites - confirm this is the intended placement.
        db_utills.update_query(
            "UPDATE parse_sessions SET STATUS='done' where SESSION_ID=%s"
            % SESSION_ID)
        print("Парсинг " + name + " закончен")
def _sql_literal(value):
    """Render a Python value as a SQL literal for inlining into a statement."""
    if value is None:
        return "NULL"
    # bool is tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return "TRUE" if value else "FALSE"
    if isinstance(value, (int, float)):
        return repr(value)
    # Everything else is stored as text; embedded single quotes are doubled
    # so the value cannot terminate the literal.
    return "'" + str(value).replace("'", "''") + "'"


def save_to_db(data):
    """Insert every item of ``data`` as one row of the ``results`` table.

    Each item's keys become the column list and its values the inserted
    values. The statement is assembled explicitly instead of relying on the
    ``repr`` of Python tuples, which produced a trailing comma for one-column
    items, the bare word ``None`` for missing values, and double-quoted text
    for strings containing an apostrophe.

    Args:
        data: Iterable of mappings from column name to value. Items with no
            keys are skipped because there is nothing to insert.
    """
    print("Сохранение в БД...")
    for item in data:
        if not item:
            continue
        # Column names are emitted as double-quoted SQL identifiers.
        columns = ", ".join(
            '"' + str(key).replace('"', '""') + '"' for key in item.keys())
        values = ", ".join(_sql_literal(value) for value in item.values())
        db_utills.update_query(
            "INSERT INTO results (%s) values (%s);" % (columns, values))
    print("Результат сохранен в БД")
def save_results(tower_docs, cos_matrix):
    """Write both result tables to disk and mark the analysis session done.

    Each table is written twice: into the per-session history folder
    (``analyse_result/history/<SESSION_ID>``) and into
    ``analyse_result/current_result``. Afterwards
    ``db_load.load_analyse_result`` is called with the session id and the
    session's status in ``analyse_sessions`` is set to ``'done'``.

    Args:
        tower_docs: Object with a ``to_csv`` method; saved as
            ``tf_idf_words.csv``.
        cos_matrix: Object with a ``to_csv`` method; saved as
            ``cos_matrix.csv``.
    """
    # Save the final dataframes
    print('saving_results...')
    history_dir = "analyse_result/history/" + SESSION_ID
    current_dir = "analyse_result/current_result"
    targets = (history_dir, current_dir)

    for directory in targets:
        os.makedirs(directory, exist_ok=True)

    exports = (
        (tower_docs, "/tf_idf_words.csv"),
        (cos_matrix, "/cos_matrix.csv"),
    )
    # Same write order as before: history copy first, then the current copy.
    for frame, file_suffix in exports:
        for directory in targets:
            frame.to_csv(directory + file_suffix)

    db_load.load_analyse_result(SESSION_ID)
    done_sql = "UPDATE analyse_sessions SET STATUS='done' where SESSION_ID={}".format(
        SESSION_ID)
    db_utills.update_query(done_sql)
def run(self):
    """Run one analysis session and record an error status if it did not finish.

    Sets the module-level ``SESSION_ID`` to the current local time formatted
    as ``YYYYmmddHHMMSS``, initialises the database and the session, and
    calls ``run_algorithm``. An ``Exception`` raised by the algorithm is not
    propagated: its traceback is printed instead. Finally, unless the
    session's status in ``analyse_sessions`` reads ``'done'``, it is set to
    ``'error'``.
    """
    global SESSION_ID
    SESSION_ID = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

    db_utills.init_db()
    init_session()

    try:
        run_algorithm()
    except Exception:
        # NOTE(review): this flags parse_sessions although the status check
        # below works on analyse_sessions - confirm the table is intended.
        db_utills.update_query(
            "UPDATE parse_sessions SET STATUS='error' where SESSION_ID={}".format(
                SESSION_ID))
        traceback.print_exc()

    status = db_utills.select_one_item(
        "SELECT status FROM analyse_sessions where session_id={}".format(
            SESSION_ID))
    if status == 'done':
        return
    db_utills.update_query(
        "UPDATE analyse_sessions SET STATUS='error' where SESSION_ID={}".format(
            SESSION_ID))
def run(self):
    """Run one parsing session and record an error status if it did not finish.

    Sets the module-level ``SESSION_ID`` to the current local time formatted
    as ``YYYYmmddHHMMSS``, initialises the database, the session, the run
    parameters and the directories, then calls ``begin_parse``. An
    ``Exception`` raised while parsing is not propagated: the session is
    flagged ``'error'`` in ``parse_sessions`` and the traceback is printed.
    Finally, unless the session's status reads ``'done'``, it is set to
    ``'error'``.
    """
    global SESSION_ID
    SESSION_ID = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

    db_utills.init_db()
    init_session()
    parameters = init_params()
    init_directories(parameters)

    print("---------------")
    print("parsing started")
    try:
        begin_parse(parameters)
    except Exception:
        db_utills.update_query(
            "UPDATE parse_sessions SET STATUS='error' where SESSION_ID={}".format(
                SESSION_ID))
        traceback.print_exc()
    print("parsing finished")

    # A session that never reached 'done' is recorded as failed.
    status = db_utills.select_one_item(
        "SELECT status FROM parse_sessions where session_id={}".format(
            SESSION_ID))
    if status == 'done':
        return
    db_utills.update_query(
        "UPDATE parse_sessions SET STATUS='error' where SESSION_ID={}".format(
            SESSION_ID))
def init_session():
    """Insert a row for the current session into ``parse_sessions``.

    The row carries the module-level ``SESSION_ID``, the current local time
    formatted as ``YYYY-mm-dd HH:MM`` and the status ``'work'``.

    Returns:
        Whatever ``db_utills.update_query`` returns for the insert.
    """
    created_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    insert_sql = (
        "INSERT INTO parse_sessions (session_id, date_create, status) "
        "values ({}, '{}', '{}');"
    ).format(SESSION_ID, created_at, 'work')
    outcome = db_utills.update_query(query=insert_sql)
    print('session inited')
    return outcome