Beispiel #1
0
def run(file_path, Field_number):
    """Score the threads read from a CSV file and write the scores to CSV.

    The input is loaded with ``read_file(BASIC_PATH + file_path,
    Field_number)``. Two score sets are computed with
    ``CalScore.cal_with_timely()`` and ``CalScore.cal_without_timely()`` and
    written with ``Write_Csv`` and ``Write_Csv_notime`` respectively.

    Args:
        file_path: Path of the input file; it is appended to ``BASIC_PATH``.
            The output name is this path without its extension, with
            ``"_notime"`` appended for the second file.
        Field_number: Forwarded unchanged to ``read_file``.
    """
    # Local import: only this function needs it, and the file's import block
    # is not visible from here.
    import os

    thread_base, thread_data, thread_data_notime, fields, weights, fields_header = read_file(
        BASIC_PATH + file_path, Field_number)

    cal_score = CalScore(thread_base, thread_data, thread_data_notime, weights)
    score = cal_score.cal_with_timely()
    score_notime = cal_score.cal_without_timely()

    # NOTE: results go to CSV only; the Elasticsearch write that used to sit
    # here was already disabled (commented out).

    # Strip only the final extension. Splitting on the first "." turned
    # "./data.csv" into "" and "a.b.csv" into "a".
    file_name = os.path.splitext(str(file_path))[0]
    Write_Csv(thread_data, score, fields, file_name, fields_header)
    Write_Csv_notime(thread_data_notime, score_notime, fields,
                     file_name + "_notime", fields_header)
Beispiel #2
0
def test_csv():
    """Smoke-test the CSV pipeline against a fixed sample file.

    Loads the sample CSV with ``read_file``, computes both score sets with
    ``CalScore``, prints the first set, and writes the results through
    ``Write_Csv`` (as ``'test'``) and ``Write_Csv_notime`` (as
    ``'test_notime'``).
    """
    sample_path = "./must_createTime_-60favorite_replies_.csv"
    loaded = read_file(sample_path)
    base, rows, rows_notime, field_names, weighting, header = loaded

    scorer = CalScore(base, rows, rows_notime, weighting)

    timely_scores = scorer.cal_with_timely()
    print(timely_scores)
    plain_scores = scorer.cal_without_timely()

    Write_Csv(rows, timely_scores, field_names, 'test', header)
    Write_Csv_notime(
        rows_notime, plain_scores, field_names, 'test_notime', header)
Beispiel #3
0
def test_run():
    """Score the threads in ``FILE_PATH`` and write both score sets to ES.

    Loads the data with ``read_file``, computes scores with
    ``CalScore.cal_with_timely()`` and ``CalScore.cal_without_timely()``, and
    writes them through ``WriteEs`` using the module-level ``es_client``.
    """
    # run() and test_csv() unpack six values from read_file (the sixth being
    # fields_header). Taking the first five by slice works whether read_file
    # returns five or six values, instead of raising ValueError on unpacking.
    loaded = read_file(FILE_PATH)
    thread_base, thread_data, thread_data_notime, fields, weights = loaded[:5]

    cal_score = CalScore(thread_base, thread_data, thread_data_notime, weights)
    score = cal_score.cal_with_timely()
    score_notime = cal_score.cal_without_timely()

    write_es = WriteEs(es_client, thread_data, thread_data_notime, fields)
    write_es.write_thread_score_notime_to_es(score_notime)
    write_es.write_thread_score_to_es(score)
Beispiel #4
0
def _bulk_index_scores(index, scores, rows, field_names, batch_size=1000):
    """Send one document per score row to ``es_client.bulk`` in batches.

    Each document holds ``tid``, ``topicid`` and ``score`` (the first three
    items of the score row) plus one entry per name in ``field_names``, taken
    positionally from the matching row of ``rows``.

    Args:
        index: Name of the target index.
        scores: Sequence of score rows, indexable as ``row[0..2]``.
        rows: Sequence of data rows aligned by position with ``scores``.
        field_names: Names for the leading columns of each data row.
        batch_size: Maximum number of documents per bulk request.
    """
    actions = []
    for i, score_row in enumerate(scores):
        data_row = rows[i]
        document = {
            "tid": score_row[0],
            "topicid": score_row[1],
            "score": score_row[2],
        }
        for j, name in enumerate(field_names):
            document[name] = data_row[j]
        # The bulk body alternates an action line with its document.
        actions.append({"index": {}})
        actions.append(document)
        if len(actions) >= 2 * batch_size:
            es_client.bulk(index=index, body=actions, doc_type="_doc")
            actions = []
    # Flush the remainder only when there is one, so an empty body is never
    # sent when the row count is an exact multiple of the batch size.
    if actions:
        es_client.bulk(index=index, body=actions, doc_type="_doc")


def test_cal():
    """Compute both score sets for ``FILE_PATH`` and bulk-index them.

    Scores from ``cal_with_timely()`` go to ``bbs_score_data_<date>`` with
    every name in ``fields``. Scores from ``cal_without_timely()`` go to
    ``bbs_score_data_notime_<date>`` with every name except the last.
    """
    # run() and test_csv() unpack six values from read_file; slicing keeps
    # this working whether five or six values come back.
    loaded = read_file(FILE_PATH)
    thread_base, thread_data, thread_data_notime, fields, weights = loaded[:5]

    cal_score = CalScore(thread_base, thread_data, thread_data_notime, weights)
    score = cal_score.cal_with_timely()
    score_notime = cal_score.cal_without_timely()

    # Take the date once so both index names agree even across midnight.
    today = datetime.datetime.now().strftime('%Y-%m-%d')

    _bulk_index_scores("bbs_score_data_" + today, score, thread_data, fields)

    # The no-time scores are paired with the no-time rows, and the final
    # flush follows score_notime's own length. Previously this pass read
    # thread_data and flushed on len(score).
    # NOTE(review): assumes thread_data_notime rows line up with fields[:-1];
    # confirm against read_file.
    _bulk_index_scores("bbs_score_data_notime_" + today, score_notime,
                       thread_data_notime, fields[:-1])
Beispiel #5
0
def test_read_file():
    """Load ``FILE_PATH`` with ``read_file`` and print the thread data."""
    # run() and test_csv() unpack six values from read_file; taking the first
    # five by slice avoids a ValueError whether five or six are returned.
    loaded = read_file(FILE_PATH)
    thread_base, thread_data, thread_data_notime, fields, weights = loaded[:5]
    print("thread_data:" + str(thread_data))