Esempio n. 1
0
def random_sent_summary():
    """Rank each paper against a query built from one random child id.

    For every ``(id, row)`` pair listed by the ``papers``/``meta``
    repository, the row is parsed into a dict, one entry of its
    ``children`` list is drawn with ``random.choice`` and sent to
    ``q.query`` as the ``text`` field. The 0-based position of the row's own
    id among the keys of the response's ``result`` mapping is used as the
    rank (presumably the keys come back ordered best match first -- confirm
    against the query service).

    Prints the mean rank (sum of ranks divided by ``meta_repo.count()``)
    followed by the number of rows ranked in the top 3 and in the top 10.
    The mean is reported as ``nan`` when the repository is empty. Results
    vary between runs because the child is chosen at random.

    Raises:
        IndexError: if a row's id is missing from the query result, or
            (from ``random.choice``) if a row's ``children`` list is empty.
    """
    meta_repo = CassandraDatabase(project_name='papers',
                                  repo_name='meta',
                                  id_sql_type='BIGINT',
                                  content_sql_type="TEXT")
    # NOTE(review): this handle is never read below. It is still constructed
    # because the constructor's side effects are not visible from here --
    # confirm and drop it if that is safe.
    encoded_sents_repo = CassandraDatabase(project_name='papers',
                                           repo_name='sents',
                                           id_sql_type='BIGINT',
                                           content_sql_type="TEXT")
    rank_sum = 0
    top3 = 0
    top10 = 0
    for paper_id, row in meta_repo.list():
        # ``nan`` is not a Python literal, so it is swapped for an empty
        # string before parsing. NOTE(review): this is a plain substring
        # replacement and also rewrites "nan" inside longer words.
        meta = ast.literal_eval(row.replace('nan', '\'\''))
        random_sent_id = random.choice(meta['children'])
        # NOTE(review): the id itself is sent as "text"; confirm that the
        # query service resolves ids rather than expecting sentence text.
        response = ast.literal_eval(
            q.query(json.dumps({
                "text": random_sent_id,
                "count": 205
            })))
        sims = response['result']

        # 0-based position of this paper among the returned keys.
        rank = next(
            (pos for pos, key in enumerate(sims) if key == paper_id), None)
        if rank is None:
            # Same exception type the previous array lookup raised, but
            # with enough context to find the offending row.
            raise IndexError(
                'id {!r} not found among the {} results returned for '
                'child {!r}'.format(paper_id, len(sims), random_sent_id))

        if rank < 3:
            top3 += 1
        if rank < 10:
            top10 += 1
        rank_sum += rank

    total = meta_repo.count()
    # An empty repository has no meaningful mean; report nan, don't crash.
    mean_rank = rank_sum / total if total else float('nan')
    print('{:.2f} top3 {} top 10 {}'.format(mean_rank, top3, top10))
    '''total count 203. USE: [9.46, 3: 135, 10: 159], upvotes: [9.68,3: 150, 10: 165], 
Esempio n. 2
0
def title_summary():
    """Rank each paper against a query built from its stored title row.

    For every ``(id, row)`` pair listed by the ``papers``/``title``
    repository, the row content is sent to ``q.query`` as the ``text``
    field. The 0-based position of the row's own id among the keys of the
    response's ``result`` mapping is used as the rank (presumably the keys
    come back ordered best match first -- confirm against the query
    service).

    Prints the mean rank (sum of ranks divided by ``repo.count()``) followed
    by the number of rows ranked in the top 3 and in the top 10. The mean is
    reported as ``nan`` when the repository is empty.

    Raises:
        IndexError: if a row's id is missing from the query result.
    """
    repo = CassandraDatabase(project_name='papers',
                             repo_name='title',
                             id_sql_type='BIGINT',
                             content_sql_type="TEXT")
    rank_sum = 0
    top3 = 0
    top10 = 0
    for paper_id, title in repo.list():
        response = ast.literal_eval(
            q.query(json.dumps({
                "text": title,
                "count": 203
            })))
        sims = response['result']

        # 0-based position of this paper among the returned keys.
        rank = next(
            (pos for pos, key in enumerate(sims) if key == paper_id), None)
        if rank is None:
            # Same exception type the previous array lookup raised, but
            # with enough context to find the offending row.
            raise IndexError(
                'id {!r} not found among the {} results returned for its '
                'title'.format(paper_id, len(sims)))

        if rank < 3:
            top3 += 1
        if rank < 10:
            top10 += 1
        rank_sum += rank

    total = repo.count()
    # An empty repository has no meaningful mean; report nan, don't crash.
    mean_rank = rank_sum / total if total else float('nan')
    print('{:.2f} top3 {} top 10 {}'.format(mean_rank, top3, top10))
    '''total count 203. USE: [13.62, 3: 106, 10: 148], upvotes: [20.56, 3: 83, 10: 124], 
Esempio n. 3
0
def title_sents():
    """Rank each paper by where its first matching result appears.

    For every ``(id, row)`` pair listed by the ``papers``/``title``
    repository, the row content is sent to ``q.query``. Each key of the
    response's ``result`` mapping is looked up in the ``sent_sum_map``
    repository, and the first element of what ``read`` returns is converted
    to ``int`` and compared with the title's id (presumably it is the id of
    the paper owning that sentence -- confirm against the repository
    schema). The rank of a title is the number of distinct other ids seen
    before its own id first appears, i.e. a 0-based rank over
    de-duplicated ids.

    A 1-based progress counter is printed before each query. At the end the
    mean rank (sum of ranks divided by ``repo.count()``) is printed,
    followed by the number of titles ranked in the top 3 and in the top 10.
    The mean is reported as ``nan`` when the repository is empty.

    Raises:
        IndexError: if none of the returned results maps to the title's id.
    """
    repo = CassandraDatabase(project_name='papers',
                             repo_name='title',
                             id_sql_type='BIGINT',
                             content_sql_type="TEXT")
    sent_sum_map_repo = CassandraDatabase(project_name='papers',
                                          repo_name='sent_sum_map',
                                          id_sql_type='BIGINT',
                                          content_sql_type="TEXT")
    rank_sum = 0
    top3 = 0
    top10 = 0
    for progress, (target_id, title) in enumerate(repo.list(), start=1):
        print(progress)
        response = ast.literal_eval(
            q.query(json.dumps({
                "text": title,
                "count": 203
            })))
        sims = response['result']

        # The rank is recomputed from scratch for every title so that a
        # miss can never silently reuse the previous title's rank.
        rank = None
        papers_seen = set()
        for sent_id in sims:
            paper_id = int(sent_sum_map_repo.read(sent_id)[0])
            if paper_id == target_id:
                # Distinct ids ranked above the target: a 0-based rank, so
                # ``rank < 3`` really means "within the first three".
                rank = len(papers_seen)
                break
            papers_seen.add(paper_id)

        if rank is None:
            raise IndexError(
                'id {!r} not found among the ids mapped from the {} results '
                'returned for its title'.format(target_id, len(sims)))

        if rank < 3:
            top3 += 1
        if rank < 10:
            top10 += 1
        rank_sum += rank

    total = repo.count()
    # An empty repository has no meaningful mean; report nan, don't crash.
    mean_rank = rank_sum / total if total else float('nan')
    print('{:.2f} top3 {} top 10 {}'.format(mean_rank, top3, top10))
    '''total count 203. USE: [11.40, 3: 11, 10: 143], upvotes: [17.77, 3: 107, 10: 141],