예제 #1
0
def test():
    """Score ``algo_ii.predict_citations`` against the rows of 'papers30000.csv'.

    For each CSV row the keywords (column 9, ';'-separated) are passed to the
    predictor together with the authorities, and every actual citation
    (column 6, ';'-separated) found among the returned recommendations counts
    as a hit.  A per-paper "<index> accuracy: <hits>/<citations>" line is
    printed and written to 'report.txt'; every 50 rows the running totals and
    the overall hit ratio are printed as well.

    Both files are managed by ``with`` so they are closed even when a row is
    malformed or the predictor raises.
    """
    authorities = get_authorities()
    algo_ii.populate_iks_dict()

    # Text mode: the report lines are str, which a binary-mode handle rejects
    # on Python 3.
    with open('report.txt', 'w') as report, open('papers30000.csv', 'r') as data:
        reader = csv.reader(data, delimiter=',', quotechar='"')
        total_hits = 0
        total_citations = 0
        for count, row in enumerate(reader, 1):
            index = row[1]
            citations = row[6].split(';')
            keywords = row[9].split(';')
            # Drop the last keyword field -- presumably the empty string left
            # by a trailing ';' (TODO confirm against the data file).
            del keywords[-1]
            total_citations += len(citations)

            recommendations = algo_ii.predict_citations(keywords, authorities)
            # `in` replaces dict.has_key(), which no longer exists on Python 3.
            hits = sum(1 for citation in citations if citation in recommendations)
            total_hits += hits

            # Build the line once; it goes to both stdout and the report file.
            line = index + " accuracy: " + str(hits) + "/" + str(len(citations)) + "\n"
            print(line)
            report.write(line)

            if count % 50 == 0:
                print("temp is:")
                print(total_hits)
                print("out of ")
                print(total_citations)
                print("achieved accuracy:")
                # float() keeps this a true division on Python 2 as well.
                print(total_hits / float(total_citations))
예제 #2
0
def test_models( FULL_SIM, models_files ):
    """Run ``test_model`` once for every saved Doc2Vec model file.

    FULL_SIM     -- forwarded unchanged as the first argument of ``test_model``.
    models_files -- iterable of strings, each handed to ``Doc2Vec.load``.

    The test papers (read from TEST_FILEPATH), the result of
    ``populate_iks_dict()`` and the result of ``initialize_authorities()``
    are loaded a single time and reused for every model.
    """
    papers_under_test = pd.read_csv( TEST_FILEPATH )

    # NOTE: The next two values are only needed for testing with AII.
    iks_lookup = populate_iks_dict()
    authority_data = initialize_authorities()
    shared_args = ( papers_under_test, iks_lookup, authority_data )

    for model_path in models_files:
        print( 'Testing '+ model_path )
        loaded_model = Doc2Vec.load( model_path )
        print( 'Model loaded.' )

        test_model( FULL_SIM, loaded_model, *shared_args )
예제 #3
0
def specific_test():
    """Run the four hard-coded "human eval" batches through ``_specific_test``.

    Loads 'data-kw.csv', fetches the authorities and populates the IKS dict
    once, then for each batch prints its heading, calls ``_specific_test``
    with that batch's five paper ids, and prints a divider line.
    """
    import pandas as pd

    # NOTE: The first row of data-kw.csv should be the header, i.e.:
    #       "INDEX","TITLE","AUTHORS","YEAR","PUB_VENUE","REF_ID","REF_NUM","ABSTRACT","KEYWORDS"
    papers = pd.read_csv( 'data-kw.csv' )

    authorities = get_authorities()
    algo_ii.populate_iks_dict()

    # (heading, paper ids) for each evaluation batch, in the order they are run.
    batches = (
        ( "HUMAN EVAL #1:\n", ['105542', '586892', '695628', '209104', '139162'] ),
        ( "HUMAN EVAL #2:\n", ['751328', '619377', '686318', '283022', '591411'] ),
        ( "HUMAN EVAL #3:\n", ['360556', '1022648', '1071218', '1112586', '451082'] ),
        ( "HUMAN EVAL #4:\n", ['90992', '784131', '1080100', '96640', '503999'] ),
    )
    divider = "\n=================================\n"

    for heading, paper_ids in batches:
        print( heading )
        _specific_test( papers, authorities, paper_ids )
        print( divider )
예제 #4
0
            title = list( papers[papers['INDEX'] == pid]['TITLE'] )[0]
            authors = list( papers[papers['INDEX'] == pid]['AUTHORS'] )[0]
            summary = list( papers[papers['INDEX'] == pid]['ABSTRACT'] )[0]
            year = list( papers[papers['INDEX'] == pid]['YEAR'] )[0]
            jso['title'] = title
            jso['authors'] = authors
            jso['summary'] = summary.replace('\n', ' ')
            jso['year'] = str(year)
            recs.append( jso )
        except:  # Probably an IndexError (which happens when 'pid' not in DB), but cover our bases
            pass
    #return json.dumps( recs )
    return recs  # NOTE: Not actually JSON; it's a list of dicts

def load_papers():
    """Read 'data-kw.csv' (relative to the working directory) into a pandas DataFrame."""
    from pandas import read_csv

    papers_frame = read_csv( 'data-kw.csv' )
    return papers_frame

if __name__ == '__main__':
    # USE_DBLP_IDS is switched off only when the configured model file name
    # contains 'tagmap1'; any other model file leaves it on.
    USE_DBLP_IDS = 'tagmap1' not in DOC2VEC_MODEL_FILE

    # Load everything serve() takes as arguments, then start serving.
    d2v_model = initialize_d2v_model()
    keywords_docsrels = populate_iks_dict()
    authorities = initialize_authorities()
    papers = load_papers()

    serve( d2v_model, keywords_docsrels, authorities, papers )