Example #1
0
def test():
    """Score ``algo_ii.predict_citations`` against ``papers30000.csv``.

    For every CSV row, the keywords in column 9 are passed (together with
    the result of ``get_authorities()``) to ``algo_ii.predict_citations``,
    and each entry of column 6 that appears in the returned recommendations
    counts as a hit.  One ``"<index> accuracy: <hits>/<n>"`` line per row is
    printed and appended to ``report.txt``; every 50 rows the running totals
    and the overall hit ratio are printed as well.

    Both files are managed by a ``with`` statement so they are closed even
    if a row is malformed or the prediction call raises.
    """
    authorities = get_authorities()
    algo_ii.populate_iks_dict()

    total_hits = 0
    count = 0
    total_citations = 0

    # The report is opened in text mode because only str lines are written
    # to it; binary mode would reject them on Python 3.
    with open('report.txt', 'w') as report, \
            open('papers30000.csv', 'r') as data:
        reader = csv.reader(data, delimiter=',', quotechar='"')
        for row in reader:
            index = row[1]
            count += 1
            citations = row[6].split(';')
            keywords = row[9].split(';')
            # Drop the last split element.
            # NOTE(review): presumably the keyword field ends with a
            # trailing ';' -- confirm.  Citations are not trimmed this way.
            del keywords[-1]
            total_citations += len(citations)

            recommendations = algo_ii.predict_citations(keywords, authorities)
            # `in` replaces dict.has_key(), which was removed in Python 3.
            hits = sum(1 for citation in citations
                       if citation in recommendations)
            total_hits += hits

            line = index + " accuracy: " + str(hits) + "/" + str(len(citations)) + "\n"
            print(line)
            report.write(line)

            # Periodic progress summary.
            if (count % 50) == 0:
                print("temp is:")
                print(total_hits)
                print("out of ")
                print(total_citations)
                print("achieved accuracy:")
                print(total_hits / float(total_citations))
Example #2
0
def _specific_test( papers, authorities, test_ids ):
    """Print predicted vs. actual citations for each paper id in *test_ids*.

    Args:
        papers: Table indexable as ``papers[papers['INDEX'] == pid][column]``
            with 'INDEX', 'ABSTRACT', 'KEYWORDS' and 'REF_ID' columns
            (presumably a pandas DataFrame -- confirm against the caller).
        authorities: Passed through unchanged to
            ``algo_ii.predict_citations``.
        test_ids: Iterable of paper ids given as strings; each one is
            printed and then converted with ``int()`` for the lookup.

    Raises:
        IndexError: If no row of *papers* matches an id.
    """
    for pid in test_ids:
        print( 'ID = '+ pid )
        pid = int(pid)

        # Select the matching row(s) once instead of re-filtering the whole
        # table for every column that is read below.
        rows = papers[papers['INDEX'] == pid]

        abstract = list( rows['ABSTRACT'] )[0]
        print( abstract[:45] +'...' )

        keywords = list( rows['KEYWORDS'] )[0].split(';')
        # Drop the last split element.
        # NOTE(review): presumably the field ends with a trailing ';' -- confirm.
        del keywords[-1]

        recommendations = algo_ii.predict_citations( keywords, authorities )

        citations = list( rows['REF_ID'] )[0].split(';')
        # `in` replaces dict.has_key(), which was removed in Python 3.
        hits = sum( 1 for citation in citations if citation in recommendations )

        print( 'Recommendations for '+ str(pid) +':' )
        print( '> '+ str( sorted( recommendations.keys() ) ) )
        print( 'Actuals:\n> '+ str( sorted(citations) ) )
        print( 'Hits = '+ str(hits) +'\n' )