Example #1
0
def add_collection_novelty():  # based on title
    """Store every user's average collection novelty in ``by_user``.

    A paper's novelty is ``1 / n``, where ``n`` is how often its title occurs
    in the paper lists of all users in ``any_user``.  The value written to
    ``by_user[user]['collection_novelty']`` is the mean novelty of the user's
    papers, or ``0`` when the user has no papers.

    Reads ``citeulike_collection/users/<user>.json`` for each user, prints
    progress to stdout and returns nothing.
    """
    # Pass 1: count title occurrences and remember each user's titles so the
    # JSON files do not have to be parsed a second time.
    # NOTE(review): assumes citulike_user_object_to_stats has no side effects
    # that relied on it being called twice per user -- confirm.
    all_paper_titles = {}
    titles_by_user = {}
    for user in any_user:
        print(user)
        # The context manager closes the file; the original leaked the handle.
        with open('citeulike_collection/users/' + user + '.json') as user_file:
            user_object = json.load(user_file)
        stat = download_and_save_users.citulike_user_object_to_stats(
            user_object)
        titles = [paper['title'] for paper in stat['papers']]
        titles_by_user[user] = titles
        for title in titles:
            all_paper_titles[title] = all_paper_titles.get(title, 0) + 1

    # Pass 2: average the per-paper novelty for every user.
    for user in any_user:
        print(user)
        paper_novelties = [
            1.0 / all_paper_titles[title] for title in titles_by_user[user]
        ]
        nov = 0
        if paper_novelties:
            nov = numpy.mean(paper_novelties)
        by_user[user]['collection_novelty'] = nov
        # Single pre-formatted argument: same output under Python 2 and 3.
        print("%s %s" % (nov, paper_novelties))
def add_collection_novelty():  # based on title
    """Load every user's saved collection and walk over its papers.

    NOTE(review): this definition appears to be a truncated excerpt.  The
    title counter is created but never filled and the per-paper tuple is
    discarded, so as written the function only prints each user and reads
    ``citeulike_collection/users/<user>.json``.
    """
    all_paper_titles = {}  # never populated in this excerpt
    for user in any_user:
        print(user)
        # The context manager closes the file; the original leaked the handle.
        with open('citeulike_collection/users/' + user + '.json') as user_file:
            user_object = json.load(user_file)
        stat = download_and_save_users.citulike_user_object_to_stats(
            user_object)
        for paper in stat['papers']:
            # Kept as in the original: builds a (title, paper) tuple, so a
            # paper without a 'title' key still raises KeyError.
            title = paper['title'], paper
Example #3
0
def add_collection_novelty():  # based on title
    """Load every user's saved collection and walk over its papers.

    NOTE(review): this definition appears to be a truncated excerpt.  The
    title counter is created but never filled and the per-paper tuple is
    discarded, so as written the function only prints each user and reads
    ``citeulike_collection/users/<user>.json``.
    """
    all_paper_titles = {}  # never populated in this excerpt
    for user in any_user:
        print(user)
        # Close the file deterministically instead of leaking the handle.
        with open('citeulike_collection/users/' + user + '.json') as user_file:
            user_object = json.load(user_file)
        stat = download_and_save_users.citulike_user_object_to_stats(
            user_object)
        for paper in stat['papers']:
            # Kept as in the original: builds a (title, paper) tuple, so a
            # paper without a 'title' key still raises KeyError.
            title = paper['title'], paper
def generate_collected_csv():
    """Write one row per user in ``any_user`` to ``csv_out``.

    A row consists of the user key without its last character, the names of
    the ``'rank'``/``'log'``/``'web'``/``'ice'`` entries of ``any_user[user]``
    that are truthy (in that order), and finally ``stat['collected']``, where
    ``stat`` is derived from ``citeulike_collection/users/<user>.json``.
    """
    for user in any_user:
        # The context manager closes the file; the original leaked the handle.
        with open('citeulike_collection/users/' + user + '.json') as user_file:
            user_object = json.load(user_file)
        stat = download_and_save_users.citulike_user_object_to_stats(
            user_object)

        flags = any_user[user]
        out_row = [user[:-1]]
        # "name" rather than "set": the original shadowed the builtin.
        out_row.extend(
            name for name in ('rank', 'log', 'web', 'ice') if flags[name])
        out_row.append(stat['collected'])
        csv_out.writerow(out_row)
Example #5
0
def generate_collected_csv():
    """Write one row per user in ``any_user`` to ``csv_out``.

    Row layout: the user key minus its final character, then the name of each
    of ``'rank'``, ``'log'``, ``'web'`` and ``'ice'`` whose entry in
    ``any_user[user]`` is truthy, then ``stat['collected']``.  ``stat`` comes
    from ``citeulike_collection/users/<user>.json``.
    """
    for user in any_user:
        # Close the file deterministically instead of leaking the handle.
        with open('citeulike_collection/users/' + user + '.json') as user_file:
            user_object = json.load(user_file)
        stat = download_and_save_users.citulike_user_object_to_stats(
            user_object)

        flags = any_user[user]
        out_row = [user[:-1]]
        # Loop variable renamed from "set" so the builtin is not shadowed.
        for name in ('rank', 'log', 'web', 'ice'):
            if flags[name]:
                out_row.append(name)
        out_row.append(stat['collected'])
        csv_out.writerow(out_row)
def add_collection_novelty():  # based on title
    """Store every user's average collection novelty in ``by_user``.

    A paper's novelty is ``1 / n``, where ``n`` is how often its title occurs
    in the paper lists of all users in ``any_user``.  The value written to
    ``by_user[user]['collection_novelty']`` is the mean novelty of the user's
    papers, or ``0`` when the user has no papers.

    Reads ``citeulike_collection/users/<user>.json`` for each user, prints
    progress to stdout and returns nothing.
    """
    # Pass 1: count title occurrences and remember each user's titles so the
    # JSON files do not have to be parsed a second time.
    # NOTE(review): assumes citulike_user_object_to_stats has no side effects
    # that relied on it being called twice per user -- confirm.
    all_paper_titles = {}
    titles_by_user = {}
    for user in any_user:
        print(user)
        # The context manager closes the file; the original leaked the handle.
        with open('citeulike_collection/users/' + user + '.json') as user_file:
            user_object = json.load(user_file)
        stat = download_and_save_users.citulike_user_object_to_stats(
            user_object)
        titles = [paper['title'] for paper in stat['papers']]
        titles_by_user[user] = titles
        for title in titles:
            all_paper_titles[title] = all_paper_titles.get(title, 0) + 1

    # Pass 2: average the per-paper novelty for every user.
    for user in any_user:
        print(user)
        paper_novelties = [
            1.0 / all_paper_titles[title] for title in titles_by_user[user]
        ]
        nov = 0
        if paper_novelties:
            nov = numpy.mean(paper_novelties)
        by_user[user]['collection_novelty'] = nov
        # Single pre-formatted argument: same output under Python 2 and 3.
        print("%s %s" % (nov, paper_novelties))
Example #7
0
def compute_collection_metrics():
    """Fill ``by_user`` with collection metrics for every user in ``any_user``.

    For each user the stats derived from
    ``citeulike_collection/users/<user>.json`` are used to store, in
    ``by_user[user]``:

    * ``'keyword_variety'``: the number of distinct keywords over all of the
      user's papers that have a ``'keywords'`` entry;
    * one entry per name in ``collection_metrics``, taken by position from the
      row ``[user minus last char, <truthy flag names>, stat['collected'],
      last char of user]``;
    * ``'collected_depth'``: the mean of the depths found in
      ``paper_title_to_depth`` for the user's papers, or ``0`` if none match.

    Progress and debug output goes to stdout; nothing is returned.
    """
    for user in any_user:
        # Single pre-formatted argument: same output under Python 2 and 3.
        print("Computing collected for... %s" % (user,))
        # The context manager closes the file; the original leaked the handle.
        with open('citeulike_collection/users/' + user + '.json') as user_file:
            user_object = json.load(user_file)
        stat = download_and_save_users.citulike_user_object_to_stats(
            user_object)

        flags = any_user[user]
        out_row = [user[:-1]]
        # "name" rather than "set": the original shadowed the builtin.
        out_row.extend(
            name for name in ('rank', 'log', 'web', 'ice') if flags[name])
        out_row.append(stat['collected'])
        out_row.append(user[-1])

        papers = stat['papers']
        keywords = set()
        for paper in papers:
            if "keywords" in paper:
                print("paper keywords %s" % (paper['keywords'],))
                keywords.update(paper['keywords'])
        by_user[user]['keyword_variety'] = len(keywords)

        print("out row %s" % (out_row,))
        # out_row's length depends on how many flags are set.  Indexing (not
        # zip) keeps the IndexError when it is shorter than
        # collection_metrics.
        for position, metric in enumerate(collection_metrics):
            by_user[user][metric] = out_row[position]

        collected_depths = []
        for paper in papers:
            full_title = paper['title']
            print(full_title)
            if full_title in paper_title_to_depth:
                lookup_key = full_title
            elif ":" in full_title:
                # Fall back to the part of the title before the first colon.
                lookup_key = full_title.split(":")[0]
            else:
                # Placeholder the original code looks up when no colon exists.
                lookup_key = "nonenone"
            if lookup_key in paper_title_to_depth:
                depth = paper_title_to_depth[lookup_key]
                print(depth)
                collected_depths.append(depth)
            else:
                print("???")

        if collected_depths:
            by_user[user]['collected_depth'] = numpy.mean(collected_depths)
        else:
            by_user[user]['collected_depth'] = 0
def compute_collection_metrics():
    """Fill ``by_user`` with collection metrics for every user in ``any_user``.

    For each user the stats derived from
    ``citeulike_collection/users/<user>.json`` are used to store, in
    ``by_user[user]``:

    * ``'keyword_variety'``: the number of distinct keywords over all of the
      user's papers that have a ``'keywords'`` entry;
    * one entry per name in ``collection_metrics``, taken by position from the
      row ``[user minus last char, <truthy flag names>, stat['collected'],
      last char of user]``;
    * ``'collected_depth'``: the mean of the depths found in
      ``paper_title_to_depth`` for the user's papers, or ``0`` if none match.

    Progress and debug output goes to stdout; nothing is returned.
    """
    for user in any_user:
        # Single pre-formatted argument: same output under Python 2 and 3.
        print("Computing collected for... %s" % (user,))
        # Close the file deterministically instead of leaking the handle.
        with open('citeulike_collection/users/' + user + '.json') as user_file:
            user_object = json.load(user_file)
        stat = download_and_save_users.citulike_user_object_to_stats(
            user_object)

        flags = any_user[user]
        out_row = [user[:-1]]
        # Loop variable renamed from "set" so the builtin is not shadowed.
        for name in ('rank', 'log', 'web', 'ice'):
            if flags[name]:
                out_row.append(name)
        out_row.append(stat['collected'])
        out_row.append(user[-1])

        papers = stat['papers']
        keywords = set()
        for paper in papers:
            if "keywords" in paper:
                print("paper keywords %s" % (paper['keywords'],))
                keywords.update(paper['keywords'])
        by_user[user]['keyword_variety'] = len(keywords)

        print("out row %s" % (out_row,))
        # out_row's length depends on how many flags are set.  Indexing (not
        # zip) keeps the IndexError when it is shorter than
        # collection_metrics.
        for position, metric in enumerate(collection_metrics):
            by_user[user][metric] = out_row[position]

        collected_depths = []
        for paper in papers:
            full_title = paper['title']
            print(full_title)
            if full_title in paper_title_to_depth:
                lookup_key = full_title
            elif ":" in full_title:
                # Fall back to the part of the title before the first colon.
                lookup_key = full_title.split(":")[0]
            else:
                # Placeholder the original code looks up when no colon exists.
                lookup_key = "nonenone"
            if lookup_key in paper_title_to_depth:
                depth = paper_title_to_depth[lookup_key]
                print(depth)
                collected_depths.append(depth)
            else:
                print("???")

        if collected_depths:
            by_user[user]['collected_depth'] = numpy.mean(collected_depths)
        else:
            by_user[user]['collected_depth'] = 0