def compare_meanified_array(items_pop1, items_pop2):
    """Plot binned popularity of items under two interaction types.

    Items in items_pop1 (item -> popularity count; plotted as "listen")
    are sorted by ascending count and filtered to counts >= 10.  The
    matching counts from items_pop2 (plotted as "love"; 0 when an item
    is absent there) are collected in parallel.  Both series are binned
    with get_meanified_array (bin width 100) and plotted twice via
    plotter.plotLinesYY: once with the median aggregate, once with the
    mean aggregate.

    Returns None; the output is the two displayed plots.
    """
    arr1 = []
    arr2 = []
    # Ascending popularity order so both curves are sorted by itype1 count.
    for k, v in sorted(items_pop1.iteritems(), key=operator.itemgetter(1)):
        if v >= 10:  # drop rare items (fewer than 10 interactions)
            arr1.append(v)
            # dict.get replaces the old "if k in ... else 0" branch (same behavior).
            arr2.append(items_pop2.get(k, 0))
    # One loop instead of two copy-pasted plotLinesYY calls; median first,
    # then mean, exactly as before.
    for method in ("median", "mean"):
        plotter.plotLinesYY(get_meanified_array(arr1, 100, method=method),
                            get_meanified_array(arr2, 100, method=method),
                            "listen", "love",
                            labelx="Items",
                            labely="Number of people who interact with this item",
                            display=True,
                            logyscale=True)
def compare_meanified_array(items_pop1, items_pop2):
    """Compare binned item popularity across two interaction maps.

    Builds two parallel series: counts >= 10 from items_pop1 in ascending
    order, and the corresponding counts from items_pop2 (zero when an item
    never appears there).  Draws a median-binned and then a mean-binned
    listen-vs-love line plot (bin width 100, log y-scale).
    """
    primary_counts = []
    secondary_counts = []
    ranked_items = sorted(items_pop1.iteritems(), key=operator.itemgetter(1))
    for item_id, count in ranked_items:
        if count < 10:
            continue  # skip items with fewer than 10 interactions
        primary_counts.append(count)
        secondary_counts.append(items_pop2[item_id] if item_id in items_pop2 else 0)
    # Median-aggregated bins.
    plotter.plotLinesYY(get_meanified_array(primary_counts, 100, method="median"),
                        get_meanified_array(secondary_counts, 100, method="median"),
                        "listen", "love",
                        labelx="Items",
                        labely="Number of people who interact with this item",
                        display=True,
                        logyscale=True)
    # Mean-aggregated bins.
    plotter.plotLinesYY(get_meanified_array(primary_counts, 100, method="mean"),
                        get_meanified_array(secondary_counts, 100, method="mean"),
                        "listen", "love",
                        labelx="Items",
                        labely="Number of people who interact with this item",
                        display=True,
                        logyscale=True)
    return
def compare_interact_types_byuser(na, itype1, itype2, binwidth, duplicates=True, min_exposure=1, plot_type="xy", logyscale=False, logxscale=False): y = na.compare_interaction_types2(min_exposure=min_exposure, return_duplicates=duplicates) tuple_arr = [(y[itype1][i], y[itype2][i]) for i in range(len(y[itype1])) if y[itype1][i] > 0] itype1_arr = [val[0] for val in tuple_arr] itype2_arr = [val[1] for val in tuple_arr] print max(itype1_arr), max(itype2_arr) itype1_arr2 = get_meanified_array(itype1_arr, binwidth) itype2_arr2 = get_meanified_array(itype2_arr, binwidth) labely_part = "Interactions" if duplicates else "Artists" if plot_type == "yy": plotter.plotLinesYY(itype1_arr2, itype2_arr2, itype1, itype2, labelx="Users (sorted by listen)", labely="Number of " + labely_part, display=True, logyscale=False) elif plot_type == "xy": """ plotter.plotLinesXY(itype1_arr, itype2_arr, labelx=itype1, labely=itype2, title_str="Number of "+ labely_part, display=True, logyscale=logyscale, logxscale=logxscale, ylim_val=[0, 100000]) """ import matplotlib.pyplot as plt plt.loglog(itype1_arr, itype2_arr, 'o') plt.show() return itype1_arr, itype2_arr
def compare_interact_types_byuser( na, itype1, itype2, binwidth, duplicates=True, min_exposure=1, plot_type="xy", logyscale=False, logxscale=False ): y = na.compare_interaction_types2(min_exposure=min_exposure, return_duplicates=duplicates) tuple_arr = [(y[itype1][i], y[itype2][i]) for i in range(len(y[itype1])) if y[itype1][i] > 0] itype1_arr = [val[0] for val in tuple_arr] itype2_arr = [val[1] for val in tuple_arr] print max(itype1_arr), max(itype2_arr) itype1_arr2 = get_meanified_array(itype1_arr, binwidth) itype2_arr2 = get_meanified_array(itype2_arr, binwidth) labely_part = "Interactions" if duplicates else "Artists" if plot_type == "yy": plotter.plotLinesYY( itype1_arr2, itype2_arr2, itype1, itype2, labelx="Users (sorted by listen)", labely="Number of " + labely_part, display=True, logyscale=False, ) elif plot_type == "xy": """ plotter.plotLinesXY(itype1_arr, itype2_arr, labelx=itype1, labely=itype2, title_str="Number of "+ labely_part, display=True, logyscale=logyscale, logxscale=logxscale, ylim_val=[0, 100000]) """ import matplotlib.pyplot as plt plt.loglog(itype1_arr, itype2_arr, "o") plt.show() return itype1_arr, itype2_arr
def run_computation(data, computation_cmd, outf, interact_type, create_fake_prefs, allow_duplicates, split_date_str, dataset_domain, dataset_path, min_interacts_beforeaftersplit_per_user, max_interact_ratio_error, max_sim_ratio_error, min_friends_match_ratio, traindata_fraction, M):
    """Dispatch one named analysis/experiment over a social-activity dataset.

    computation_cmd selects the branch: basic_stats (default when None),
    random_similarity, knn_similarity, knn_recommender, circle_coverage,
    items_edge_coverage, network_draw, network_item_adopt, node_details,
    store_dataset, compare_interact_types, influence_test, suscept_test,
    gen_adopt_data, compute_split_date.  Results go to plots, stdout,
    outf, or files opened here, depending on the branch.

    NOTE(review): cutoff_rating and outf_path are used below but are not
    parameters or locals — presumably module-level globals; verify.
    NOTE(review): indentation reconstructed from a whitespace-collapsed
    source; nesting of the post-fake-data create_training_test_bytime
    calls mirrors the formatted copy of this function — confirm.
    """
    net_analyzer = BasicNetworkAnalyzer(data)
    interaction_types = data.interact_types_dict
    # NOTE(review): filename_prefix is computed but never used below.
    filename_prefix = computation_cmd if computation_cmd is not None else ""
    if computation_cmd=="basic_stats" or computation_cmd is None:
        net_analyzer.show_basic_stats()
        ## use below if you want to write a new dataset (e.g. after filtering)
        data.store_ego_dataset("/home/amit/datasets/social_activity_data/lastfm_filtered_listen/", write_maps=False)
        #data.compute_allpairs_sim(interact_type, data_type=ord("a"))
    elif computation_cmd=="random_similarity":
        # Friend-circle vs. global (non-friend) similarity, per interaction type.
        for type_name, type_index in interaction_types.iteritems():
            circlesims, globalsims = net_analyzer.compare_circle_global_similarity(type_index, num_random_trials=5, cutoff_rating=cutoff_rating)
            #plotter.plotLinesYY(circlesims, globalsims, "Friends", "Global")
            outf.write("User_id\tcircle_sim\tnonfriend_sim\n")
            outf.write(type_name + '\n')
            for ind in range(len(circlesims)):
                # circlesims/globalsims entries look like (user_id, similarity).
                outf.write("%s\t%f\t%f\n" %(circlesims[ind][0], circlesims[ind][1], globalsims[ind][1]))
            print "\n", type_name, ":"
            print "Circle Average", sum([v2 for v1,v2 in circlesims])/float(len(circlesims))
            print "Global Average", sum([v2 for v1,v2 in globalsims])/float(len(globalsims))
    elif computation_cmd=="knn_similarity":
        #Compute K-nearest similarity
        KLIMITS = [10]
        outf.write("User_id\tk\tcircle_sim\tnonfriend_sim\n")
        for type_name, type_index in interaction_types.iteritems():
            for curr_lim in KLIMITS:
                plot_circle, plot_external = net_analyzer.compare_circle_global_knnsimilarity(type_index, klim=curr_lim, cutoff_rating=cutoff_rating)
                compare_sims(plot_circle, plot_external)
                outf.write(type_name+'\n')
                for ind in range(len(plot_circle)):
                    outf.write("%s\t%d\t%f\t%f\n" %(plot_circle[ind][0], curr_lim, plot_circle[ind][1], plot_external[ind][1]))
                #plotter.plotLinesYY(plot_circle, plot_external, "Friends", "Global")
                print type_name, "K", curr_lim
                print "Circle Average", utils.mean_sd([v2 for v1,v2 in plot_circle]), len(plot_circle)
                print "Global Average", utils.mean_sd([v2 for v1,v2 in plot_external]), len(plot_external)
    elif computation_cmd=="knn_recommender":
        #Compute K-nearest recommender
        KLIMITS = [10]
        rec_analyzer = RecommenderAnalyzer(data, max_recs_shown=10, traintest_split=0.7, cutoff_rating=cutoff_rating)
        outf.write("User_id\tk\trun_index\tcircle_ndcg\tnonfriend_ndcg\n")
        for type_name, type_index in interaction_types.iteritems():
            for curr_lim in KLIMITS:
                local_avg=[]
                global_avg=[]
                Ntotal = 10
                for i in range(Ntotal): # randomize because of training-test split.
                    plot_circle, plot_external = rec_analyzer.compare_knearest_recommenders(type_index, klim=curr_lim, num_processes=2)
                    compare_sims(plot_circle, plot_external)
                    outf.write(type_name + "\n")
                    for ind in range(len(plot_circle)):
                        outf.write("%s\t%d\t%d\t%f\t%f\n" %(plot_circle[ind][0], curr_lim, i, plot_circle[ind][1], plot_external[ind][1]))
                    print "\n", type_name, "K", curr_lim
                    #print plot_circle, plot_external
                    # mean_sd presumably returns (mean, sd); index 0 is the mean.
                    curr_avg_local = utils.mean_sd([v2 for v1,v2 in plot_circle])
                    curr_avg_global = utils.mean_sd([v2 for v1,v2 in plot_external])
                    print "Circle Average", curr_avg_local
                    print "Global Average", curr_avg_global
                    local_avg.append(curr_avg_local[0])
                    global_avg.append(curr_avg_global[0])
                    #plotLinesYY(plot_circle, plot_external, "Friends", "Global")
                print "Local", sum(local_avg)/float(Ntotal)
                print "Global", sum(global_avg)/float(Ntotal)
    elif computation_cmd == "circle_coverage":
        # Item coverage of a user's friend circle, bucketed by circle size.
        lim_friends = [(5,10), (10,20), (20,50), (50,100)]
        for fr_limit in lim_friends:
            locality_analyzer = LocalityAnalyzer(data)
            coverage_list = locality_analyzer.compare_circle_item_coverages(0, fr_limit[0], fr_limit[1])
            plotter.plotLineY(sorted(coverage_list), "User", "Fraction of Items Covered with %d-%d friends" % (fr_limit[0], fr_limit[1]))
            print utils.mean_sd(coverage_list)
    elif computation_cmd == "items_edge_coverage":
        locality_analyzer = LocalityAnalyzer(data)
        items_cov_list, items_popularity, cov_ratio_list = locality_analyzer.compare_items_edge_coverage(1, minimum_interactions=1)
        print utils.mean_sd(items_cov_list)
        print utils.mean_sd(items_popularity)
        #plotter.plotHist(sorted([val for val in cov_ratio_list if val<=1]), "Ratio of Edge coverage to total popularity", "Frequency", logyscale=True)
        #####plotter.plotHist(sorted([val for val in cov_ratio_list]), "Ratio of Edge coverage to total popularity", "Frequency", logyscale=True)
        #plotter.plotHist(sorted(items_popularity), "Item", "total popularity")
        plotter.plotCumulativePopularity(items_popularity, labelx="Item percentile", labely="Cum. percent of number of likes")
    elif computation_cmd == "network_draw":
        net_visualizor = NetworkVisualizor(data)
        net_visualizor.draw_network()
    elif computation_cmd == "network_item_adopt":
        # 1669118 is a hard-coded example item id.
        net_visualizor = NetworkVisualizor(data)
        pprint(net_visualizor.plot_item_adoption(1669118))
    elif computation_cmd == "node_details":
        # Reads user ids (one per line, with a "User_id" header) from ./user_ids.
        for node_id in open('user_ids'):
            if node_id.strip('\n') != "User_id":
                net_analyzer.get_node_details(int(node_id.strip('\n')))
    elif computation_cmd=="store_dataset":
        # Dump interactions, item popularity, and the friendship graph as TSV.
        user_interacts = net_analyzer.get_user_interacts(1, cutoff_rating)
        f = open(outf_path+ 'user_interacts_'+dataset_domain+'.tsv', 'w')
        f.write("user_id\titem_id\ttimestamp\n")
        for user_id, item_id, timestamp in user_interacts:
            f.write("%s\t%s\t%s\n" %(user_id, item_id, timestamp))
        f.close()
        item_pop = net_analyzer.get_items_popularity(1, cutoff_rating)
        f = open(outf_path+'items_'+dataset_domain+'.tsv','w')
        f.write("item_id\tpopularity\n")
        for item_id, pop in item_pop.iteritems():
            f.write("%s\t%s\n" %(item_id, pop))
        f.close()
        user_friends = net_analyzer.get_user_friends()
        # NOTE(review): unlike the two files above, this one is written to the
        # current directory, not outf_path — confirm intentional.
        f = open('user_friends_'+dataset_domain+'.tsv','w')
        f.write("user_id\tfriend_id\n")
        for user_id, friend_id in user_friends:
            f.write("%s\t%s\n" %(user_id, friend_id))
        f.close()
        print "Successfully stored tsv dataset"
    elif computation_cmd=="compare_interact_types":
        # Pairwise plots of per-user counts across the first three interaction
        # types (assumes at least three types; dict key order is arbitrary).
        num_interacts_dict = net_analyzer.compare_interaction_types()
        interact_types = num_interacts_dict.keys()
        plotter.plotLinesYY(num_interacts_dict[interact_types[0]], num_interacts_dict[interact_types[1]], interact_types[0], interact_types[1], display=True, logyscale=True)
        plotter.plotLinesYY(num_interacts_dict[interact_types[1]], num_interacts_dict[interact_types[2]], interact_types[1], interact_types[2], display=True, logyscale=True)
        plotter.plotLinesYY(num_interacts_dict[interact_types[0]], num_interacts_dict[interact_types[2]], interact_types[0], interact_types[2], display=True, logyscale=True)
    elif computation_cmd=="influence_test":
        # ta = TemporalAnalyzer(data)
        #interact_type = data.interact_types_dict["listen"
        # time_scale can be 'w':wallclock_time or 'o':ordinal_time
        # NOTE(review): split_date_str parameter is overridden here.
        split_date_str = "2008/01/01"
        t_window = -1
        t_scale = ord('w')
        max_tries_val = 10000
        max_node_computes_val = 100
        max_interact_ratio_error = 0.1
        klim_val=5
        split_timestamp = int(time.mktime(datetime.datetime.strptime(split_date_str, "%Y/%m/%d").timetuple()))
        # create training/test sets that will be used by fake-preference generation
        data.create_training_test_bytime(interact_type, split_timestamp)
        if create_fake_prefs is not None:
            print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1)
            fake_data.generate_fake_preferences(data,interact_type, split_timestamp, min_interactions_beforeaftersplit_per_user=min_interacts_beforeaftersplit_per_user, time_window=t_window, time_scale=t_scale, method=create_fake_prefs)
            #fake_data.generate_random_preferences(data, interact_type, split_timestamp)
            print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1)
            # Need to generate again because fake data changes test data
            data.create_training_test_bytime(interact_type, split_timestamp)
        la = LocalityAnalyzer(data)
        inf_tuple = compute.test_influence(la, interact_type=interact_type, time_diff=t_window, time_scale=ord('w'), split_timestamp=split_timestamp,
                #time_diff=100000, split_date_str="1970/06/23",
                control_divider=0.01, min_interactions_beforeaftersplit_per_user = min_interacts_beforeaftersplit_per_user, max_tries = max_tries_val, max_node_computes=max_node_computes_val, num_processes=4, max_interact_ratio_error=max_interact_ratio_error, klim=klim_val, method="influence")
        # Paired t-test: friend vs. non-friend columns of the result tuple.
        print "t-test results", ttest_rel(inf_tuple[2], inf_tuple[3])
        num_vals = len(inf_tuple[0])
        f = open("influence_test", "w")
        for i in range(num_vals):
            f.write("%f\t%f\t%f\t%f\n" % (inf_tuple[0][i], inf_tuple[1][i], inf_tuple[2][i], inf_tuple[3][i]))
        f.close()
    elif computation_cmd=="suscept_test":
        # Susceptibility experiment; loops over window sizes M and num_loop runs.
        use_artists = "songs" if "songs" in dataset_path else "artists"
        interact_type_str = "listen" if interact_type==0 else "love"
        #M = [50]#,20]#,30,40,50]
        t_scale = ord('o') # ordinal scale, this is the default used in paper.
        NUM_NODES_TO_COMPUTE = 4000000 # maximum number nodes to compute?
        num_threads=4 # the number of threads to spawn
        max_tries_val = None#30000 # should we stop after max_tries?
        max_node_computes_val = NUM_NODES_TO_COMPUTE/num_threads # number of nodes to compute at each node
        #max_interact_ratio_error =0.2 # these are errors (defaults are 0.1,0.1)
        #max_sim_ratio_error = 0.2
        #min_friends_match_ratio = 0.5 # important to be 1 for simulation--because e.g. in influence, we use a person's all friends to compute his next like
        klim_val = None # not used for influence test
        nonfr_match = "random" #random, serial, kbest. Default is random.
        num_loop = 1 # number of times we calculate this. For averaging results over multiple runs.
        f = open("suscept_test_results/"+dataset_domain + dataset_path.split("/")[-2] + interact_type_str+ strftime("%Y-%m-%d_%H:%M:%S")+'.dat', 'w')
        # Header lines record the experiment configuration for reproducibility.
        f.write("# use_artists=%r\tallow_duplicates=%r\tmax_node_computes_val=%d\tcreate_fake_prefs=%r\tnum_loop=%d\n" % ( use_artists, allow_duplicates, max_node_computes_val, create_fake_prefs, num_loop))
        f.write("# split_train_test_date=%s\ttime_scale=%d\tmin_interactions_beforeaftersplit_per_user=%d\tnum_threads=%d\n" % ( split_date_str, t_scale, min_interacts_beforeaftersplit_per_user, num_threads))
        f.write("# max_interact_ratio_error=%f\tmax_sim_ratio_error=%f\tmin_friends_match_ratio=%f\n" %( max_interact_ratio_error, max_sim_ratio_error, min_friends_match_ratio ))
        for t_window in M:
            for h in range(num_loop):
                f.write("\n\n################### ALERTINFO: STARTING ITERATION %d with M=%d\n" %( h, t_window))
                if split_date_str=="test":
                    split_timestamp = 2000
                else:
                    split_timestamp = int(time.mktime(datetime.datetime.strptime(split_date_str, "%Y/%m/%d").timetuple()))
                #split_timestamp=25000000
                if create_fake_prefs is not None:
                    data.create_training_test_bytime(interact_type, split_timestamp)
                    #print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1)
                    fake_data.generate_fake_preferences(data,interact_type, split_timestamp, min_interactions_beforeaftersplit_per_user = min_interacts_beforeaftersplit_per_user, time_window=t_window, time_scale=t_scale, method=create_fake_prefs)
                    #print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1)
                # Need to generate again because fake data changes test data
                data.create_training_test_bytime(interact_type, split_timestamp, min_interactions_beforeaftersplit_per_user=min_interacts_beforeaftersplit_per_user)
                la = LocalityAnalyzer(data)
                inf_tuple = compute.test_influence(la, interact_type=interact_type, time_diff=t_window, time_scale=t_scale, split_timestamp=split_timestamp,
                        #time_diff=100000, split_date_str="1970/06/23",
                        control_divider=0.01, # not used anymore
                        min_interactions_beforeaftersplit_per_user = min_interacts_beforeaftersplit_per_user, max_tries = max_tries_val, max_node_computes=max_node_computes_val, num_threads=num_threads, max_interact_ratio_error = max_interact_ratio_error, max_sim_ratio_error = max_sim_ratio_error, min_friends_match_ratio=min_friends_match_ratio, klim = klim_val, nonfr_match=nonfr_match, method="suscept", allow_duplicates=allow_duplicates)
                print "t-test results", ttest_rel(inf_tuple[2], inf_tuple[3])
                num_vals = len(inf_tuple[0])
                f.write("TestSetSize\tFrSimilarity\tNonFrSimilarity\tFrOverlap\tNonFrOverlap\tRandom_run_no\tM\n")
                for i in range(num_vals):
                    f.write("%d\t%f\t%f\t%f\t%f\t%d\t%d\n" % (inf_tuple[0][i], inf_tuple[1][i], inf_tuple[2][i], inf_tuple[3][i], inf_tuple[4][i], h, t_window))
        f.close()
    elif computation_cmd=="gen_adopt_data":
        t_window = 100
        t_scale = ord('o')
        if split_date_str=="test":
            split_timestamp = 2000
        else:
            split_timestamp = int(time.mktime(datetime.datetime.strptime(split_date_str, "%Y/%m/%d").timetuple()))
        if create_fake_prefs is not None:
            data.create_training_test_bytime(interact_type, split_timestamp)
            #print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1)
            fake_data.generate_fake_preferences(data,interact_type, split_timestamp, min_interactions_beforeaftersplit_per_user = min_interacts_beforeaftersplit_per_user, time_window=t_window, time_scale=t_scale, method=create_fake_prefs)
        # Regenerate: fake data changes the test set.
        data.create_training_test_bytime(interact_type, split_timestamp)
        gen_adopt.generate_adoption_data(data, interact_type, split_timestamp, min_interactions_beforeaftersplit_per_user=min_interacts_beforeaftersplit_per_user, time_window=t_window, time_scale=t_scale)
    elif computation_cmd=="compute_split_date":
        # Find the timestamp that puts traindata_fraction of interactions
        # before the train/test split.
        ret_timestamp = compute.compute_cutoff_date(data, interact_type, traindata_fraction)
        print ret_timestamp
        # ret_timestamp is in days; *86400 converts to epoch seconds.
        print datetime.datetime.fromtimestamp(ret_timestamp*86400).strftime("%Y-%m-%d")
"""
def run_computation(data, computation_cmd, outf, interact_type, create_fake_prefs, allow_duplicates, split_date_str, dataset_domain, dataset_path, min_interacts_beforeaftersplit_per_user, max_interact_ratio_error, max_sim_ratio_error, min_friends_match_ratio, traindata_fraction, M): net_analyzer = BasicNetworkAnalyzer(data) interaction_types = data.interact_types_dict filename_prefix = computation_cmd if computation_cmd is not None else "" if computation_cmd == "basic_stats" or computation_cmd is None: net_analyzer.show_basic_stats() ## use below if you want to write a new dataset (e.g. after filtering) data.store_ego_dataset( "/home/amit/datasets/social_activity_data/lastfm_filtered_listen/", write_maps=False) #data.compute_allpairs_sim(interact_type, data_type=ord("a")) elif computation_cmd == "random_similarity": for type_name, type_index in interaction_types.iteritems(): circlesims, globalsims = net_analyzer.compare_circle_global_similarity( type_index, num_random_trials=5, cutoff_rating=cutoff_rating) #plotter.plotLinesYY(circlesims, globalsims, "Friends", "Global") outf.write("User_id\tcircle_sim\tnonfriend_sim\n") outf.write(type_name + '\n') for ind in range(len(circlesims)): outf.write("%s\t%f\t%f\n" % (circlesims[ind][0], circlesims[ind][1], globalsims[ind][1])) print "\n", type_name, ":" print "Circle Average", sum([v2 for v1, v2 in circlesims]) / float( len(circlesims)) print "Global Average", sum([v2 for v1, v2 in globalsims]) / float( len(globalsims)) elif computation_cmd == "knn_similarity": #Compute K-nearest similarity KLIMITS = [10] outf.write("User_id\tk\tcircle_sim\tnonfriend_sim\n") for type_name, type_index in interaction_types.iteritems(): for curr_lim in KLIMITS: plot_circle, plot_external = net_analyzer.compare_circle_global_knnsimilarity( type_index, klim=curr_lim, cutoff_rating=cutoff_rating) compare_sims(plot_circle, plot_external) outf.write(type_name + '\n') for ind in range(len(plot_circle)): outf.write("%s\t%d\t%f\t%f\n" % 
(plot_circle[ind][0], curr_lim, plot_circle[ind][1], plot_external[ind][1])) #plotter.plotLinesYY(plot_circle, plot_external, "Friends", "Global") print type_name, "K", curr_lim print "Circle Average", utils.mean_sd( [v2 for v1, v2 in plot_circle]), len(plot_circle) print "Global Average", utils.mean_sd( [v2 for v1, v2 in plot_external]), len(plot_external) elif computation_cmd == "knn_recommender": #Compute K-nearest recommender KLIMITS = [10] rec_analyzer = RecommenderAnalyzer(data, max_recs_shown=10, traintest_split=0.7, cutoff_rating=cutoff_rating) outf.write("User_id\tk\trun_index\tcircle_ndcg\tnonfriend_ndcg\n") for type_name, type_index in interaction_types.iteritems(): for curr_lim in KLIMITS: local_avg = [] global_avg = [] Ntotal = 10 for i in range( Ntotal): # randomize because of training-test split. plot_circle, plot_external = rec_analyzer.compare_knearest_recommenders( type_index, klim=curr_lim, num_processes=2) compare_sims(plot_circle, plot_external) outf.write(type_name + "\n") for ind in range(len(plot_circle)): outf.write( "%s\t%d\t%d\t%f\t%f\n" % (plot_circle[ind][0], curr_lim, i, plot_circle[ind][1], plot_external[ind][1])) print "\n", type_name, "K", curr_lim #print plot_circle, plot_external curr_avg_local = utils.mean_sd( [v2 for v1, v2 in plot_circle]) curr_avg_global = utils.mean_sd( [v2 for v1, v2 in plot_external]) print "Circle Average", curr_avg_local print "Global Average", curr_avg_global local_avg.append(curr_avg_local[0]) global_avg.append(curr_avg_global[0]) #plotLinesYY(plot_circle, plot_external, "Friends", "Global") print "Local", sum(local_avg) / float(Ntotal) print "Global", sum(global_avg) / float(Ntotal) elif computation_cmd == "circle_coverage": lim_friends = [(5, 10), (10, 20), (20, 50), (50, 100)] for fr_limit in lim_friends: locality_analyzer = LocalityAnalyzer(data) coverage_list = locality_analyzer.compare_circle_item_coverages( 0, fr_limit[0], fr_limit[1]) plotter.plotLineY( sorted(coverage_list), "User", "Fraction 
of Items Covered with %d-%d friends" % (fr_limit[0], fr_limit[1])) print utils.mean_sd(coverage_list) elif computation_cmd == "items_edge_coverage": locality_analyzer = LocalityAnalyzer(data) items_cov_list, items_popularity, cov_ratio_list = locality_analyzer.compare_items_edge_coverage( 1, minimum_interactions=1) print utils.mean_sd(items_cov_list) print utils.mean_sd(items_popularity) #plotter.plotHist(sorted([val for val in cov_ratio_list if val<=1]), "Ratio of Edge coverage to total popularity", "Frequency", logyscale=True) #####plotter.plotHist(sorted([val for val in cov_ratio_list]), "Ratio of Edge coverage to total popularity", "Frequency", logyscale=True) #plotter.plotHist(sorted(items_popularity), "Item", "total popularity") plotter.plotCumulativePopularity( items_popularity, labelx="Item percentile", labely="Cum. percent of number of likes") elif computation_cmd == "network_draw": net_visualizor = NetworkVisualizor(data) net_visualizor.draw_network() elif computation_cmd == "network_item_adopt": net_visualizor = NetworkVisualizor(data) pprint(net_visualizor.plot_item_adoption(1669118)) elif computation_cmd == "node_details": for node_id in open('user_ids'): if node_id.strip('\n') != "User_id": net_analyzer.get_node_details(int(node_id.strip('\n'))) elif computation_cmd == "store_dataset": user_interacts = net_analyzer.get_user_interacts(1, cutoff_rating) f = open(outf_path + 'user_interacts_' + dataset_domain + '.tsv', 'w') f.write("user_id\titem_id\ttimestamp\n") for user_id, item_id, timestamp in user_interacts: f.write("%s\t%s\t%s\n" % (user_id, item_id, timestamp)) f.close() item_pop = net_analyzer.get_items_popularity(1, cutoff_rating) f = open(outf_path + 'items_' + dataset_domain + '.tsv', 'w') f.write("item_id\tpopularity\n") for item_id, pop in item_pop.iteritems(): f.write("%s\t%s\n" % (item_id, pop)) f.close() user_friends = net_analyzer.get_user_friends() f = open('user_friends_' + dataset_domain + '.tsv', 'w') f.write("user_id\tfriend_id\n") 
for user_id, friend_id in user_friends: f.write("%s\t%s\n" % (user_id, friend_id)) f.close() print "Successfully stored tsv dataset" elif computation_cmd == "compare_interact_types": num_interacts_dict = net_analyzer.compare_interaction_types() interact_types = num_interacts_dict.keys() plotter.plotLinesYY(num_interacts_dict[interact_types[0]], num_interacts_dict[interact_types[1]], interact_types[0], interact_types[1], display=True, logyscale=True) plotter.plotLinesYY(num_interacts_dict[interact_types[1]], num_interacts_dict[interact_types[2]], interact_types[1], interact_types[2], display=True, logyscale=True) plotter.plotLinesYY(num_interacts_dict[interact_types[0]], num_interacts_dict[interact_types[2]], interact_types[0], interact_types[2], display=True, logyscale=True) elif computation_cmd == "influence_test": # ta = TemporalAnalyzer(data) #interact_type = data.interact_types_dict["listen" # time_scale can be 'w':wallclock_time or 'o':ordinal_time split_date_str = "2008/01/01" t_window = -1 t_scale = ord('w') max_tries_val = 10000 max_node_computes_val = 100 max_interact_ratio_error = 0.1 klim_val = 5 split_timestamp = int( time.mktime( datetime.datetime.strptime(split_date_str, "%Y/%m/%d").timetuple())) # crate trainig test sets that will be used by fake geernation data.create_training_test_bytime(interact_type, split_timestamp) if create_fake_prefs is not None: print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1) fake_data.generate_fake_preferences( data, interact_type, split_timestamp, min_interactions_beforeaftersplit_per_user= min_interacts_beforeaftersplit_per_user, time_window=t_window, time_scale=t_scale, method=create_fake_prefs) #fake_data.generate_random_preferences(data, interact_type, split_timestamp) print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1) # Need to generate again because fake data changes test data data.create_training_test_bytime(interact_type, split_timestamp) la = 
LocalityAnalyzer(data) inf_tuple = compute.test_influence( la, interact_type=interact_type, time_diff=t_window, time_scale=ord('w'), split_timestamp=split_timestamp, #time_diff=100000, split_date_str="1970/06/23", control_divider=0.01, min_interactions_beforeaftersplit_per_user= min_interacts_beforeaftersplit_per_user, max_tries=max_tries_val, max_node_computes=max_node_computes_val, num_processes=4, max_interact_ratio_error=max_interact_ratio_error, klim=klim_val, method="influence") print "t-test results", ttest_rel(inf_tuple[2], inf_tuple[3]) num_vals = len(inf_tuple[0]) f = open("influence_test", "w") for i in range(num_vals): f.write("%f\t%f\t%f\t%f\n" % (inf_tuple[0][i], inf_tuple[1][i], inf_tuple[2][i], inf_tuple[3][i])) f.close() elif computation_cmd == "suscept_test": use_artists = "songs" if "songs" in dataset_path else "artists" interact_type_str = "listen" if interact_type == 0 else "love" #M = [50]#,20]#,30,40,50] t_scale = ord('o') # ordinal scale, this is the default used in paper. NUM_NODES_TO_COMPUTE = 4000000 # maximum number nodes to compute? num_threads = 4 # the number of threads to spawn max_tries_val = None #30000 # should we stop after max_tries? max_node_computes_val = NUM_NODES_TO_COMPUTE / num_threads # number of nodes to compute at each node #max_interact_ratio_error =0.2 # these are errors (defaults are 0.1,0.1) #max_sim_ratio_error = 0.2 #min_friends_match_ratio = 0.5 # important to be 1 for simulation--because e.g. in influence, we use a person's all friends to compute his next like klim_val = None # not used for influence test nonfr_match = "random" #random, serial, kbest. Default is random. num_loop = 1 # number of times we calculate this. For averaging results over multiple runs. 
f = open( "suscept_test_results/" + dataset_domain + dataset_path.split("/")[-2] + interact_type_str + strftime("%Y-%m-%d_%H:%M:%S") + '.dat', 'w') f.write( "# use_artists=%r\tallow_duplicates=%r\tmax_node_computes_val=%d\tcreate_fake_prefs=%r\tnum_loop=%d\n" % (use_artists, allow_duplicates, max_node_computes_val, create_fake_prefs, num_loop)) f.write( "# split_train_test_date=%s\ttime_scale=%d\tmin_interactions_beforeaftersplit_per_user=%d\tnum_threads=%d\n" % (split_date_str, t_scale, min_interacts_beforeaftersplit_per_user, num_threads)) f.write( "# max_interact_ratio_error=%f\tmax_sim_ratio_error=%f\tmin_friends_match_ratio=%f\n" % (max_interact_ratio_error, max_sim_ratio_error, min_friends_match_ratio)) for t_window in M: for h in range(num_loop): f.write( "\n\n################### ALERTINFO: STARTING ITERATION %d with M=%d\n" % (h, t_window)) if split_date_str == "test": split_timestamp = 2000 else: split_timestamp = int( time.mktime( datetime.datetime.strptime( split_date_str, "%Y/%m/%d").timetuple())) #split_timestamp=25000000 if create_fake_prefs is not None: data.create_training_test_bytime(interact_type, split_timestamp) #print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1) fake_data.generate_fake_preferences( data, interact_type, split_timestamp, min_interactions_beforeaftersplit_per_user= min_interacts_beforeaftersplit_per_user, time_window=t_window, time_scale=t_scale, method=create_fake_prefs) #print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1) # Need to generate again because fake data changes test data data.create_training_test_bytime( interact_type, split_timestamp, min_interactions_beforeaftersplit_per_user= min_interacts_beforeaftersplit_per_user) la = LocalityAnalyzer(data) inf_tuple = compute.test_influence( la, interact_type=interact_type, time_diff=t_window, time_scale=t_scale, split_timestamp=split_timestamp, #time_diff=100000, split_date_str="1970/06/23", control_divider=0.01, # not 
used anymore min_interactions_beforeaftersplit_per_user= min_interacts_beforeaftersplit_per_user, max_tries=max_tries_val, max_node_computes=max_node_computes_val, num_threads=num_threads, max_interact_ratio_error=max_interact_ratio_error, max_sim_ratio_error=max_sim_ratio_error, min_friends_match_ratio=min_friends_match_ratio, klim=klim_val, nonfr_match=nonfr_match, method="suscept", allow_duplicates=allow_duplicates) print "t-test results", ttest_rel(inf_tuple[2], inf_tuple[3]) num_vals = len(inf_tuple[0]) f.write( "TestSetSize\tFrSimilarity\tNonFrSimilarity\tFrOverlap\tNonFrOverlap\tRandom_run_no\tM\n" ) for i in range(num_vals): f.write("%d\t%f\t%f\t%f\t%f\t%d\t%d\n" % (inf_tuple[0][i], inf_tuple[1][i], inf_tuple[2][i], inf_tuple[3][i], inf_tuple[4][i], h, t_window)) f.close() elif computation_cmd == "gen_adopt_data": t_window = 100 t_scale = ord('o') if split_date_str == "test": split_timestamp = 2000 else: split_timestamp = int( time.mktime( datetime.datetime.strptime(split_date_str, "%Y/%m/%d").timetuple())) if create_fake_prefs is not None: data.create_training_test_bytime(interact_type, split_timestamp) #print data.get_nodes_list()[1].get_interactions(interact_type, cutoff_rating=-1) fake_data.generate_fake_preferences( data, interact_type, split_timestamp, min_interactions_beforeaftersplit_per_user= min_interacts_beforeaftersplit_per_user, time_window=t_window, time_scale=t_scale, method=create_fake_prefs) data.create_training_test_bytime(interact_type, split_timestamp) gen_adopt.generate_adoption_data( data, interact_type, split_timestamp, min_interactions_beforeaftersplit_per_user= min_interacts_beforeaftersplit_per_user, time_window=t_window, time_scale=t_scale) elif computation_cmd == "compute_split_date": ret_timestamp = compute.compute_cutoff_date(data, interact_type, traindata_fraction) print ret_timestamp print datetime.datetime.fromtimestamp(ret_timestamp * 86400).strftime("%Y-%m-%d") """