def main(graph_name):
    """Grid-search the (prob_infection, prob_Immune) parameter landscape for an
    infection-style model of idea adoption on a hospital shift network.

    For every parameter pair on a 0.1-spaced grid it runs ``Niter_training``
    stochastic realizations over the training segment (shifts with order
    < ``cutting_day``), compares each simulated adoption curve against the
    actual adopter evolution read from file, writes a landscape summary file
    plus per-parameter histograms, and finally prints the optimum parameter
    set under two criteria (mean trajectory distance among runs whose end
    point is within ``delta_end``, and the product of trajectory distance and
    end-point distance).

    Parameters
    ----------
    graph_name : str
        Path to a GML file readable by ``nx.read_gml``; nodes carry the
        attributes 'type' ("shift"/"A"/"F"), 'label', 'order' and
        'shift_lenght' used below.

    Side effects: truncates/creates the landscape file and writes several
    histogram and summary files under ../Results/weight_shifts/.

    NOTE(review): Python 2 code (print statements, ``print >>``, networkx 1.x
    ``G.node``).  Depends on module-level names defined elsewhere in the file:
    nx, random, numpy, look_for_T3_weekends,
    compare_real_evol_vs_simus_to_be_called, histograma_gral_negv_posit,
    histograma_bines_gral.
    """

    cutting_day = 175  # to separate   training-testing

    G = nx.read_gml(graph_name)

    list_id_weekends_T3 = look_for_T3_weekends(
        G
    )  # T3 doesnt share fellows in the weekend  (but they are the exception)

    all_team = "NO"  # as adopters or not
    Nbins = 20  # for the histogram of sum of distances

    dir_real_data = '../Results/'

    # NOTE(review): 'dir' shadows the builtin of the same name (harmless here,
    # only used as a path prefix).
    dir = "../Results/weight_shifts/infection/"

    delta_end = 3.  # >= than + or -  dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!)

    Niter_training = 1000

    fixed_param = ""  #"FIXED_Pimm0_"    # or ""  # for the Results file that contains the sorted list of best parameters

    output_file3 = "../Results/weight_shifts/Landscape_parameters_infection_train_test_" + str(
        Niter_training) + "iter.dat"
    # Open with 'wt' and close immediately: truncates any previous landscape
    # file; rows are appended ('at') inside the parameter loop below.
    file3 = open(output_file3, 'wt')

    file3.close()

    ######################################################################################
    #  I read the file of the actual evolution of the idea spreading in the hospital:   ##
    ######################################################################################

    if all_team == "YES":
        print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
        exit()

    else:
        filename_actual_evol = "../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"

    file1 = open(
        filename_actual_evol, 'r'
    )  ## i read the file:  list_dates_and_names_current_adopters.txt  (created with: extract_real_evolution_number_adopters.py)
    list_lines_file = file1.readlines()

    # Second whitespace-separated column of each row is the adopter count.
    list_actual_evol = []
    for line in list_lines_file:  # [1:]:   # i exclude the first row

        num_adopters = float(line.split(" ")[1])
        list_actual_evol.append(num_adopters)

    list_actual_evol_training = list_actual_evol[:cutting_day]
    #   list_actual_evol_testing=list_actual_evol[(cutting_day-1):]   #i dont use this

    ##################################################################

    #../Results/network_final_schedule_withTeam3/infection/Average_time_evolution_Infection_p0.9_Immune0.5_1000iter_2012.dat

    # Parameter grid: max bounds of 1.01 let the 1.0 grid point survive the
    # floating-point accumulation of repeatedly adding 0.1.
    prob_min = 0.0
    prob_max = 1.01
    delta_prob = 0.1

    prob_Immune_min = 0.00
    prob_Immune_max = 1.01
    delta_prob_Immune = 0.1

    list_dist_at_ending_point_fixed_parameters = []
    dict_filenames_tot_distance = {
    }  # i will save the filename as key and the tot distance from that curve to the original one
    dict_filenames_prod_distances = {}

    prob_Immune = prob_Immune_min
    while prob_Immune <= prob_Immune_max:

        print "prom Immune:", prob_Immune

        prob_infection = prob_min
        while prob_infection <= prob_max:

            print "  p:", prob_infection

            # Filename is used only as a dictionary key identifying this
            # parameter pair; the file itself is not written here (see below).
            output_file2 = dir + "Average_time_evolution_Infection_training_p" + str(
                prob_infection) + "_" + "Immune" + str(
                    prob_Immune) + "_" + str(
                        Niter_training) + "iter_2012_avg_ic_day" + str(
                            cutting_day) + ".dat"
            #   file2 = open(output_file2,'wt')                                          I DONT NEED TO WRITE IT, COS I WILL USE THE WHOLE FILE FROM THE WHOLE FIT, WITH THE PARAMETER VALUES THAT THE TESTING-UP-TODAY-125 TELLS ME
            #  file2.close()

            # i create the empty list of list for the Niter temporal evolutions
            # NOTE(review): num_Drs is counted here but never used afterwards
            # in this function.
            num_shifts = 0
            num_Drs = 0.
            for n in G.nodes():
                G.node[n]["status"] = "S"
                if G.node[n]['type'] == "shift":
                    num_shifts += 1
                else:
                    num_Drs += 1.

        #  list_final_I_values_fixed_p=[]  # i dont care about the final values right now, but about the whole time evol
            list_lists_t_evolutions = []

            list_dist_fixed_parameters = []
            list_dist_abs_at_ending_point_fixed_parameters = []
            list_final_num_infected = []

            for iter in range(Niter_training):

                #   print "     iter:",iter

                list_I = []  #list infected doctors
                list_ordering = []
                list_s = []
                list_A = []
                list_F = []

                ########### set I.C.

                # NOTE(review): max_order is computed but only consumed by the
                # commented-out print further down.
                max_order = 0
                for n in G.nodes():
                    G.node[n]["status"] = "S"  # all nodes are Susceptible
                    if G.node[n]['type'] == "shift":
                        list_s.append(n)
                        if G.node[n]['order'] > max_order:
                            max_order = G.node[n]['order']
                    else:
                        # Wunderink and Weiss are the two seed adopters.
                        if G.node[n]['label'] == "Wunderink" or G.node[n][
                                "label"] == "Weiss":
                            G.node[n]["status"] = "I"
                            list_I.append(G.node[n]['label'])

                        if G.node[n]['type'] == "A":
                            list_A.append(n)

                        if G.node[n]['type'] == "F":
                            list_F.append(n)

                list_single_t_evolution = []
                list_single_t_evolution.append(
                    2.0)  # I always start with TWO infected doctors!!

                for n in G.nodes(
                ):  # i make some DOCTORs INMUNE  (anyone except Weiss and Wunderink)
                    if (G.node[n]['type'] == "A") or (G.node[n]['type']
                                                      == "F"):
                        if G.node[n]['label'] != "Wunderink" and G.node[n][
                                "label"] != "Weiss":
                            rand = random.random()
                            if rand < prob_Immune:
                                G.node[n]["status"] = "Immune"

            #   print max_order

            ################# the dynamics starts:

                t = 1
                while t < cutting_day:  # loop over shifts, in order   just until cutting day (training segment)
                    for n in G.nodes():
                        if G.node[n]['type'] == "shift" and G.node[n][
                                'order'] == t:

                            # 'shift_lenght' [sic] is the attribute name as
                            # stored in the GML file.
                            shift_lenght = int(G.node[n]['shift_lenght'])

                            if shift_lenght == 2 and n not in list_id_weekends_T3:
                                shift_lenght = 1  # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2.  (weekend shifts for T3 are two day long, with no sharing fellows)
                            #  print "one-day weekend", G.node[n]['label'],G.node[n]['shift_lenght']

                            flag_possible_infection = 0
                            for doctor in G.neighbors(
                                    n
                            ):  #first i check if any doctor is infected in this shift
                                if G.node[doctor]["status"] == "I":
                                    flag_possible_infection = 1

                            if flag_possible_infection:
                                for doctor in G.neighbors(
                                        n
                                ):  # then the doctors in that shift, gets infected with prob_infection
                                    # One independent infection trial per day
                                    # of the shift.
                                    for i in range(shift_lenght):
                                        if G.node[doctor]["status"] == "S":
                                            rand = random.random()
                                            if rand < prob_infection:
                                                G.node[doctor]["status"] = "I"
                                                if G.node[doctor][
                                                        "type"] == "A":
                                                    list_I.append(
                                                        G.node[doctor]
                                                        ["label"])

                    list_single_t_evolution.append(float(
                        len(list_I)))  #/(len(list_A)+len(list_F)))

                    t += 1

                ######## end t loop

                list_lists_t_evolutions.append(list_single_t_evolution)

                list_dist_fixed_parameters.append(
                    compare_real_evol_vs_simus_to_be_called.compare_two_curves(
                        list_actual_evol_training, list_single_t_evolution))

                list_dist_abs_at_ending_point_fixed_parameters.append(
                    abs(list_single_t_evolution[-1] -
                        list_actual_evol_training[-1])
                )  # i save the distance at the ending point between the current simu and actual evol

                #  print "actual:",len(list_actual_evol_training),"  simu:",len(list_single_t_evolution)   # 125, 125

                list_final_num_infected.append(list_single_t_evolution[-1])

                # Signed version of the end-point distance, accumulated across
                # ALL parameter pairs (list is created before the grid loops).
                list_dist_at_ending_point_fixed_parameters.append(
                    list_single_t_evolution[-1] - list_actual_evol_training[-1]
                )  # i save the distance at the ending point between the current simu and actual evol

            ######## end loop Niter for the training fase

            # [mean trajectory dist, std trajectory dist, mean |end-point dist|]
            list_pair_dist_std_delta_end = []

            list_pair_dist_std_delta_end.append(
                numpy.mean(list_dist_fixed_parameters)
            )  # average dist between the curves over Niter
            list_pair_dist_std_delta_end.append(
                numpy.std(list_dist_fixed_parameters))

            list_pair_dist_std_delta_end.append(
                numpy.mean(list_dist_abs_at_ending_point_fixed_parameters))

            file3 = open(output_file3, 'at')  # i print out the landscape
            print >> file3, prob_infection, prob_Immune, numpy.mean(
                list_dist_abs_at_ending_point_fixed_parameters
            ), numpy.mean(list_dist_fixed_parameters), numpy.mean(
                list_final_num_infected
            ), numpy.std(list_final_num_infected), numpy.std(
                list_final_num_infected) / numpy.mean(list_final_num_infected)
            file3.close()

            histogram_filename = "../Results/weight_shifts/histogr_raw_distances_ending_test_train_infection_p" + str(
                prob_infection) + "_Immune" + str(prob_Immune) + "_" + str(
                    Niter_training) + "iter_day" + str(cutting_day) + ".dat"
            histograma_gral_negv_posit.histograma(
                list_dist_at_ending_point_fixed_parameters, histogram_filename)

            histogram_filename2 = "../Results/weight_shifts/histogr_sum_dist_traject_infection_training_p" + str(
                prob_infection
            ) + "_" + "Immune" + str(prob_Immune) + "_" + str(
                Niter_training) + "iter_day" + str(cutting_day) + ".dat"

            histograma_bines_gral.histograma_bins(list_dist_fixed_parameters,
                                                  Nbins, histogram_filename2)

            print "written histogram file: ", histogram_filename
            print "written histogram file: ", histogram_filename2

            value = numpy.mean(list_dist_fixed_parameters) * numpy.mean(
                list_dist_abs_at_ending_point_fixed_parameters
            )  # if SD=0, it is a problem, because then that is the minimun value, but not the optimum i am looking for!!

            dict_filenames_prod_distances[output_file2] = value

            if (
                    numpy.mean(list_dist_abs_at_ending_point_fixed_parameters)
            ) <= delta_end:  # i only consider situations close enough at the ending point

                dict_filenames_tot_distance[
                    output_file2] = list_pair_dist_std_delta_end

                print numpy.mean(list_dist_abs_at_ending_point_fixed_parameters
                                 ), "added scenario:", output_file2

        # file2 = open(output_file2,'at')
        #for s in range(len(list_single_t_evolution)):
        #   list_fixed_t=[]
        #  for iter in range (Niter_training):
        #     list_fixed_t.append(list_lists_t_evolutions[iter][s])
        #print >> file2, s,numpy.mean(list_fixed_t)
        #file2.close()

            prob_infection += delta_prob
        prob_Immune += delta_prob_Immune

    # Criterion 1: minimum mean trajectory distance among end-point-matching
    # scenarios.
    list_order_dict = compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(
        dict_filenames_tot_distance, "Infection_training_weight", all_team,
        Niter_training, cutting_day)

    # it returns a list of tuples like this :  ('../Results/network_final_schedule_withTeam3_local/infection/Average_time_evolution_Infection_training_p0.7_Immune0.0_2iter_2012.dat', [2540.0, 208.0, 1.0])  the best set of parameters  being the fist one of the elements in the list.

    string_name = "infection_training_" + fixed_param + str(
        Niter_training) + "iter_day" + str(
            cutting_day
        ) + ".dat"  # for the "Results" file with the sorted list of files

    # Criterion 2: minimum product of trajectory distance and end-point
    # distance.
    list_order_dict2 = compare_real_evol_vs_simus_to_be_called.pick_minimum_prod_distances(
        dict_filenames_prod_distances, string_name, all_team, Niter_training,
        cutting_day)

    # Parameter values are recovered by parsing them back out of the winning
    # filename.
    optimum_filename = list_order_dict[0][0]
    prob_infection = float(list_order_dict[0][0].split("_p")[1].split("_")[0])
    prob_Immune = float(
        list_order_dict[0][0].split("_Immune")[1].split("_")[0])

    print "Optimum parameters (old method) at day", cutting_day, " are: p=", prob_infection, " and Pimmune=", prob_Immune

    #  i already know the optimum, now i run the dynamics with those values, starting from the average state on the cutting point, and test:

    optimum_filename = list_order_dict2[0][0]
    prob_infection = float(list_order_dict2[0][0].split("_p")[1].split("_")[0])
    prob_Immune = float(
        list_order_dict2[0][0].split("_Immune")[1].split("_")[0])

    print "Optimum parameters (product of distances along_traject and at the end) at day", cutting_day, " are: p=", prob_infection, " and Pimmune=", prob_Immune

    print "Run that simulation with the optimum parameter set:", optimum_filename

    print "printed out landscape file:", output_file3

    output_file10 = "../Results/weight_shifts/Summary_results_training_segment_infection_p" + str(
        prob_infection) + "_" + "Immune" + str(prob_Immune) + "_" + str(
            Niter_training) + "iter_avg_ic_day" + str(cutting_day) + ".dat"
    file10 = open(output_file10, 'wt')

    print >> file10, "Summary results from train-testing persuasion with", Niter_training, "iter , using all the individual cutting points as IC, and with values for the parameters:  prob_inf ", prob_infection, " prob immune: ", prob_Immune, "\n"

    print >> file10, "Look for the file (or run that simulation) with the optimum parameter set:", optimum_filename
    file10.close()
def main():
    """Analyze a pickled dataset of T-S game rounds played by users.

    Reads ../Data/userdata.pickle (a list with one dict per player, each
    holding per-round dicts under 'rondes'), then per user computes: fraction
    of cooperative actions, total and average payoff, and the most common
    strategy label ("max_payoff"/"max_diff" combinations).  It singles out
    users who cooperated in the lower triangle of the Harmony quadrant
    (5 <= S <= T <= 10), writes histogram and scatter-plot data files, and
    finally re-reads the dataset to compare cooperation levels of that user
    subset against everyone else.

    NOTE(review): this redefines ``main`` and therefore shadows the earlier
    ``main(graph_name)`` in this file — only one of the two is reachable by
    name at runtime.  Python 2 code; relies on module-level imports defined
    elsewhere: pickle, numpy, random, unidecode, histograma_bines_gral.
    """

    # NOTE(review): 'pupulation_age' is a misspelling of population_age kept
    # for byte-identity; only "All" is active below.
    pupulation_age = "All"  #"young"  # or "adult"   or "All"

    if pupulation_age == "young":
        min_age_threshold = 0
        max_age_threshold = 15
    elif pupulation_age == "adult":
        min_age_threshold = 16
        max_age_threshold = 100
    elif pupulation_age == "All":
        min_age_threshold = 0
        max_age_threshold = 100

    else:
        print "wrong age range"
        exit()

    # Fixed payoffs: R (mutual cooperation) and P (mutual defection); T and S
    # vary per round and come from the data.
    R = 10
    P = 5

    #######  to select results only from given rounds  (both ends included)
    min_round = 1
    max_round = 18

    ######### input file
    filename = "../Data/userdata.pickle"
    master_list = pickle.load(open(
        filename, 'rb'))  # a list: one element per player (541)
    #########

    ######### output files
    Nbins_fraction_coop = 15
    name_h_fraction_coop = "../Results/histogram_fraction_coop_tot_users.dat"

    Nbins_tot_payoff = 20
    name_h_tot_payoff = "../Results/histogram_tot_payoff_users.dat"

    Nbins_avg_payoff = 20
    name_h_avg_payoff = "../Results/histogram_avg_payoff_users.dat"

    output_filename1 = "../Results/Scatter_plot_cooperation_tot_and_avg_payoff.dat"
    output1 = open(output_filename1, 'wt')

    #########

    ### master_list has the form: [{'guany_total': 110L, 'partida': 1L, 'genere': u'h', 'num_eleccions': 14, 'edat': 50L, 'rationality': 66.666666666666671, 'ambition': 100.0, 'rondes': [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0},...],      'nickname': u'Caesar', 'id': 2L}]

    # The 'rondes' key in turn holds a list of dicts (one per round):
    # [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0}, ...]

    num_valid_actions = 0.
    num_lower_H_actions = 0
    num_higher_H_actions = 0
    coop_actions_higher = 0
    coop_actions_lower = 0

    dict_user_id_list_coop = {}
    dict_user_id_list_tot_payoff = {}

    dict_user_id_gender = {}

    dict_user_list_actions_in_lower_Harmony = {}
    dict_user_avg_coop_in_lower_Harmony = {}

    dict_user_id_tot_numelections = {}

    dict_user_id_list_strat = {}

    num_users = float(len(master_list))

    list_cooperators_in_lower_Harmony = []
    list_cooperators_in_higher_Harmony = []

    list_defectors_in_lower_Harmony = []

    for dictionary in master_list:  # each element of the list is itself a dict

        nickname = unidecode(dictionary['nickname']).replace(" ", "_")
        user_id = dictionary['id']

        payoff_total = float(dictionary['guany_total']
                             )  # this is calculated only up to round #13  !!
        partida = dictionary['partida']

        # Gender encoded as 1 (h) / 0 (d); other values pass through as-is.
        gender = dictionary['genere']
        if gender == "h":
            gender = 1
        elif gender == "d":
            gender = 0

        dict_user_id_gender[user_id] = gender

        num_elecciones = int(dictionary['num_eleccions'])
        age = int(dictionary['edat'])
        avg_racionalidad = dictionary['rationality']
        avg_ambicion = dictionary['ambition']
        num_rondas = len(dictionary['rondes'])

        dict_user_id_tot_numelections[user_id] = num_elecciones

        if user_id not in dict_user_id_list_coop:
            dict_user_id_list_coop[user_id] = []
            dict_user_id_list_tot_payoff[user_id] = []
            dict_user_id_list_strat[user_id] = []

        list_dict_rondas = dictionary['rondes']

        for dict_ronda in list_dict_rondas:
            ##  each round dict contains: {'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}

            T = int(dict_ronda['T'])
            S = int(dict_ronda['S'])

            list_four_possible_values = [P, R, T, S]

            punto_TS = (T, S)

            # NOTE(review): payoff_norm is computed but never used afterwards.
            try:
                payoff = float(dict_ronda['guany'])
                payoff_norm = float(dict_ronda['guany']) / float(
                    max(list_four_possible_values))
            except TypeError:
                payoff = dict_ronda['guany']  # if payoff is None

            payoff_oponent = dict_ronda['guany_oponent']
            rationality = dict_ronda['rationality']
            ambition = dict_ronda['ambition']

            round_number = dict_ronda['numronda']

            # Action encoding: C -> 1., D -> 0.
            action = dict_ronda['seleccio']
            if action == "C":
                action = 1.
            elif action == "D":
                action = 0.
            # if the player made no choice, it is None

            if action != None:
                num_valid_actions += 1
                if user_id not in dict_user_list_actions_in_lower_Harmony:
                    dict_user_list_actions_in_lower_Harmony[user_id] = []

            if action != None:
                dict_user_id_list_coop[user_id].append(action)

            if payoff != None:
                dict_user_id_list_tot_payoff[user_id].append(payoff)

            num_ronda = dict_ronda['numronda']
            quadrant = dict_ronda['cuadrant'].replace(" ",
                                                      "_").replace("'", "")

            action_oponent = dict_ronda['seleccio_oponent']
            if action_oponent == "C":
                action_oponent = 1.
            elif action_oponent == "D":
                action_oponent = 0.
            # if the opponent made no choice, it is None

            oponent_id = dict_ronda['oponent']

            random_action = random.choice([0, 1])

            #  print "\nR:",R, " S:",S, " T:", T,  " P:",P, " action:",action, " payoff:",payoff

            # Strategy labelling: did the chosen action target the maximum
            # payoff and/or the maximum payoff difference?
            strat = None
            strat1 = None
            if action == 1:
                if R == max(R, S, T, P) or S == max(R, S, T, P):
                    strat1 = "max_payoff"

            elif action == 0:
                # NOTE(review): duplicated condition — the second clause
                # likely was meant to test P == max(R, S, T, P); as written
                # both clauses test T.  Kept byte-identical.
                if T == max(R, S, T, P) or T == max(R, S, T, P):
                    strat1 = "max_payoff"

            # R-R and P-P are always 0; the real comparison is S-T vs T-S.
            max_diff = max(R - R, S - T, T - S, P - P)
            min_diff = min(R - R, S - T, T - S, P - P)

            strat2 = None
            if action == 1:
                if S - T == max_diff:
                    strat2 = "max_diff"

            elif action == 0:
                if T - S == max_diff:
                    strat2 = "max_diff"

            if strat1 != None and strat2 != None:
                if "payoff" in strat1:
                    strat = strat1 + " " + strat2
                else:
                    strat = strat2 + " " + strat1
            else:
                if strat1 == None:
                    strat = strat2
                else:
                    strat = strat1

            dict_user_id_list_strat[user_id].append(strat)

            # Harmony-quadrant bookkeeping: 5 <= S,T <= 10.
            if S >= 5 and S <= 10:
                if T >= 5 and T <= 10:
                    if S <= T:  # the lower triangle of the Harmony game:
                        if action == 1:
                            if user_id not in list_cooperators_in_lower_Harmony:
                                list_cooperators_in_lower_Harmony.append(
                                    user_id)
                            coop_actions_lower += 1
                        elif action == 0:
                            if user_id not in list_defectors_in_lower_Harmony:
                                list_defectors_in_lower_Harmony.append(user_id)

                        if action != None:
                            dict_user_list_actions_in_lower_Harmony[
                                user_id].append(action)
                            num_lower_H_actions += 1

                    else:  # the upper triangle of the Harmony game:
                        if action == 1:
                            if user_id not in list_cooperators_in_higher_Harmony:
                                list_cooperators_in_higher_Harmony.append(
                                    user_id)
                            coop_actions_higher += 1
                        num_higher_H_actions += 1

    ###### end loop over lines in the main dict
    list_frac_coop_users = []
    list_tot_payoff_users = []
    list_avg_payoff_users = []
    dict_common_strat_num_users = {}
    dict_strat_num_users = {}

    # Per-user aggregates + scatter-plot row per user.
    for user_id in dict_user_id_list_coop:
        fract_coop = numpy.mean(dict_user_id_list_coop[user_id])
        list_frac_coop_users.append(fract_coop)

        tot_payoff = sum(dict_user_id_list_tot_payoff[user_id])
        list_tot_payoff_users.append(tot_payoff)

        avg_payoff = numpy.mean(dict_user_id_list_tot_payoff[user_id])
        list_avg_payoff_users.append(avg_payoff)

        common_strat = max(set(dict_user_id_list_strat[user_id]),
                           key=dict_user_id_list_strat[user_id].count
                           )  # most common element in the list
        print >> output1, fract_coop, tot_payoff, avg_payoff, dict_user_id_gender[
            user_id], dict_user_id_tot_numelections[user_id]

        ##### counting most common strategy per user
        if common_strat not in dict_common_strat_num_users:
            dict_common_strat_num_users[common_strat] = 0
        dict_common_strat_num_users[common_strat] += 1

        ######## counting all strategies
        for strat in dict_user_id_list_strat[user_id]:
            if strat not in dict_strat_num_users:
                dict_strat_num_users[strat] = 0
            dict_strat_num_users[strat] += 1

    # Users whose mean cooperation in the lower Harmony triangle exceeds 0.5.
    list_avg_cooperators_lower_H = []
    for user_id in dict_user_list_actions_in_lower_Harmony:
        dict_user_avg_coop_in_lower_Harmony[user_id] = numpy.mean(
            dict_user_list_actions_in_lower_Harmony[user_id])

        print user_id, dict_user_list_actions_in_lower_Harmony[
            user_id], dict_user_avg_coop_in_lower_Harmony[user_id]

        if dict_user_avg_coop_in_lower_Harmony[user_id] > 0.5:
            list_avg_cooperators_lower_H.append(user_id)

    print len(dict_user_list_actions_in_lower_Harmony), len(
        list_avg_cooperators_lower_H)
    # NOTE(review): blocks until the user presses Enter (interactive pause).
    raw_input()

    histograma_bines_gral.histograma_bins(
        list_frac_coop_users, Nbins_fraction_coop, name_h_fraction_coop
    )  #x_position , norm_count, count, norm_cumulat_count, cumulat_count ,  float(hist[b])/float(len(lista))
    histograma_bines_gral.histograma_bins(list_tot_payoff_users,
                                          Nbins_tot_payoff, name_h_tot_payoff)
    histograma_bines_gral.histograma_bins(list_avg_payoff_users,
                                          Nbins_avg_payoff, name_h_avg_payoff)

    output1.close()
    print "written output file:", output_filename1

    # NOTE(review): the next line is corrupted by source redaction ("******")
    # and is NOT valid Python; the original presumably printed
    # dict_common_strat_num_users between the two captions.  Reconstruct
    # before running.
    print "common strategies within user:"******"\nall strategies:"
    for key in dict_strat_num_users:
        print key, dict_strat_num_users[
            key], dict_strat_num_users[key] / num_valid_actions

    print "# items in the pickle:", len(master_list)

    print "\n# unique coop in lower Harmony:", len(
        list_cooperators_in_lower_Harmony
    ), "  # actions in lower H:", num_lower_H_actions, " fract_coop:", coop_actions_lower / float(
        num_lower_H_actions), " # avg cooperatos (>0.5) in lower H:", len(
            list_avg_cooperators_lower_H)

    print "\n# unique defectors in lower Harmony:", len(
        list_defectors_in_lower_Harmony)

    print "\nintersection unique users cooperators and defectors in lower Harmony", len(
        list(
            set(list_cooperators_in_lower_Harmony)
            & set(list_defectors_in_lower_Harmony)))

    print "\n# unique coop in higher Harmony:", len(
        list_cooperators_in_higher_Harmony
    ), "  # actions in higher H:", num_higher_H_actions, " fract_coop:", coop_actions_higher / float(
        num_higher_H_actions)

    print "  tot # valid actions:", num_valid_actions, "  tot # users:", num_users

    #output2= open("../Results/pickle_cooperators_lower_H.pickle",'wt')
    #pickle.dump(list_avg_cooperators_lower_H, output2)

    ####### i read the file again to compare levels of cooperations for some sets of users

    list_actions_all_users = []
    list_actions_coop_in_lower_H = []
    list_actions_NO_coop_in_lower_H = []

    for dictionary in master_list:  # each element of the list is itself a dict

        user_id = dictionary['id']

        list_dict_rondas = dictionary['rondes']

        for dict_ronda in list_dict_rondas:

            # NOTE(review): list_four_possible_values here is stale — it still
            # holds the value from the LAST round of the first pass (not this
            # round's T and S); payoff_norm is unused anyway.
            try:
                payoff = float(dict_ronda['guany'])
                payoff_norm = float(dict_ronda['guany']) / float(
                    max(list_four_possible_values))
            except TypeError:
                payoff = dict_ronda['guany']  # if payoff is None

            payoff_oponent = dict_ronda['guany_oponent']

            action = dict_ronda['seleccio']
            if action == "C":
                action = 1.
            elif action == "D":
                action = 0.
            # if no choice was made, it is None

            if action != None:

                list_actions_all_users.append(action)

                if user_id in list_cooperators_in_lower_Harmony:
                    list_actions_coop_in_lower_H.append(action)

                else:
                    list_actions_NO_coop_in_lower_H.append(action)
    print "avg coop all users:", numpy.mean(list_actions_all_users), len(
        list_actions_all_users), "    avg coop special set:", numpy.mean(
            list_actions_coop_in_lower_H), len(
                list_actions_coop_in_lower_H
            ), "    avg coop NO special set:", numpy.mean(
                list_actions_NO_coop_in_lower_H), len(
                    list_actions_NO_coop_in_lower_H)
# Ejemplo n.º 3  (example separator left over from code aggregation; not executable code)
# 0
def main(graph_name):
    """Fit infection parameters on a training segment, then test them.

    Sweeps the (prob_infection, prob_Immune) grid, simulates the infection
    spreading over the doctor/shift network ``G`` for the first
    ``cutting_day`` shifts (training segment), compares each averaged
    simulated curve against the real adoption curve, picks the best
    parameter pair, and finally re-runs the dynamics from probabilistic
    initial conditions drawn from the cutting-point network states to
    evaluate the fit on the testing segment.

    graph_name -- path to the GML file with the bipartite doctor/shift
    network (shift nodes carry 'order'; doctor nodes carry 'type' A/F
    and 'label').

    Side effects: writes average-evolution .dat files and two histograms
    under ../Results/, and prints progress to stdout (Python 2).
    """

    cutting_day = 125  # to separate   training-testing

    G = nx.read_gml(graph_name)

    all_team = "NO"  # as adopters or not

    dir_real_data = '../Results/'

    delta_end = 3  # >= than + or -  dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!)

    Niter_training = 5
    Niter_testing = 5

    ######################################################################################
    #  I read the file of the actual evolution of the idea spreading in the hospital:   ##
    ######################################################################################

    if all_team == "YES":
        filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_all_team_as_adopters_SIMPLER.csv"

    else:
        filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_SIMPLER.csv"
    # i no longer need to ALSO change the file name in the curve-comparison code

    list_actual_evol = []
    result_actual_file = csv.reader(open(filename_actual_evol, 'rb'),
                                    delimiter=',')
    cont = 0
    for row in result_actual_file:
        if cont > 0:  # i ignore the first line with the headers

            num_adopters = row[3]  # column 3 holds the adopter count

            list_actual_evol.append(float(num_adopters))

        cont += 1

    # training/testing curves overlap by one point at the cutting day
    list_actual_evol_training = list_actual_evol[:cutting_day]
    list_actual_evol_testing = list_actual_evol[(cutting_day - 1):]

    ##################################################################

    #../Results/network_final_schedule_withTeam3/infection/Average_time_evolution_Infection_p0.9_Immune0.5_1000iter_2012.dat

    # parameter grid for the sweep (inclusive upper bounds via the 1.01)
    prob_min = 0.00
    prob_max = 1.01
    delta_prob = 0.1

    prob_Immune_min = 0.00
    prob_Immune_max = 1.01
    delta_prob_Immune = 0.1

    dir = "../Results/network_final_schedule_withTeam3_local/infection/"

    dict_filenames_tot_distance = {
    }  # i will save the filename as key and the tot distance from that curve to the original one

    dict_filenames_list_dict_network_states = {
    }  # i will save the filename as key and the list of networks at cutting day as value

    prob_Immune = prob_Immune_min
    while prob_Immune <= prob_Immune_max:

        print "prom Immune:", prob_Immune

        prob_infection = prob_min
        while prob_infection <= prob_max:

            print "  p:", prob_infection

            # one output file per parameter pair; truncated here, appended later
            output_file2 = dir + "Average_time_evolution_Infection_training_p" + str(
                prob_infection) + "_" + "Immune" + str(
                    prob_Immune) + "_" + str(Niter_training) + "iter_2012.dat"
            file2 = open(output_file2, 'wt')
            file2.close()

            # i create the empty list of list for the Niter temporal evolutions
            num_shifts = 0
            for n in G.nodes():
                G.node[n]["status"] = "S"
                if G.node[n]['type'] == "shift":
                    num_shifts += 1

        #  list_final_I_values_fixed_p=[]  # i dont care about the final values right now, but about the whole time evol
            list_lists_t_evolutions = []

            list_dist_fixed_parameters = []
            list_dist_at_ending_point_fixed_parameters = []

            list_dict_network_states = []
            list_networks_at_cutting_day = []

            for iter in range(Niter_training):

                print "     iter:", iter

                dict_network_states = {}

                list_I = []  #list infected doctors
                list_ordering = []
                list_s = []
                list_A = []
                list_F = []

                ########### set I.C.

                max_order = 0
                for n in G.nodes():
                    G.node[n]["status"] = "S"  # all nodes are Susceptible
                    if G.node[n]['type'] == "shift":
                        list_s.append(n)
                        if G.node[n]['order'] > max_order:
                            max_order = G.node[n]['order']
                    else:
                        # patient-zero pair: Wunderink and Weiss start infected
                        if G.node[n]['label'] == "Wunderink" or G.node[n][
                                "label"] == "Weiss":
                            G.node[n]["status"] = "I"
                            list_I.append(G.node[n]['label'])

                            ######################## WHAT ABOUT SMITH AND SPORN???

                        if G.node[n]['type'] == "A":
                            list_A.append(n)

                        if G.node[n]['type'] == "F":
                            list_F.append(n)

                list_single_t_evolution = []
                list_single_t_evolution.append(
                    2.0)  # I always start with TWO infected doctors!!

                for n in G.nodes(
                ):  # i make some DOCTORs IMMUNE  (anyone except Weiss and Wunderink)
                    if (G.node[n]['type'] == "A") or (G.node[n]['type']
                                                      == "F"):
                        if G.node[n]['label'] != "Wunderink" and G.node[n][
                                "label"] != "Weiss":
                            rand = random.random()
                            if rand < prob_Immune:
                                G.node[n]["status"] = "Immune"

            #   print max_order

            ################# the dynamics starts:

                t = 1
                while t < cutting_day:  # loop over shifts, in order   just until cutting day (training segment)
                    for n in G.nodes():
                        if G.node[n]['type'] == "shift" and G.node[n][
                                'order'] == t:
                            flag_possible_infection = 0
                            for doctor in G.neighbors(
                                    n
                            ):  #first i check if any doctor is infected in this shift
                                if G.node[doctor]["status"] == "I":
                                    flag_possible_infection = 1

                            if flag_possible_infection:
                                for doctor in G.neighbors(
                                        n
                                ):  # then the doctors in that shift, gets infected with prob_infection
                                    if G.node[doctor]["status"] == "S":
                                        rand = random.random()
                                        if rand < prob_infection:
                                            G.node[doctor]["status"] = "I"
                                            list_I.append(
                                                G.node[doctor]["label"])

                    list_single_t_evolution.append(float(
                        len(list_I)))  #/(len(list_A)+len(list_F)))

                    t += 1

                # snapshot of every doctor's status at the cutting day,
                # keyed by label; used later as testing-phase initial conditions
                for n in G.nodes():
                    if G.node[n]['type'] != "shift":
                        dict_network_states[G.node[n]
                                            ["label"]] = G.node[n]["status"]

                list_dict_network_states.append(dict_network_states)

                ######## end t loop

                list_lists_t_evolutions.append(list_single_t_evolution)

                list_dist_fixed_parameters.append(
                    compare_real_evol_vs_simus_to_be_called.compare_two_curves(
                        list_actual_evol_training, list_single_t_evolution))

                list_dist_at_ending_point_fixed_parameters.append(
                    abs(list_single_t_evolution[-1] -
                        list_actual_evol_training[-1])
                )  # i save the distance at the ending point between the current simu and actual evol

            ######## end loop Niter for the training phase

            # [mean curve distance, its std, mean end-point distance]
            list_pair_dist_std_delta_end = []

            list_pair_dist_std_delta_end.append(
                numpy.mean(list_dist_fixed_parameters)
            )  # average dist between the curves over Niter
            list_pair_dist_std_delta_end.append(
                numpy.std(list_dist_fixed_parameters))

            list_pair_dist_std_delta_end.append(
                numpy.mean(list_dist_at_ending_point_fixed_parameters))

            if (
                    numpy.mean(list_dist_at_ending_point_fixed_parameters)
            ) <= delta_end:  # i only consider situations close enough at the ending point

                dict_filenames_tot_distance[
                    output_file2] = list_pair_dist_std_delta_end

                dict_filenames_list_dict_network_states[
                    output_file2] = list_dict_network_states

            # write the iteration-averaged curve (one row per time step)
            file2 = open(output_file2, 'at')
            for s in range(len(list_single_t_evolution)):
                list_fixed_t = []
                for iter in range(Niter_training):
                    list_fixed_t.append(list_lists_t_evolutions[iter][s])
                print >> file2, s, numpy.mean(list_fixed_t)
            file2.close()

            prob_infection += delta_prob
        prob_Immune += delta_prob_Immune

    list_order_dict = compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(
        dict_filenames_tot_distance, "Infection_training", all_team,
        Niter_training)
    # it returns a list of tuples like this :  ('../Results/network_final_schedule_withTeam3_local/infection/Average_time_evolution_Infection_training_p0.7_Immune0.0_2iter_2012.dat', [2540.0, 208.0, 1.0])  the best set of parameters  being the fist one of the elements in the list.

    optimum_filename = list_order_dict[0][0]

    # NOTE(review): parameters are recovered by slicing 3 chars out of the
    # filename, so this silently truncates values like 0.25 -> 0.2
    prob_infection = float(list_order_dict[0][0].split("_p")[1][0:3])
    prob_Immune = float(list_order_dict[0][0].split("_Immune")[1][0:3])

    #   raw_input()
    print "starting testing fase with:"
    print "p=", prob_infection, " and Pimmune=", prob_Immune

    #  i already know the optimum, now i run the dynamics with those values, starting from the average state on the cutting point, and test:

    list_dist_fixed_parameters = []
    list_dist_at_ending_point_fixed_parameters = []

    list_lists_t_evolutions = []

    # per-iteration counts and per-doctor frequencies of I / Immune status
    # at the cutting day, accumulated over the optimum's training runs
    lista_num_infect = []
    lista_I_drs = []
    dict_tot_I_doctors = {}

    lista_num_imm = []
    lista_Imm_drs = []
    dict_tot_Imm_doctors = {}
    for dictionary in dict_filenames_list_dict_network_states[
            optimum_filename]:

        # dictionary={Dr1:status, Dr2:status,}  # one dict per iteration
        num_I = 0.
        num_Imm = 0.

        #raw_input()
        for key in dictionary:
            if dictionary[key] == "I":
                num_I += 1.
                if key not in lista_I_drs:
                    lista_I_drs.append(key)
                    dict_tot_I_doctors[key] = 1.
                else:
                    dict_tot_I_doctors[key] += 1.

            elif dictionary[key] == "Immune":
                num_Imm += 1.
                if key not in lista_Imm_drs:
                    lista_Imm_drs.append(key)
                    dict_tot_Imm_doctors[key] = 1.
                else:
                    dict_tot_Imm_doctors[key] += 1.

        lista_num_infect.append(num_I)
        lista_num_imm.append(num_Imm)

    avg_inf_drs = int(
        numpy.mean(lista_num_infect))  # i find out the average num I
    print "avg of inf:", numpy.mean(lista_num_infect), avg_inf_drs, numpy.std(
        lista_num_infect)

    # round to nearest integer by hand (int() truncates)
    if numpy.mean(lista_num_infect) - avg_inf_drs >= 0.5:
        avg_inf_drs += 1.0
    # print avg_inf_drs

    avg_imm_drs = int(
        numpy.mean(lista_num_imm))  # i find out the average num Immune
    #print "avg of imm:", numpy.mean(lista_num_imm),avg_imm_drs,numpy.std(lista_num_imm)

    if numpy.mean(lista_num_imm) - avg_imm_drs >= 0.5:
        avg_imm_drs += 1.0
    #  print avg_imm_drs

# i sort the list from more frequently infected to less
    list_sorted_dict = sorted(dict_tot_I_doctors.iteritems(),
                              key=operator.itemgetter(1))

    # NOTE(review): reverse() is in place, so new_list_sorted_dict and
    # list_sorted_dict are the same (now descending) list object
    new_list_sorted_dict = list_sorted_dict
    new_list_sorted_dict.reverse()

    #  print "I:",new_list_sorted_dict

    #list_sorted_dict=[(u'Weiss', 5.0), (u'Wunderink', 5.0), (u'Keller', 4.0), (u'Go', 3.0), (u'Cuttica', 3.0), (u'Rosario', 2.0), (u'Radigan', 2.0), (u'Smith', 2.0), (u'RosenbergN', 2.0), (u'Gillespie', 1.0), (u'Osher', 1.0), (u'Mutlu', 1.0), (u'Dematte', 1.0), (u'Hawkins', 1.0), (u'Gates', 1.0)]

    # build cumulative weights (doctor -> own count + counts of all doctors
    # ranked above), used for weighted sampling of initial conditions below
    dict_infect_prob_being_so = {}
    for item in new_list_sorted_dict:
        dict_infect_prob_being_so[item[0]] = 0.

    tot_sum_inf_so_far = 0.
    for item in new_list_sorted_dict:
        dict_infect_prob_being_so[item[0]] = item[1] + tot_sum_inf_so_far
        tot_sum_inf_so_far += item[1]

    list_sorted_dict_infect_prob_being_so = sorted(
        dict_infect_prob_being_so.iteritems(), key=operator.itemgetter(1))

    #  new_list_sorted_dict_dict_infect_prob_being_so=list_sorted_dict_infect_prob_being_so
    # new_list_sorted_dict_dict_infect_prob_being_so.reverse()

    print list_sorted_dict_infect_prob_being_so

    #print tot_sum_inf_so_far

    # i sort the list from more frequently imm to less
    list_sorted_dict_imm = sorted(dict_tot_Imm_doctors.iteritems(),
                                  key=operator.itemgetter(1))

    new_list_sorted_dict_imm = list_sorted_dict_imm
    new_list_sorted_dict_imm.reverse()

    # print "Immunes:",new_list_sorted_dict_imm

    dict_imm_prob_being_so = {}
    for item in new_list_sorted_dict_imm:
        dict_imm_prob_being_so[item[0]] = 0.

    tot_sum_imm_so_far = 0.
    for item in new_list_sorted_dict_imm:
        dict_imm_prob_being_so[item[0]] = item[1] + tot_sum_imm_so_far
        tot_sum_imm_so_far += item[1]

    list_sorted_dict_imm_prob_being_so = sorted(
        dict_imm_prob_being_so.iteritems(), key=operator.itemgetter(1))

    # new_list_sorted_dict_dict_imm_prob_being_so=list_sorted_dict_imm_prob_being_so
    #new_list_sorted_dict_dict_imm_prob_being_so.reverse()

    print list_sorted_dict_imm_prob_being_so  #new_list_sorted_dict_dict_imm_prob_being_so

    for iter in range(Niter_testing):

        #   print "     iter:",iter, len(list_I)
        # NOTE(review): blocks on stdin every iteration; presumably left in
        # for step-by-step debugging
        raw_input()

        # i establish the initial conditions (as probabilistically, according to the cutting point distribution of status)

        dict_label_node = {}
        list_I = []  #list infected doctors
        list_Immune = []  #list infected doctors
        for node in G.nodes():
            if G.node[node]['type'] != "shift":
                label = G.node[node]['label']
                G.node[node]["status"] = "S"  #by default, all are susceptible
                dict_label_node[label] = node

        # NOTE(review): '<=' runs avg_inf_drs+1 draws (off-by-one?), and the
        # inner loop has no break, so a single draw can infect EVERY doctor
        # whose cumulative weight is >= rand, not just one — verify intent
        ii = 0.
        while ii <= avg_inf_drs:

            rand = random.random() * tot_sum_inf_so_far
            for i in range(len(list_sorted_dict_infect_prob_being_so)):
                if rand <= list_sorted_dict_infect_prob_being_so[i][1]:
                    label = list_sorted_dict_infect_prob_being_so[i][0]

                    current_prob_value = list_sorted_dict_infect_prob_being_so[
                        i][1]
                    node = dict_label_node[label]

                    G.node[node]["status"] = "I"
                    print label, "got infected"
                    list_I.append(label)

            # NOTE(review): current_prob_value is unbound here if no entry
            # matched the draw above (possible once weights are decremented)
            #update the prob of being infected:
            for jj in range(len(list_sorted_dict_infect_prob_being_so)):
                if list_sorted_dict_infect_prob_being_so[jj][
                        1] >= current_prob_value:
                    print list_sorted_dict_infect_prob_being_so[jj][1]
                    list_sorted_dict_infect_prob_being_so[jj][
                        1] -= current_prob_value

            list_sorted_dict_infect_prob_being_so[i][
                1] = 0.  #so i don't pick it again
            ii += 1.

        ii = 0.
        while ii <= avg_imm_drs:

            rand = random.random() * tot_sum_imm_so_far
            for i in range(len(list_sorted_dict_imm_prob_being_so)):
                if rand <= list_sorted_dict_imm_prob_being_so[i][1]:
                    label = list_sorted_dict_imm_prob_being_so[i][0]
                    node = dict_label_node[label]

                    G.node[node]["status"] = "Immune"
                    print label, "got immune"
                    list_Immune.append(label)

            # NOTE(review): this reuses current_prob_value from the INFECTION
            # loop when the draw above found no match — looks like a bug; the
            # immune pick's own weight is never assigned to it
            #update the prob of being infected:
            for jj in range(len(list_sorted_dict_imm_prob_being_so)):
                if list_sorted_dict_imm_prob_being_so[jj][
                        1] >= current_prob_value:
                    list_sorted_dict_imm_prob_being_so[jj][
                        1] -= current_prob_value

            list_sorted_dict_imm_prob_being_so[i][
                1] = 0.  #so i don't pick it again
            ii += 1.

        list_single_t_evolution = []
        list_single_t_evolution.append(len(list_I))

        # testing segment: same dynamics, from the cutting day to the end
        t = cutting_day
        while t <= max_order:  # loop over shifts, in order, from the cutting day onwards (testing segment)
            for n in G.nodes():
                if G.node[n]['type'] == "shift" and G.node[n]['order'] == t:
                    flag_possible_infection = 0
                    for doctor in G.neighbors(
                            n
                    ):  #first i check if any doctor is infected in this shift
                        if G.node[doctor]["status"] == "I":
                            flag_possible_infection = 1

                    if flag_possible_infection:
                        for doctor in G.neighbors(
                                n
                        ):  # then the doctors in that shift, gets infected with prob_infection
                            if G.node[doctor]["status"] == "S":
                                rand = random.random()
                                if rand < prob_infection:
                                    G.node[doctor]["status"] = "I"
                                    list_I.append(G.node[doctor]["label"])

            list_single_t_evolution.append(float(len(list_I)))
            print t, len(list_I)
            t += 1

        list_lists_t_evolutions.append(list_single_t_evolution)

        list_dist_fixed_parameters.append(
            compare_real_evol_vs_simus_to_be_called.compare_two_curves(
                list_actual_evol_testing, list_single_t_evolution))

        print " dist:", list_dist_fixed_parameters[-1]

        list_dist_at_ending_point_fixed_parameters.append(
            abs(list_single_t_evolution[-1] - list_actual_evol_testing[-1])
        )  # i save the distance at the ending point between the current simu and actual evol

    ############### end loop Niter  for the testing

    num_valid_endings = 0.
    for item in list_dist_at_ending_point_fixed_parameters:
        if item <= delta_end:  # i count how many realizations i get close enough at the ending point
            num_valid_endings += 1.

    print "average distance of the optimum in the testing segment:", numpy.mean(
        list_dist_fixed_parameters), numpy.std(
            list_dist_fixed_parameters), list_dist_fixed_parameters
    print "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter_testing, list_dist_at_ending_point_fixed_parameters

    output_file5 = dir + "Average_time_evolution_Infection_testing_prob_ic_p" + str(
        prob_infection) + "_" + "Immune" + str(prob_Immune) + "_" + str(
            Niter_testing) + "iter_2012.dat"

    # iteration-averaged testing curve, time axis shifted by cutting_day
    file5 = open(output_file5, 'wt')
    for s in range(len(list_single_t_evolution)):
        list_fixed_t = []
        for iter in range(Niter_testing):
            list_fixed_t.append(list_lists_t_evolutions[iter][s])
        print >> file5, s + cutting_day, numpy.mean(list_fixed_t)
    #  print  s+cutting_day,numpy.mean(list_fixed_t)
    file5.close()

    print "written:", output_file5

    histograma_bines_gral.histograma_bins(
        list_dist_fixed_parameters, 150,
        "../Results/histogr_tot_distances_testing_avg_ic_segment")
    histograma_gral.histograma(
        list_dist_at_ending_point_fixed_parameters,
        "../Results/histogr_distances_ending_testing_avg_ic_segment")
Ejemplo n.º 4
0
def main():

    #full_network_filename="./network_all_users/full_network_all_users.gml"  # i CANT use this network, because the labels dont match the users id from the dB
    # G_full = nx.read_gml(full_network_filename)

    #  list_A=[]   #Testign out how KS works on a random sample
    # list_B=[]
    #for i in range (10000):
    #   list_A.append(random.random())
    #  list_B.append(random.random())

    #print "KS test listA against normal distrib:", stats.kstest(list_A, "norm" )
    # print "KS test listB against normal distrib:", stats.kstest(list_B, "norm" )
    #print "two-sided KS test listA vs listB:", stats.ks_2samp(list_A, list_B)

    unrealistic_weight_change = 70.

    database = "calorie_king_social_networking_2010"
    server = "tarraco.chem-eng.northwestern.edu"
    user = "******"
    passwd = "n1ckuDB!"
    db = Connection(server, database, user, passwd)

    GC_network_filename = "./network_all_users/GC_full_network_all_users_merged_small_comm_roles_diff_layers1_roles_diff_layers1.5.gml"
    G = nx.read_gml(GC_network_filename)

    output_filename = "./network_all_users/Results_comparison_histograms_percent_weight_change.txt"
    file_output = open(output_filename, 'wt')

    #  print "num. nodes:",len(G.nodes())

    list_of_lists = nx.connected_components(G)

    print "num. of components:", len(list_of_lists), "size GC:", len(
        list_of_lists[0])

    list_weight_changes_GC = []
    list_weight_changes_R6friends = []
    for node in G.nodes():
        label = G.node[node]["label"]
        percent_weight_change = G.node[node]["percentage_weight_change"]
        R6_overlap = G.node[node]["R6_overlap"]
        #print node, label, weight_change, R6_overlap
        if percent_weight_change > -unrealistic_weight_change and percent_weight_change < unrealistic_weight_change:  # filter out unrealistic values

            list_weight_changes_GC.append(percent_weight_change)

            if R6_overlap > 0:
                list_weight_changes_R6friends.append(percent_weight_change)

    print >> file_output, "num GC users:", len(
        list_weight_changes_GC), "num users with R6 friends:", len(
            list_weight_changes_R6friends)

    histograma_bines_gral.histograma_bins(
        list_weight_changes_GC, 20,
        "./network_all_users/histogram_weight_change_GC_users.dat")
    histograma_bines_gral.histograma_bins(
        list_weight_changes_R6friends, 20,
        "./network_all_users/histogram_weight_change_users_with_R6friends.dat")

    print >> file_output, "KS test GC against normal distrib:", stats.kstest(
        list_weight_changes_GC, "norm")
    print >> file_output, "KS test users with R6 friends against normal distrib:", stats.kstest(
        list_weight_changes_R6friends, "norm")

    print >> file_output, "two-sided KS test GC vs users with R6 friends:", stats.ks_2samp(
        list_weight_changes_GC, list_weight_changes_R6friends)

    list_weight_changes_all = []
    query1 = """SELECT * FROM users"""
    result1 = db.query(query1)  # is a list of dicts.
    for r1 in result1:
        percent_weight_change = (float(r1['most_recent_weight']) - float(
            r1['initial_weight'])) / float(r1['initial_weight'])

        #    if percent_weight_change > -unrealistic_weight_change and  percent_weight_change < unrealistic_weight_change :   # filter out unrealistic values
        list_weight_changes_all.append(percent_weight_change)

    histograma_bines_gral.histograma_bins(
        list_weight_changes_all, 200,
        "./network_all_users/histogram_weight_change_users_all_200bins.dat")

    print >> file_output, "tot. number users", len(list_weight_changes_all)
    print >> file_output, "KS test all against normal distrib:", stats.kstest(
        list_weight_changes_all, "norm")
    print >> file_output, "two-sided KS test all vs GC:", stats.ks_2samp(
        list_weight_changes_all, list_weight_changes_GC)
    print >> file_output, "two-sided KS test all vs users with R6 friends:", stats.ks_2samp(
        list_weight_changes_GC, list_weight_changes_R6friends)

    file_output.close()
    print "written file:", output_filename

    exit()

    query1 = """SELECT * FROM friends order by src asc"""
    result1 = db.query(query1)  # is a list of dict.

    print "number links:", len(result1)
    list_friends = []

    for r1 in result1:

        label_src = r1['src']

        label_dest = r1['dest']

        if label_src not in list_friends:
            list_friends.append(label_src)
        if label_dest not in list_friends:
            list_friends.append(label_dest)

    print "num networked users:", len(list_friends)
Ejemplo n.º 5
0
def main():
    """Average cooperation level per (T, S) point of the game plane.

    Loads the per-player pickle (one dict per player, with a list of
    per-round dicts), optionally restricts to an age range, accumulates
    the C/D actions observed at each (T, S) payoff point, and writes the
    mean and std of cooperation per point in gnuplot pm3d-style blocks
    (a blank line whenever T changes).  Also writes an age histogram.

    No arguments; the age filter is selected via the local
    ``pupulation_age`` switch (sic -- typo kept, it is a local name).
    Side effects: writes two .dat files and prints to stdout (Python 2).
    """


    pupulation_age="All"   #"young"  # or "adult"   or All

    # age window [min_age_threshold, max_age_threshold] for the filter below
    if pupulation_age== "young":
        min_age_threshold=0
        max_age_threshold=15
    elif pupulation_age== "adult":
        min_age_threshold=16
        max_age_threshold=100
    elif pupulation_age== "All":
        min_age_threshold=0
        max_age_threshold=100

    else:
        print "wrong age range"
        exit()




    filename="../Data/userdata.pickle"
    master_list=pickle.load(open(filename, 'rb'))   # it is a list: one element per player (541)




    ######### output files
    Nbins=15
    name_h="../Results/histogram_ages.dat"

    output_filename1="../Results/Cooperation_TSplane_"+str(pupulation_age)+"_ages.dat"
    output1= open(output_filename1,'wt')

    #########




 ### master_list has the form: [{'guany_total': 110L, 'partida': 1L, 'genere': u'h', 'num_eleccions': 14, 'edat': 50L, 'rationality': 66.666666666666671, 'ambition': 100.0, 'rondes': [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0},...],      'nickname': u'Caesar', 'id': 2L}]


# the 'rondes' key in turn holds a list of dicts (one per round):
   # [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0}, ...]


    # (T, S) point -> list of observed actions / mean coop / std of coop
    dict_TSplane_list_actions={}
    dict_TSplane_avg_coop={}
    dict_TSplane_std_coop={}






    list_ages=[]

    for dictionary in master_list:   # each element of the list is itself a dict

        payoff_total=dictionary['guany_total']
        partida=dictionary['partida']

        # gender: 'h' -> 1, 'd' -> 0
        genero=dictionary['genere']
        if genero =="h":
            genero=1
        elif genero == "d":
            genero=0

        num_elecciones=dictionary['num_eleccions']
        age=int(dictionary['edat'])
        avg_racionalidad=dictionary['rationality']
        avg_ambicion=dictionary['ambition']
        num_rondas=len(dictionary['rondes'])
        nickname=unidecode(dictionary['nickname']).replace(" ", "_")
        user_id=dictionary['id']



        list_dict_rondas=dictionary['rondes']




        list_ages.append(age)


        for dict_ronda in list_dict_rondas:
          ##  each round dict has: {'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}

            payoff=dict_ronda['guany']
            payoff_oponent=dict_ronda['guany_oponent']
            rationality=dict_ronda['rationality']
            ambition=dict_ronda['ambition']

            # encode the player's action: C -> 1., D -> 0.
            action=dict_ronda['seleccio']
            if action =="C":
                action=1.
            elif action=="D":
                action=0.
            # if nothing was chosen, it is None


            num_ronda=dict_ronda['numronda']
            quadrant=dict_ronda['cuadrant'].replace(" ", "_").replace("'", "")


            action_oponent=dict_ronda['seleccio_oponent']
            if action_oponent =="C":
                action_oponent=1.
            elif action_oponent=="D":
                action_oponent=0.
             # if nothing was chosen, it is None


            oponent_id=dict_ronda['oponent']

            T=int(dict_ronda['T'])
            S=int(dict_ronda['S'])

            punto_TS=(T,S)

            # accumulate the action at this (T, S) point, applying the age
            # filter; only rounds with an actual choice are counted
            if punto_TS in  dict_TSplane_list_actions:
                if action !=None:
                      if age >= min_age_threshold and age <=  max_age_threshold:
                          dict_TSplane_list_actions[punto_TS].append(action)  # 1:C,  0:D

            else:
                if action !=None:

                    if age >= min_age_threshold and age <=  max_age_threshold:
                        dict_TSplane_list_actions[punto_TS]=[]
                        dict_TSplane_list_actions[punto_TS].append(action)





            # rescale to percentages (values computed but unused here)
            if rationality !=None:
                rationality=rationality*100
            if ambition !=None:
                ambition=ambition*100


    old_T=None
    ####### the avg cooperation per TS point
    for punto_TS in sorted(dict_TSplane_list_actions):

        dict_TSplane_avg_coop[punto_TS]=numpy.mean(dict_TSplane_list_actions[punto_TS])
        dict_TSplane_std_coop[punto_TS]=numpy.std(dict_TSplane_list_actions[punto_TS])


        # blank line between T blocks (gnuplot pm3d format); also emitted
        # before the very first block since old_T starts as None
        if old_T != punto_TS[0]:
            print >> output1
            print
        print punto_TS[0],punto_TS[1], dict_TSplane_avg_coop[punto_TS], dict_TSplane_std_coop[punto_TS]
        print >> output1,punto_TS[0],punto_TS[1], dict_TSplane_avg_coop[punto_TS], dict_TSplane_std_coop[punto_TS]


        old_T=punto_TS[0]




    histograma_bines_gral.histograma_bins(list_ages,Nbins, name_h)   #x_position , norm_count, count, norm_cumulat_count, cumulat_count ,  float(hist[b])/float(len(lista))




    output1.close()
    print "written output datafile:", output_filename1
def main(graph_name):

    G = nx.read_gml(graph_name)

    list_id_weekends_T3 = look_for_T3_weekends(
        G
    )  # T3 doesnt share fellows in the weekend  (but they are the exception)

    percent_envelope = 95.
    Niter = 1000

    cutting_day = 125

    Nbins = 200  # for the histogram of sum of distances

    for_testing_fixed_set = "YES"  # when YES, fixed values param and get all statistics on final distances etc

    envelopes = "NO"

    delta_end = 3.  # >= than + or -  dr difference at the end of the evolution

    dir_real_data = '../Results/'

    all_team = "NO"  # as adopters or not  NO now means i use the file without fellows, only attendings

    if for_testing_fixed_set == "NO":
        output_file3 = "../Results/weight_shifts/Landscape_parameters_persuasion_" + str(
            Niter) + "iter_A_F_inferred.dat"
        file3 = open(output_file3, 'wt')
        file3.close()

######################################################################################
#  I read the file of the actual evolution of the idea spreading in the hospital:   ##
######################################################################################

    if all_team == "YES":
        print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
        exit()

    else:
        filename_actual_evol = "../Results/Actual_evolution_adopters_from_inference.dat"

    file1 = open(
        filename_actual_evol, 'r'
    )  ## i read the file:  list_dates_and_names_current_adopters.txt  (created with: extract_real_evolution_number_adopters.py)
    list_lines_file = file1.readlines()

    list_actual_evol = []
    for line in list_lines_file:  # [1:]:   # i exclude the first row

        num_adopters = float(line.split("\t")[1])
        list_actual_evol.append(num_adopters)

##################################################################

#../Results/weight_shifts/persuasion/Time_evolutions_Persuasion_training_alpha0.2_damping0.0_mutual_encourg0.5_threshold0.7_unif_distr_1000iter_2012_seed31Oct_finalnetwork_day125.dat

#../Results/weight_shifts/persuasion/Time_evolutions_Persuasion_training_alpha0.5_damping0.4_mutual_encourg0.5_threshold0.5_unif_distr_1000iter_2012_seed31Oct_finalnetwork_day125.dat
#OJO!!! NECESITO DOS DECIMALES SIEMPRE, PARA QUE CUADRE CON EL NOMBRE DE LOS SUB-DIRECTORIOS DONDE LO GUARDO

    alpha_F_min = 0.10  #0.15   # alpha=0: nobody changes their mind
    alpha_F_max = 0.101  #0.351
    delta_alpha_F = 0.10  #AVOID 1.0 OR THE DYNAMICS GETS TOTALLY STUCK AND IT IS NOT ABLE TO PREDICT SHIT!

    min_damping = 0.00  #0.0     #its harder to go back from YES to NO again. =1 means no effect, =0.5 half the movement from Y->N than the other way around, =0 never go back from Y to N
    max_damping = 0.001  #0.451
    delta_damping = 0.10

    min_mutual_encouragement = 0.000  #0.50  # when two Adopters meet, they convince each other even more
    max_mutual_encouragement = 0.001  # 0.51   # KEEP THIS FIXED VALUES FOR NOW
    delta_mutual_encouragement = 0.10

    threshold_min = 0.50  #0.50  # larger than, to be an Adopte
    threshold_max = 0.501  # 0.51    # KEEP THIS FIXED VALUES FOR NOW
    delta_threshold = 0.10  # AVOID 1.0 OR THE DYNAMICS GETS TOTALLY STUCK AND IT IS NOT ABLE TO PREDICT SHIT

    print "\n\nPersuasion process on network, with Niter:", Niter

    dict_filenames_tot_distance = {
    }  # i will save the filename as key and the tot distance from that curve to the original one

    threshold = threshold_min
    while threshold <= threshold_max:
        print "thershold:", threshold

        alpha_F = alpha_F_min
        while alpha_F <= alpha_F_max:  # i explore all the parameter space, and create a file per each set of valuesllkl
            alpha_A = 1.0 * alpha_F
            print "  alpha_F:", alpha_F

            mutual_encouragement = min_mutual_encouragement
            while mutual_encouragement <= max_mutual_encouragement:
                print "    mutual_encouragement:", mutual_encouragement

                damping = min_damping
                while damping <= max_damping:
                    print "      damping:", damping

                    dir = "../Results/weight_shifts/persuasion/alpha%.2f_damping%.2f/" % (
                        alpha_F, damping)

                    if for_testing_fixed_set == "YES":
                        output_file = dir + "Time_evol_Persuasion_alpha" + str(
                            alpha_F
                        ) + "_damping" + str(damping) + "_mutual" + str(
                            mutual_encouragement
                        ) + "_threshold" + str(threshold) + "_" + str(
                            Niter) + "iter_alphaA_eq_alphaF_A_F_inferred.dat"

                    else:
                        output_file = dir + "Time_evol_Persuasion_alpha" + str(
                            alpha_F
                        ) + "_damping" + str(damping) + "_mutual" + str(
                            mutual_encouragement
                        ) + "_threshold" + str(threshold) + "_" + str(
                            Niter) + "iter_alphaA_eq_alphaF_A_F_inferred.dat"

                    file = open(output_file, 'wt')
                    file.close()

                    time_evol_number_adopters_ITER = [
                    ]  # list of complete single realizations of the dynamics
                    list_dist_fixed_parameters = []
                    list_dist_fixed_parameters_testing_segment = []
                    list_dist_abs_at_ending_point_fixed_parameters = []
                    list_dist_at_ending_point_fixed_parameters = []
                    list_final_num_adopt = []
                    list_abs_dist_point_by_point_indiv_simus_to_actual = []
                    list_dist_point_by_point_indiv_simus_to_actual = []

                    #list_abs_dist_at_cutting_day=[]
                    for iter in range(Niter):

                        print "         ", iter
                        list_t = []

                        time_evol_number_adopters = [
                        ]  # for a single realization of the dynamics

                        num_adopters, seed_shift, max_shift = set_ic(
                            G, threshold
                        )  # i establish who is Adopter and NonAdopter initially, and count how many shifts i have total

                        time_evol_number_adopters.append(float(num_adopters))
                        # print "initial number of adopters:", num_adopters
                        list_t.append(0)

                        ########OJO~!!!!!!!!!! COMENTAR ESTO CUANDO ESTOY BARRIENDO TOOOOOOOOOODO EL ESPACIO DE PARAMETROS
                        #                file4 = open(output_file.split('.dat')[0]+"_indiv_iter"+str(iter)+".dat",'wt')
                        #               file4.close()
                        ##########################################

                        # the dynamics starts:
                        t = int(seed_shift
                                ) + 1  # the first time step is just IC.???

                        while t <= max_shift:  # loop over shifts, in chronological order  (the order is the day index since seeding_day)
                            # print 't:',t
                            list_t.append(t)
                            for n in G.nodes():
                                if G.node[n]['type'] == "shift" and G.node[n][
                                        'order'] == t:  # i look for the shift corresponding to that time step

                                    shift_length = int(
                                        G.node[n]['shift_length'])

                                    if shift_length == 2 and n not in list_id_weekends_T3:
                                        shift_length = 1  # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2.  (weekend shifts for T3 are two day long, with no sharing fellows)

#    print "one-day weekend", G.node[n]['label'],G.node[n]['shift_length']

                                    flag_possible_persuasion = 0
                                    for doctor in G.neighbors(n):
                                        if G.node[doctor][
                                                "status"] == "Adopter":  #first i check if any doctor is an adopter in this shift
                                            flag_possible_persuasion = 1
                                            break

                                    if flag_possible_persuasion == 1:
                                        list_doctors = []
                                        for doctor in G.neighbors(
                                                n
                                        ):  # for all drs in that shift
                                            list_doctors.append(doctor)

                                        pairs = itertools.combinations(
                                            list_doctors, 2
                                        )  # cos the shift can be 2 but also 3 doctors
                                        for pair in pairs:
                                            doctor1 = pair[0]
                                            doctor2 = pair[1]

                                            if G.node[doctor1][
                                                    'status'] != G.node[doctor2][
                                                        'status']:  # if they think differently,
                                                # there will be persuasion
                                                persuasion(
                                                    G, damping, doctor1,
                                                    doctor2, alpha_A, alpha_F,
                                                    threshold, shift_length
                                                )  # i move their values of opinion
                                                update_opinions(
                                                    G, threshold, doctor1,
                                                    doctor2
                                                )  #  i update status and make sure the values of the vectors stay between [0,1]

                                            else:  # if two Adopters meet, they encourage each other (if two NonAdopters, nothing happens)

                                                mutual_reinforcement(
                                                    G, mutual_encouragement,
                                                    doctor1, doctor2,
                                                    shift_length)
                                # else:
                                #   print "  no persuasion possible during shift (no adopters present)!"

                            list_Adopters = [
                            ]  #count how many i have at this time
                            for n in G.nodes():
                                try:
                                    if G.node[n]["status"] == "Adopter":
                                        if G.node[n][
                                                "label"] not in list_Adopters:  # and G.node[n]["type"]=="A":
                                            list_Adopters.append(
                                                G.node[n]["label"])
                                except:
                                    pass  # if the node is a shift, it doesnt have a 'status' attribute

                        #  if for_testing_fixed_set=="YES":
                        #    if t==cutting_day:
                        #      list_abs_dist_at_cutting_day.append(abs(float(list_actual_evol[-1])-float(len(list_Adopters))))
                        #     print abs(float(list_actual_evol[-1])-float(len(list_Adopters))), float(list_actual_evol[t]),float(len(list_Adopters))

                            time_evol_number_adopters.append(
                                float(len(list_Adopters)))

                            t += 1

                        ############## end while loop over t

                        time_evol_number_adopters_ITER.append(
                            time_evol_number_adopters)

                        list_dist_fixed_parameters.append(
                            compare_real_evol_vs_simus_to_be_called.
                            compare_two_curves(list_actual_evol,
                                               time_evol_number_adopters))
                        list_dist_fixed_parameters_testing_segment.append(
                            compare_real_evol_vs_simus_to_be_called.
                            compare_two_curves_testing_segment(
                                list_actual_evol, time_evol_number_adopters,
                                cutting_day))

                        list_dist_abs_at_ending_point_fixed_parameters.append(
                            abs(time_evol_number_adopters[-1] -
                                list_actual_evol[-1]))
                        list_dist_at_ending_point_fixed_parameters.append(
                            time_evol_number_adopters[-1] -
                            list_actual_evol[-1])

                        list_final_num_adopt.append(
                            time_evol_number_adopters[-1])

                        ########OJO~!!!!!!!!!! COMENTAR ESTO CUANDO ESTOY BARRIENDO TOOOOOOOOOODO EL ESPACIO DE PARAMETROS
                        #  file4 = open(output_file.split('.dat')[0]+"_indiv_iter"+str(iter)+".dat",'at')
                        # for i in range(len(time_evol_number_adopters)):  #ime step by time step
                        #   print >> file4, i,time_evol_number_adopters[i], alpha_F,damping,mutual_encouragement
                        #file4.close()
                        ########################################################

                        for index in range(len(time_evol_number_adopters)):

                            list_abs_dist_point_by_point_indiv_simus_to_actual.append(
                                abs(time_evol_number_adopters[index] -
                                    list_actual_evol[index]))
                            list_dist_point_by_point_indiv_simus_to_actual.append(
                                time_evol_number_adopters[index] -
                                list_actual_evol[index])

                    #######################end loop over Niter

                    list_pair_dist_std_delta_end = []

                    list_pair_dist_std_delta_end.append(
                        numpy.mean(list_dist_fixed_parameters)
                    )  # average dist between the curves over Niter
                    list_pair_dist_std_delta_end.append(
                        numpy.std(list_dist_fixed_parameters))

                    list_pair_dist_std_delta_end.append(
                        numpy.mean(
                            list_dist_abs_at_ending_point_fixed_parameters))

                    if for_testing_fixed_set == "NO":
                        file3 = open(output_file3,
                                     'at')  # i print out the landscape
                        print >> file3, alpha_F, damping, mutual_encouragement, threshold, numpy.mean(
                            list_dist_abs_at_ending_point_fixed_parameters
                        ), numpy.mean(list_dist_fixed_parameters), numpy.mean(
                            list_final_num_adopt), numpy.std(
                                list_final_num_adopt
                            ), numpy.std(list_final_num_adopt) / numpy.mean(
                                list_final_num_adopt)
                        file3.close()

                    if (
                            numpy.mean(
                                list_dist_abs_at_ending_point_fixed_parameters)
                    ) <= delta_end:  # i only consider situations close enough at the ending point

                        dict_filenames_tot_distance[
                            output_file] = list_pair_dist_std_delta_end

                    file = open(output_file, 'wt')
                    for i in range(len(time_evol_number_adopters)
                                   ):  #time step by time step
                        list_fixed_t = []
                        for iteracion in range(
                                Niter
                        ):  #loop over all independent iter of the process
                            list_fixed_t.append(
                                time_evol_number_adopters_ITER[iteracion][i]
                            )  # i collect all values for the same t, different iter

                        print >> file, list_t[i], numpy.mean(
                            list_fixed_t), numpy.std(
                                list_fixed_t
                            ), alpha_F, damping, mutual_encouragement
                    file.close()

                    print "printed out:  ", output_file

                    if envelopes == "YES":
                        calculate_envelope_set_curves.calculate_envelope(
                            time_evol_number_adopters_ITER, percent_envelope,
                            "Persuasion", [
                                alpha_F, damping, mutual_encouragement,
                                threshold
                            ])

                    if for_testing_fixed_set == "YES":

                        num_valid_endings = 0.
                        for item in list_dist_abs_at_ending_point_fixed_parameters:
                            if item <= delta_end:  # i count how many realizations i get close enough at the ending point
                                num_valid_endings += 1.

                        print "average distance of the optimum in the testing segment:", numpy.mean(
                            list_dist_fixed_parameters), numpy.std(
                                list_dist_fixed_parameters
                            ), list_dist_fixed_parameters, "\n"
                        print "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                            list_dist_at_ending_point_fixed_parameters
                        ), "SD final dist", numpy.std(
                            list_dist_at_ending_point_fixed_parameters
                        ), list_dist_at_ending_point_fixed_parameters

                        histogram_filename = "../Results/weight_shifts/histogr_raw_distances_ending_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"
                        histograma_gral_negv_posit.histograma(
                            list_dist_at_ending_point_fixed_parameters,
                            histogram_filename)

                        histogram_filename2 = "../Results/weight_shifts/histogr_sum_dist_traject_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"

                        histograma_bines_gral.histograma_bins(
                            list_dist_fixed_parameters, Nbins,
                            histogram_filename2)

                        histogram_filename3 = "../Results/weight_shifts/histogr_sum_dist_testing_segment_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"

                        histograma_bines_gral.histograma_bins_zero(
                            list_dist_fixed_parameters_testing_segment, Nbins,
                            histogram_filename3)

                        histogram_filename4 = "../Results/weight_shifts/histogr_abs_dist_point_by_point_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"

                        histograma_gral_negv_posit.histograma(
                            list_abs_dist_point_by_point_indiv_simus_to_actual,
                            histogram_filename4)

                        histogram_filename5 = "../Results/weight_shifts/histogr_dist_point_by_point_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"

                        histograma_gral_negv_posit.histograma(
                            list_dist_point_by_point_indiv_simus_to_actual,
                            histogram_filename5)

                        output_file10 = "../Results/weight_shifts/Summary_results_persuasion_alpha" + str(
                            alpha_F) + "_damping" + str(
                                damping) + "_mutual_encourg" + str(
                                    mutual_encouragement
                                ) + "_threshold" + str(threshold) + "_" + str(
                                    Niter) + "iter_alphaA_eq_alphaF_day" + str(
                                        cutting_day) + "_A_F_inferred.dat"
                        file10 = open(output_file10, 'wt')

                        print >> file10, "Summary results from best fit persuasion with", Niter, "iter, and with values for the parameters:  alpha ", alpha_F, " damping: ", damping, " mutual_encourg: ", mutual_encouragement, " threshold:", threshold

                        print >> file10, "average distance of the optimum in the testing segment:", numpy.mean(
                            list_dist_fixed_parameters), numpy.std(
                                list_dist_fixed_parameters
                            ), list_dist_fixed_parameters
                        print >> file10, "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                            list_dist_at_ending_point_fixed_parameters
                        ), "SD final dist", numpy.std(
                            list_dist_at_ending_point_fixed_parameters
                        ), list_dist_at_ending_point_fixed_parameters

                        print >> file10, "written optimum train_test evolution file:", output_file
                        print >> file10, "written histogram file: ", histogram_filename
                        print >> file10, "written histogram file: ", histogram_filename2

                        file10.close()

                        print "written optimum train_test evolution file:", output_file

                        print "written summary file: ", output_file10

                    damping += delta_damping
                mutual_encouragement += delta_mutual_encouragement
            alpha_F += delta_alpha_F
        threshold += delta_threshold

    if for_testing_fixed_set == "NO":  # only if i am exploring the whole landscape, i need to call this function, otherwise, i already know the optimum
        compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(
            dict_filenames_tot_distance, "Persuasion_weight", all_team, Niter,
            None)  #last argument, cutting day (it doesnt apply)

    if for_testing_fixed_set == "NO":
        print "written landscape file:", output_file3
# Ejemplo n.º 7  (example-separator artifact from the code-scrape; kept as a comment so the file parses)
# 0
def main(graph_name):
    """Grid-search the infection-with-memory model on the training segment.

    Reads the hospital shift network from *graph_name* (GML) and the actual
    adopter-evolution curve from
    ../Results/Actual_evolution_adopters_from_inference.dat, then sweeps the
    three model parameters (prob_Immune, prob_infection, dose).  For every
    parameter triple it runs Niter stochastic realizations of the dynamics up
    to day ``cutting_day`` (training segment only), collects per-realization
    distances to the actual curve, and

      * stores mean/SD statistics keyed by a generated output filename in
        ``dict_filenames_tot_distance`` (only for triples whose mean ending-point
        gap is <= delta_end) and the product-of-distances score in
        ``dict_filenames_prod_distances``,
      * writes two histogram files per accepted triple,
      * finally asks ``compare_real_evol_vs_simus_to_be_called`` to pick the
        optimum by two criteria and prints the parameters parsed back out of
        the winning filenames.

    NOTE(review): relies on module-level imports/helpers defined elsewhere in
    this file (nx, numpy, random, look_for_T3_weekends,
    compare_real_evol_vs_simus_to_be_called, histograma_gral_negv_posit,
    histograma_bines_gral).  Python 2 only (print statements, G.node API).
    """


   cutting_day=175  # to separate   training-testing  (only days < cutting_day are simulated here)



   G = nx.read_gml(graph_name)


   all_team="NO"   # as adopters or not  -- NOTE(review): set but never read in this function

   list_id_weekends_T3=look_for_T3_weekends(G)  # T3 doesnt share fellows in the weekend  (but they are the exception)


   dir_real_data='../Results/'
   Nbins=20   # for the histogram of sum of distances


   delta_end=3.  # >= than + or -  dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!)

   Niter=1000

   fixed_param=""#FIXED_Pimm0_"    # or ""  # for the Results file that contains the sorted list of best parameters


######################################################################################
#  I read the file of the actual evolution of the idea spreading in the hospital:   ##
######################################################################################


   filename_actual_evol="../Results/Actual_evolution_adopters_from_inference.dat"


   # NOTE(review): file1 is never closed; harmless for a script but a
   # `with open(...)` block would be safer.
   file1=open(filename_actual_evol,'r')         ## i read the file:  list_dates_and_names_current_adopters.txt  (created with: extract_real_evolution_number_adopters.py)
   list_lines_file=file1.readlines()


   list_actual_evol=[]
   for line in list_lines_file:      # [1:]:   # i exclude the first row

      # column 1 (tab-separated) is the number of adopters on that day
      num_adopters= float(line.split("\t")[1])
      list_actual_evol.append(num_adopters)


   # only the first cutting_day points are compared against the simulations
   list_actual_evol_training=list_actual_evol[:cutting_day]
  # list_actual_evol_testing=list_actual_evol[(cutting_day-1):]  # i dont need this one


##################################################################

   # infection probability per encounter: swept in [0.10, 1.01) step 0.1
   prob_min=0.10
   prob_max=1.01
   delta_prob=0.1


   # probability that a doctor starts Immune: swept in [0.0, 1.001) step 0.1
   prob_Immune_min=0.0
   prob_Immune_max=1.001
   delta_prob_Immune=0.1

# NOTE(review): original comment said "threshold is not personal", but the code
# below draws a personal_threshold uniformly in (0,1) for every node each
# realization -- the threshold IS personal here.

                   # of a single encounter with an infected  (it cant be zero or it doesnt make sense!)
   dose_min=0.05              #infect_threshold_min
   dose_max=1.001         #######infect_threshold_min/10.
   delta_dose=0.05           ##infect_threshold_min/10.


   # NOTE(review): `dir` shadows the Python builtin of the same name
   dir="../Results/weight_shifts/infection/"

   dict_filenames_tot_distance={}   # i will save the filename as key and the tot distance from that curve to the original one
   dict_filenames_prod_distances={}   # filename -> mean(traj dist) * mean(|ending gap|)



   prob_Immune=prob_Immune_min
   while prob_Immune<= prob_Immune_max:

      print "prom Immune:",prob_Immune

      prob_infection=prob_min
      while prob_infection<= prob_max:

            print "  p:",prob_infection

            dose=dose_min
            while dose <= dose_max:

               print "  dose:",dose


               # NOTE(review): this filename is only used as a dictionary key
               # below -- the actual write (file2) is commented out.
               output_file2=dir+"Average_time_evolution_Infection_memory_training_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_threshold_from_distrib_dose"+str(dose)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"
             #  file2 = open(output_file2,'wt')
              # file2.close()


               list_lists_t_evolutions=[]     # i create the empty list of list for the Niter temporal evolutions

               list_dist_fixed_parameters=[]                     # per-iter summed distance simu-vs-actual
               list_dist_at_ending_point_fixed_parameters=[]     # per-iter signed gap at day cutting_day-1
               list_dist_abs_at_ending_point_fixed_parameters=[] # per-iter absolute gap at day cutting_day-1


               # NOTE(review): `iter` shadows the Python builtin
               for iter in range(Niter):

              #    print "     iter:",iter


            ########### set I.C.

                  # reset every node, then seed the two index cases
                  list_I=[]  #list infected doctors
                  max_order=0
                  for n in G.nodes():
                     G.node[n]["status"]="S"  # all nodes are Susceptible
                     G.node[n]["infec_value"]=0.
                     G.node[n]["personal_threshold"]=random.random()  # for a dr to become infected

                     if G.node[n]['type']=="shift":
                        if  G.node[n]['order']>max_order:
                           max_order=G.node[n]['order'] # to get the last shift-order for the time loop
                           # NOTE(review): max_order is computed but never used;
                           # the training loop below stops at cutting_day instead.
                     else:
                        if G.node[n]['label']=="Wunderink"  or G.node[n]["label"]=="Weiss":
                           G.node[n]["status"]="I"
                           G.node[n]["infec_value"]=G.node[n]["personal_threshold"]+ 1.  # force above threshold
                           list_I.append(G.node[n]['label'])



                  list_single_t_evolution=[]
                  list_single_t_evolution.append(2.0)  # I always start with TWO infected doctors!! (Weiss & Wunderink seeded above)


                  for n in G.nodes():   # i make some DOCTORs INMUNE  (anyone except Weiss and Wunderink)
                     if (G.node[n]['type']=="A") or ( G.node[n]['type']=="F"):
                        if G.node[n]['label']!="Wunderink"  and G.node[n]["label"]!="Weiss":
                           rand=random.random()
                           if rand< prob_Immune:
                              G.node[n]["status"]="Immune"



                  ################# the dynamics starts:

                  # t = 1 .. cutting_day-1, so together with the initial point the
                  # trajectory has exactly cutting_day values (matches the training slice)
                  t=1
                  while t< cutting_day:  # loop over shifts, in order
                     for n in G.nodes():
                        if G.node[n]['type']=="shift" and G.node[n]['order']==t:
                           shift_length=int(G.node[n]['shift_length'])

                           if shift_length==2 and n not in list_id_weekends_T3:
                              shift_length=1   # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2.  (weekend shifts for T3 are two day long, with no sharing fellows)

                           flag_possible_infection=0
                           for doctor in G.neighbors(n): #first i check if any doctor is infected in this shift
                              if G.node[doctor]["status"]=="I":
                                 flag_possible_infection=1


                           if flag_possible_infection:
                              for doctor in G.neighbors(n): # then the doctors in that shift, gets infected with prob_infection

                                 for i in range(shift_length):   # i repeat the infection process several times, to acount for shift lenght
                                    if G.node[doctor]["status"]=="S":
                                       rand=random.random()
                                       if rand<prob_infection:  # with prob p the infection occurres

                                          G.node[doctor]["infec_value"]+=dose  # and bumps the infection_value of that susceptible dr

                                          if G.node[doctor]["infec_value"]>= G.node[doctor]["personal_threshold"]:  # the threshold for infection is personal

                                             G.node[doctor]["status"]="I"


                                             list_I.append(G.node[doctor]["label"])


                     list_single_t_evolution.append(float(len(list_I)))

                     t+=1
                     ######## end t loop



                  list_lists_t_evolutions.append(list_single_t_evolution)


                  #print "actual:",len(list_actual_evol_training),"  simu:",len(list_single_t_evolution)
                  list_dist_fixed_parameters.append(compare_real_evol_vs_simus_to_be_called.compare_two_curves( list_actual_evol_training,list_single_t_evolution))

                  list_dist_abs_at_ending_point_fixed_parameters.append( abs(list_single_t_evolution[-1]-list_actual_evol_training[-1]) )   # i save the distance at the ending point between the current simu and actual evol
                  list_dist_at_ending_point_fixed_parameters.append( list_single_t_evolution[-1]-list_actual_evol_training[-1])    # i save the distance at the ending point between the current simu and actual evol


                  ######## end loop Niter for the training fase



               # summary stats for this (prob_Immune, prob_infection, dose) triple:
               # [mean traj dist, SD traj dist, mean |ending gap|]
               list_pair_dist_std_delta_end=[]

               list_pair_dist_std_delta_end.append(numpy.mean(list_dist_fixed_parameters) )   # average dist between the curves over Niter
               list_pair_dist_std_delta_end.append(numpy.std(list_dist_fixed_parameters) )

               list_pair_dist_std_delta_end.append(numpy.mean(list_dist_abs_at_ending_point_fixed_parameters))


               value=numpy.mean(list_dist_fixed_parameters) *numpy.mean(list_dist_abs_at_ending_point_fixed_parameters)# if SD=0, it is a problem, because then that is the minimun value, but not the optimum i am looking for!!

               dict_filenames_prod_distances[output_file2]=  value


               if (numpy.mean(list_dist_abs_at_ending_point_fixed_parameters)) <= delta_end:  # i only consider situations close enough at the ending point

                  dict_filenames_tot_distance[output_file2]=list_pair_dist_std_delta_end


                  # histogram of the signed ending-point gaps over the Niter realizations
                  histogram_filename="../Results/weight_shifts/histogr_raw_distances_ending_infection_memory_training_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_threshold_from_distrib_dose"+str(dose)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"
                  histograma_gral_negv_posit.histograma(list_dist_at_ending_point_fixed_parameters,histogram_filename)

                  # histogram (Nbins bins) of the summed trajectory distances
                  histogram_filename2="../Results/weight_shifts/histogr_sum_dist_traject_infection_memory_training_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_threshold_from_distrib_dose"+str(dose)+"_"+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"

                  histograma_bines_gral.histograma_bins(list_dist_fixed_parameters,Nbins,histogram_filename2)


                  print  "written histogram file: ",histogram_filename
                  print  "written histogram file: ",histogram_filename2


               dose+= delta_dose
            prob_infection+= delta_prob
      prob_Immune+= delta_prob_Immune




   string_name="infection_memory_training_"+fixed_param+str(Niter)+"iter_day"+str(cutting_day)+"_A_F_inferred.dat"   # for the "Results" file with the sorted list of files

   # criterion 1: smallest mean trajectory distance among triples that ended within delta_end
   list_order_dict= compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(dict_filenames_tot_distance,string_name,Niter,cutting_day)
# it returns a list of tuples like this :  ('../Results/network_final_schedule_withTeam3_local/infection/Average_time_evolution_Infection_training_p0.7_Immune0.0_2iter_2012.dat', [2540.0, 208.0, 1.0])  the best set of parameters  being the fist one of the elements in that list.



   # criterion 2: smallest product of mean trajectory distance and mean |ending gap|
   list_order_dict2= compare_real_evol_vs_simus_to_be_called.pick_minimum_prod_distances(dict_filenames_prod_distances,string_name,Niter,cutting_day)


   # the winning parameters are parsed back out of the filename used as dict key
   prob_infection=float(list_order_dict[0][0].split("_p")[1].split("_")[0])
   prob_Immune=float(list_order_dict[0][0].split("_Immune")[1].split("_")[0])
   dose=float(list_order_dict[0][0].split("_dose")[1].split("_")[0])



   print "\nOptimum parameters (old method) at day",cutting_day," are: p=",prob_infection," Pimmune=",prob_Immune," infection threshold from distribution, and dose=",dose



  # optimum_filename=list_order_dict2[0][0]

   prob_infection=float(list_order_dict2[0][0].split("_p")[1].split("_")[0])
   prob_Immune=float(list_order_dict2[0][0].split("_Immune")[1].split("_")[0])
   dose=float(list_order_dict2[0][0].split("_dose")[1].split("_")[0])


   print "Optimum parameters (product of distances and SDs) at day",cutting_day," are: p=",prob_infection," Pimmune=",prob_Immune," infection threshold from distribution, and dose=",dose
def main():
    """Profile the behavioural clusters of game players.

    Loads the master pickle of user records plus the k-means cluster
    membership pickles, then for every cluster (and for the hand-made
    strategy lists) prints descriptive statistics of age, gender and
    earnings, bootstraps a z-score for the gender composition, writes
    histogram files via histograma_bines_gral, and finally runs pairwise
    KS-tests on the age distributions.

    NOTE(review): Python 2 code (print statements, raw_input); assumes
    the pickles under ../Results/ exist -- no error handling on open().
    """

    Nbins = 12  # number of bins for every histogram written below

    Num_clusters = 5  # k of the k-means partition being profiled

    Niter = 200  # for bootstraping the gender distributions by cluster vs all

    #####Clusters from kmeans k=5, (iter109)

    #####Cluster1: cooperate only in up triangle H   (Competidores)
    #####Cluster2: cooperate in left half of plane   (Cazarecompensas)
    #####Cluster3: rarunos
    #####Cluster4: cooperate everywhere  (Cooperators)
    #####Cluster5: cooperate in top half of plane (Conservadores)

    # English display names for the clusters listed above (keyed 1..5)
    dict_cluster_number_name = {
        1: "Competitive",
        2: "Greedy",
        3: "Clueless",
        4: "Altruists",
        5: "Conservative"
    }

    ######### input masterfile
    filename = "../Data/userdata.pickle"
    master_list = pickle.load(
        open(filename, 'rb')
    )  #### master_list has the form: [{'guany_total': 110L, 'partida': 1L, 'genere': u'h', 'num_eleccions': 14, 'edat': 50L, 'rationality': 66.666666666666671, 'ambition': 100.0, 'rondes': [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0},...],      'nickname': u'Caesar', 'id': 2L}]

    #   threshold_flag=   "_treshold0.8"   # "_treshold0.8"  or ""
    # gral_filename="../Results/dau2014_partition"+threshold_flag+"_Carlos_"

    gral_filename = "../Results/list_"

    #../Results/dau2014_partition_Carlos_rationals.pickle
    #../Results/dau2014_partition_treshold0.8_Carlos_rationals.pickle

    #../Results/list_rationals.pickle   # mine

    # Hand-made strategy lists (besides the k-means clusters); kept in the
    # same order as list_names so index i pairs a list with its name.
    list_lists_def = []
    list_names = ["weirdos", "rationals", "mostly_def", "altruists"]

    file_weirdos = gral_filename + "weirdos.pickle"
    list_weirdos = pickle.load(open(file_weirdos, 'rb'))
    list_lists_def.append(list_weirdos)

    file_rationals = gral_filename + "rationals.pickle"
    list_rationals = pickle.load(open(file_rationals, 'rb'))
    list_lists_def.append(list_rationals)

    file_mostly_def = gral_filename + "mostly_def.pickle"
    list_mostly_def = pickle.load(open(file_mostly_def, 'rb'))
    list_lists_def.append(list_mostly_def)

    file_altruists = gral_filename + "altruists.pickle"
    list_altruists = pickle.load(open(file_altruists, 'rb'))
    list_lists_def.append(list_altruists)

    filename_all = "../Results/list_all_users.pickle"
    list_all_users = pickle.load(open(filename_all, 'rb'))

    # Flatten the master list into a user_id -> attributes lookup table
    dict_user_id_info = {}
    for dictionary in master_list:  # each element of the list is itself a dict

        partida = dictionary['partida']
        num_elecciones = int(dictionary['num_eleccions'])
        age = int(dictionary['edat'])
        num_rondas = len(dictionary['rondes'])
        tot_earnings = int(dictionary['guany_total'])
        nickname = unidecode(dictionary['nickname']).replace(" ", "_")

        earnings_by_round = tot_earnings / float(num_elecciones)

        print tot_earnings, float(num_elecciones), earnings_by_round
        # encode gender as 1 = male ('h'), 0 = female ('d')
        gender = dictionary['genere']
        if gender == 'h':
            gender = 1
        elif gender == 'd':
            gender = 0

        user_id = int(dictionary['id'])

        dict_user_id_info[user_id] = {}

        dict_user_id_info[user_id]['num_elecciones'] = num_elecciones
        dict_user_id_info[user_id]['age'] = age
        dict_user_id_info[user_id]['gender'] = gender
        dict_user_id_info[user_id]['tot_earnings'] = tot_earnings
        dict_user_id_info[user_id]['earnings_by_round'] = earnings_by_round

#        print user_id, dict_user_id_info[user_id]['age'], dict_user_id_info[user_id]['gender'], dict_user_id_info[user_id]['tot_earnings'], nickname

    dict_cluster_number_list_ages_in_clusters = {
    }  # for the pairwise KS-test comparison
    list_cluster_numbers = []

    # Pooled (all-clusters) distributions, accumulated inside the loop below
    list_age_all = []
    list_gender_all = []
    list_tot_earnings_all = []
    list_norm_earnings_by_round_all = []

    for cluster in range(Num_clusters):
        cluster += 1  # cluster files/names are 1-indexed
        print "\ncluster", cluster, dict_cluster_number_name[cluster]
        list_age_current_cluster = []
        list_gender_current_cluster = []
        list_tot_earnings_current_cluster = []
        list_norm_earnings_by_round_current_cluster = []

        file_cluster = "../Results/Niter_clustering/list_clusters_kmeans" + str(
            Num_clusters) + "-" + str(cluster) + "_109iter.pickle"

        #  file_cluster="../Results/list_clusters_kmeans5_dist_notypes-1.pickle  # the MeV data

        list_current_cluster = pickle.load(open(file_cluster, 'rb'))

        for user_id in list_current_cluster:
            list_age_current_cluster.append(dict_user_id_info[user_id]['age'])
            list_gender_current_cluster.append(
                dict_user_id_info[user_id]['gender'])
            list_tot_earnings_current_cluster.append(
                dict_user_id_info[user_id]['tot_earnings'])
            list_norm_earnings_by_round_current_cluster.append(
                dict_user_id_info[user_id]['earnings_by_round'])

            list_age_all.append(dict_user_id_info[user_id]['age'])
            list_gender_all.append(dict_user_id_info[user_id]['gender'])
            list_tot_earnings_all.append(
                dict_user_id_info[user_id]['tot_earnings'])
            list_norm_earnings_by_round_all.append(
                dict_user_id_info[user_id]['earnings_by_round'])

        dict_cluster_number_list_ages_in_clusters[
            cluster] = list_age_current_cluster

        ######## i calculate the z-score of a cluster's gender distribution vs the total population
        # Bootstrap: draw Niter synthetic "clusters" of the same size from the
        # pooled gender list and compare their mean to the real cluster's mean.
        list_avg_gender_in_synthetic_cluster = []
        for iter in range(Niter):  # bootstrapping the age distribution vs all
            list_synth = sample_with_replacement(
                list_gender_all, len(list_gender_current_cluster))
            list_avg_gender_in_synthetic_cluster.append(numpy.mean(list_synth))

        z_score = (numpy.mean(list_gender_current_cluster) -
                   numpy.mean(list_avg_gender_in_synthetic_cluster)
                   ) / numpy.std(list_avg_gender_in_synthetic_cluster)

        print "z-score of gender distributions of cluster", dict_cluster_number_name[
            cluster], "vs all:", z_score

        raw_input()  # pause so the z-score can be read; press Enter to continue

        histograma_bines_gral.histograma_bins(
            list_age_current_cluster, Nbins, "../Results/Hist_age_cluster" +
            str(Num_clusters) + "_" + str(cluster) + ".dat")

        print "avg. age:", numpy.mean(
            list_age_current_cluster), "std:", numpy.std(
                list_age_current_cluster)

        histograma_bines_gral.histograma_bins(
            list_tot_earnings_current_cluster, Nbins,
            "../Results/Hist_tot_earnings_cluster" + str(Num_clusters) + "_" +
            str(cluster) + ".dat")
        print "avg. earnings:", numpy.mean(
            list_tot_earnings_current_cluster), "std:", numpy.std(
                list_tot_earnings_current_cluster)

        histograma_bines_gral.histograma_bins(
            list_norm_earnings_by_round_current_cluster, Nbins,
            "../Results/Hist_norm_earnings_by_round_cluster" +
            str(Num_clusters) + "_" + str(cluster) + ".dat")
        print "avg. earnings/num_rounds:", numpy.mean(
            list_norm_earnings_by_round_current_cluster), "std:", numpy.std(
                list_norm_earnings_by_round_current_cluster)

        print "avg. gender:", numpy.mean(
            list_gender_current_cluster), numpy.std(
                list_gender_current_cluster), "   (1: male, 0: female)"

        print "cluster size:", len(list_current_cluster)

    # Same descriptive statistics for the hand-made strategy lists
    # (no z-score bootstrap here, and nothing added to the *_all lists)
    for i in range(len(list_lists_def)):

        name_def = list_names[i]
        print name_def
        list_age_current_cluster = []
        list_gender_current_cluster = []
        list_tot_earnings_current_cluster = []
        list_norm_earnings_by_round_current_cluster = []

        list_current_cluster = list_lists_def[i]

        for user_id in list_current_cluster:
            list_age_current_cluster.append(dict_user_id_info[user_id]['age'])
            list_gender_current_cluster.append(
                dict_user_id_info[user_id]['gender'])
            list_tot_earnings_current_cluster.append(
                dict_user_id_info[user_id]['tot_earnings'])
            list_norm_earnings_by_round_current_cluster.append(
                dict_user_id_info[user_id]['earnings_by_round'])

        histograma_bines_gral.histograma_bins(
            list_age_current_cluster, Nbins,
            "../Results/Hist_age_cluster_" + str(name_def) + ".dat")

        print "avg. age:", numpy.mean(
            list_age_current_cluster), "std:", numpy.std(
                list_age_current_cluster)

        histograma_bines_gral.histograma_bins(
            list_tot_earnings_current_cluster, Nbins,
            "../Results/Hist_tot_earnings_cluster_" + str(name_def) + ".dat")

        print "avg. earnings:", numpy.mean(
            list_tot_earnings_current_cluster), "std:", numpy.std(
                list_tot_earnings_current_cluster)

        histograma_bines_gral.histograma_bins(
            list_norm_earnings_by_round_current_cluster, Nbins,
            "../Results/Hist_norm_earnings_by_round_cluster_" + str(name_def) +
            ".dat")
        print "avg. earnings/num_rounds:", numpy.mean(
            list_norm_earnings_by_round_current_cluster), "std:", numpy.std(
                list_norm_earnings_by_round_current_cluster)

        print "avg. gender:", numpy.mean(
            list_gender_current_cluster), numpy.std(
                list_gender_current_cluster), "   (1: male, 0: female)"

        print "cluster size:", len(list_current_cluster)

        print

    # Whole-population histograms and statistics (users pooled over clusters)
    histograma_bines_gral.histograma_bins(list_age_all, Nbins,
                                          "../Results/Hist_age_all.dat")

    print "general avg. age:", numpy.mean(list_age_all), "std:", numpy.std(
        list_age_all)

    histograma_bines_gral.histograma_bins(
        list_tot_earnings_all, Nbins, "../Results/Hist_tot_earnings_all.dat")

    print "general avg. earnings:", numpy.mean(
        list_tot_earnings_all), "std:", numpy.std(list_tot_earnings_all)

    histograma_bines_gral.histograma_bins(
        list_norm_earnings_by_round_all, Nbins,
        "../Results/Hist_norm_earnings_by_round_all.dat")
    print "avg. earnings/num_rounds:", numpy.mean(
        list_norm_earnings_by_round_all), "std:", numpy.std(
            list_norm_earnings_by_round_all)

    print "general avg. gender:", numpy.mean(list_gender_all), numpy.std(
        list_gender_all), "   (1: male, 0: female)"

    print "tot population:", len(list_age_all)

    ######## KS test
    #This tests whether 2 samples are drawn from the same distribution. Note that, like in the case of the one-sample K-S test, the distribution is assumed to be continuous.

    #This is the two-sided test, one-sided tests are not implemented. The test uses the two-sided asymptotic Kolmogorov-Smirnov distribution.

    #If the K-S statistic is small or the p-value is high, then we cannot reject the hypothesis that the distributions of the two samples are the same.

    print "\nKS-tests for age distributions by cluster:"
    # All unordered cluster pairs; each cluster is also compared against the
    # pooled population (so cluster-vs-all lines repeat across pairs).
    for item in itertools.combinations(
            dict_cluster_number_list_ages_in_clusters, 2):

        list_ages_cluster1 = dict_cluster_number_list_ages_in_clusters[item[0]]
        list_ages_cluster2 = dict_cluster_number_list_ages_in_clusters[item[1]]

        print "for clusters:", dict_cluster_number_name[
            item[0]], dict_cluster_number_name[
                item[1]], "  (KS,p):", stats.ks_2samp(list_ages_cluster1,
                                                      list_ages_cluster2)

        print "  for cluster:", dict_cluster_number_name[
            item[0]], "vs all   (KS,p):", stats.ks_2samp(
                list_ages_cluster1, list_age_all)
        print "  for cluster:", dict_cluster_number_name[
            item[1]], "vs all   (KS,p):", stats.ks_2samp(
                list_ages_cluster2, list_age_all)

    print "\n   **If the K-S statistic is small or the p-value is high, then we cannot reject the hypothesis that the distributions of the two samples are the same."
Ejemplo n.º 9
0
def main(graph_name):
   """Run Niter infection simulations on the hospital shift network.

   Reads the GML graph, seeds Weiss and Wunderink as infected, makes
   every other doctor immune with probability prob_Immune, spreads the
   infection shift by shift with probability prob_infection, then
   compares the distance of the actual adoption curve to the average
   simulated curve and writes the probability estimate to files.

   NOTE(review): Python 2 / networkx 1.x code (print statements,
   G.node); parameters are hard-coded below rather than passed in.
   """

   G = nx.read_gml(graph_name)

   # fixed model parameters for this single-point run
   prob_infection=0.9
   prob_Immune=0.5

   Niter=100000  # number of independent stochastic realizations

   dir_real_data='../Results/'

   all_team="NO"   # as adopters or not

######################################################################################
#  I read the file of the actual evolution of the idea spreading in the hospital:   ##
######################################################################################

   if all_team=="YES":
      filename_actual_evol=dir_real_data+"HospitalModel_august1_adoption_counts_all_team_as_adopters_SIMPLER.csv"

   else:
      filename_actual_evol=dir_real_data+"HospitalModel_august1_adoption_counts_SIMPLER.csv"
   # no longer need to ALSO change the file name in the curve-comparison code

   list_actual_evol=[]
   result_actual_file= csv.reader(open(filename_actual_evol, 'rb'), delimiter=',')
   cont=0
   for row in result_actual_file:
       if cont>0:   # i ignore the first line with the headers

           num_adopters= row[3]  # 4th column: cumulative number of adopters

           list_actual_evol.append(float(num_adopters))

       cont+=1

##################################################################

# i create the empty list of list for the Niter temporal evolutions
   num_shifts=0
   for n in G.nodes():
      G.node[n]["status"]="S"
      if G.node[n]['type']=="shift":
         num_shifts+=1

      #  list_final_I_values_fixed_p=[]  # i dont care about the final values right now, but about the whole time evol
   list_lists_t_evolutions=[]  # one time series per realization

   for iter in range(Niter):

            print "     iter:",iter

            list_I=[]  #list infected doctors
            list_ordering=[]
            list_s=[]   # shift nodes
            list_A=[]   # attending doctors
            list_F=[]   # fellow doctors

            ########### set I.C.

            max_order=0
            for n in G.nodes():
                G.node[n]["status"]="S"  # all nodes are Susceptible
                if G.node[n]['type']=="shift":
                    list_s.append(n)
                    if  G.node[n]['order']>max_order:
                        max_order=G.node[n]['order']   # last shift index = simulation horizon
                else:
                    # the two seed adopters
                    if G.node[n]['label']=="Wunderink"  or G.node[n]["label"]=="Weiss":
                        G.node[n]["status"]="I"
                        list_I.append(G.node[n]['label'])

                        ######################## WHAT ABOUT SMITH AND SPORN???

                    if G.node[n]['type']=="A":
                        list_A.append(n)

                    if G.node[n]['type']=="F":
                        list_F.append(n)

            list_single_t_evolution=[]
            list_single_t_evolution.append(2.0)  # I always start with TWO infected doctors!!

            for n in G.nodes():   # i make some DOCTORs INMUNE  (anyone except Weiss and Wunderink)
                if (G.node[n]['type']=="A") or ( G.node[n]['type']=="F"):
                    if G.node[n]['label']!="Wunderink"  and G.node[n]["label"]!="Weiss": # these particular two cant be immune
                        rand=random.random()
                        if rand< prob_Immune:
                            G.node[n]["status"]="Immune"

         #   print max_order

            ################# the dynamics starts:

            t=1
            while t<= max_order:  # loop over shifts, in order
                for n in G.nodes():
                    if G.node[n]['type']=="shift" and G.node[n]['order']==t:
                        flag_possible_infection=0
                        for doctor in G.neighbors(n): #first i check if any doctor is infected in this shift
                            if G.node[doctor]["status"]=="I":
                                flag_possible_infection=1

                        if flag_possible_infection:
                            for doctor in G.neighbors(n): # then the doctors in that shift, gets infected with prob_infection
                                if G.node[doctor]["status"]=="S":
                                    rand=random.random()
                                    if rand<prob_infection:
                                        G.node[doctor]["status"]="I"
                                        list_I.append(G.node[doctor]["label"])

                list_single_t_evolution.append(float(len(list_I)))#/(len(list_A)+len(list_F)))

                t+=1

            list_lists_t_evolutions.append(list_single_t_evolution)

    ######## end Niter

   ##############end loop Niter

   # average the Niter realizations, time step by time step
   average_t_evolution=[]
   for i in range(len(list_single_t_evolution)):  #time step by time step
      list_fixed_t=[]
      for iteracion in range (Niter): #loop over all independent iter of the process
         list_fixed_t.append(list_lists_t_evolutions[iteracion][i])  # i collect all values for the same t, different iter

      average_t_evolution.append(numpy.mean(list_fixed_t))   # i create the mean time evolution

   # distance of every individual realization to the average curve
   list_dist_fixed_parameters=[]
   for lista in list_lists_t_evolutions:
      list_dist_fixed_parameters.append(compare_real_evol_vs_simus_to_be_called.compare_two_curves( lista,average_t_evolution))

   lista_tuplas=histograma_bines_gral.histograma_bins(list_dist_fixed_parameters,75, "../Results/histogr_distances_indiv_infect_simus_to_the_average_curve_p"+str(prob_infection)+"_"+"Immune"+str(prob_Immune)+"_"+str(Niter)+"iter_2012.dat") # Nbins=50

   #print lista_tuplas

   starting_point=compare_real_evol_vs_simus_to_be_called.compare_two_curves( list_actual_evol,average_t_evolution)   # distance between actual curve and the mean curve

   # P(distance >= starting_point) under the simulated distance distribution
   prob=calculate_numeric_integral.integral(lista_tuplas, starting_point)

   print "the probability of having a  distance equal or larger than",starting_point, "between actual-average curve is:", prob, "(it is to say, the prob. of the actual evolution being an individual realization of the Infection Model)"

   if all_team=="YES":
      file = open("../Results/distance_actual_to_average_curve_infection_all_team_as_adopters.dat",'wt')
   else:
      file = open("../Results/distance_actual_to_average_curve_infection.dat",'wt')

   # two points to draw the actual-curve distance as a vertical marker
   print >> file,starting_point, 0.
   print >> file,starting_point+0.1, 1.
   file.close()

   if all_team=="YES":
      file2 = open("../Results/Results_distance_actual_to_average_curve_infection_all_team_as_adopters.dat",'wt')

   else:
      file2 = open("../Results/Results_distance_actual_to_average_curve_infection.dat",'wt')

   print >> file2, "the probability of having a  distance equal or larger than",starting_point, "between actual-average curve is:", prob, "(it is to say, the prob. of the actual evolution being an individual realization of the Infection Model)"

   file2.close()
def main(graph_name):

    G = nx.read_gml(graph_name)

    all_team = "NO"  # as adopters or not

    Niter = 20

    dir_real_data = '../Results/'

    delta_end = 300  # >= than + or -  dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!)

    output_file3 = dir_real_data + "Landscape_parameters_infection_" + str(
        Niter) + "iter.dat"
    file3 = open(output_file3, 'wt')

    file3.close()

    ######################################################################################
    #  I read the file of the actual evolution of the idea spreading in the hospital:   ##
    ######################################################################################

    if all_team == "YES":
        filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_all_team_as_adopters_SIMPLER.csv"

    else:
        filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_SIMPLER.csv"
    #ya no necesito CAMBIAR TB EL NOMBRE DEL ARCHIVO EN EL CODIGO PARA COMPARAR CURVAs

    list_actual_evol = []
    result_actual_file = csv.reader(open(filename_actual_evol, 'rb'),
                                    delimiter=',')
    cont = 0
    for row in result_actual_file:
        if cont > 0:  # i ignore the first line with the headers

            num_adopters = row[3]

            list_actual_evol.append(float(num_adopters))

        cont += 1

##################################################################

#../Results/network_final_schedule_withTeam3/infection/Average_time_evolution_Infection_p0.9_Immune0.5_1000iter_2012.dat

    prob_min = 1.00
    prob_max = 1.01
    delta_prob = 0.1

    prob_Immune_min = 0.50
    prob_Immune_max = 0.51
    delta_prob_Immune = 0.1

    dir = "../Results/network_final_schedule_withTeam3_local/infection/"

    dict_filenames_tot_distance = {
    }  # i will save the filename as key and the tot distance from that curve to the original one

    prob_Immune = prob_Immune_min
    while prob_Immune <= prob_Immune_max:

        print "prom Immune:", prob_Immune

        prob_infection = prob_min
        while prob_infection <= prob_max:

            print "  p:", prob_infection

            output_file2 = dir + "Average_time_evolution_Infection_p" + str(
                prob_infection) + "_" + "Immune" + str(
                    prob_Immune) + "_" + str(Niter) + "iter_2012.dat"
            file2 = open(output_file2, 'wt')
            file2.close()

            # i create the empty list of list for the Niter temporal evolutions
            num_shifts = 0
            for n in G.nodes():
                G.node[n]["status"] = "S"
                if G.node[n]['type'] == "shift":
                    num_shifts += 1

        #  list_final_I_values_fixed_p=[]  # i dont care about the final values right now, but about the whole time evol
            list_lists_t_evolutions = []

            list_dist_fixed_parameters = []
            list_dist_at_ending_point_fixed_parameters = []
            list_final_num_infected = []

            for iter in range(Niter):

                print "     iter:", iter

                #######OJO~!!!!!!!!!! COMENTAR ESTO CUANDO ESTOY BARRIENDO TOOOOOOOOOODO EL ESPACIO DE PARAMETROS
                file_name_indiv_evol = output_file2.strip("Average_").split(
                    '.dat')[0] + "_indiv_iter" + str(iter) + ".dat"

                file4 = open(file_name_indiv_evol, 'wt')
                file4.close()
                ##########################################

                list_I = []  #list infected doctors
                list_ordering = []
                list_s = []

                ########### set I.C.

                max_order = 0
                for n in G.nodes():
                    G.node[n]["status"] = "S"  # all nodes are Susceptible
                    if G.node[n]['type'] == "shift":
                        list_s.append(n)
                        if G.node[n]['order'] > max_order:
                            max_order = G.node[n]['order']
                    else:
                        if G.node[n]['label'] == "Wunderink" or G.node[n][
                                "label"] == "Weiss":
                            G.node[n]["status"] = "I"
                            list_I.append(G.node[n]['label'])

                list_single_t_evolution = []
                list_single_t_evolution.append(
                    2.0)  # I always start with TWO infected doctors!!

                for n in G.nodes(
                ):  # i make some DOCTORs INMUNE  (anyone except Weiss and Wunderink)
                    if (G.node[n]['type'] == "A") or (G.node[n]['type']
                                                      == "F"):
                        if G.node[n]['label'] != "Wunderink" and G.node[n][
                                "label"] != "Weiss":
                            rand = random.random()
                            if rand < prob_Immune:
                                G.node[n]["status"] = "Immune"

            #   print max_order

            ################# the dynamics starts:

                t = 1
                while t <= max_order:  # loop over shifts, in order
                    for n in G.nodes():
                        if G.node[n]['type'] == "shift" and G.node[n][
                                'order'] == t:
                            flag_possible_infection = 0
                            for doctor in G.neighbors(
                                    n
                            ):  #first i check if any doctor is infected in this shift
                                if G.node[doctor]["status"] == "I":
                                    flag_possible_infection = 1

                            if flag_possible_infection:
                                for doctor in G.neighbors(
                                        n
                                ):  # then the doctors in that shift, gets infected with prob_infection
                                    if G.node[doctor]["status"] == "S":
                                        rand = random.random()
                                        if rand < prob_infection:
                                            G.node[doctor]["status"] = "I"
                                            list_I.append(
                                                G.node[doctor]["label"])

                    list_single_t_evolution.append(float(len(list_I)))

                    t += 1

                    ######## end t loop

                ########OJO~!!!!!!!!!! COMENTAR ESTO CUANDO ESTOY BARRIENDO TOOOOOOOOOODO EL ESPACIO DE PARAMETROS
                file4 = open(file_name_indiv_evol, 'at')
                for i in range(
                        len(list_single_t_evolution)):  #ime step by time step
                    print >> file4, i, list_single_t_evolution[
                        i], prob_infection, prob_Immune
                file4.close()
                ########################################################

                list_lists_t_evolutions.append(list_single_t_evolution)

                list_dist_fixed_parameters.append(
                    compare_real_evol_vs_simus_to_be_called.compare_two_curves(
                        list_actual_evol, list_single_t_evolution))

                list_dist_at_ending_point_fixed_parameters.append(
                    abs(list_single_t_evolution[-1] - list_actual_evol[-1])
                )  # i save the distance at the ending point between the current simu and actual evol

                list_final_num_infected.append(time_evol_number_adopters[-1])

            ######## end loop Niter

            list_pair_dist_std_delta_end = []

            list_pair_dist_std_delta_end.append(
                numpy.mean(list_dist_fixed_parameters)
            )  # average dist between the curves over Niter
            list_pair_dist_std_delta_end.append(
                numpy.std(list_dist_fixed_parameters))

            list_pair_dist_std_delta_end.append(
                numpy.mean(list_dist_at_ending_point_fixed_parameters))

            file3 = open(output_file3, 'at')  # i print out the landscape
            print >> file3, alpha_F, damping, mutual_encouragement, threshold, numpy.mean(
                list_dist_at_ending_point_fixed_parameters), numpy.mean(
                    list_dist_fixed_parameters), numpy.mean(
                        list_final_num_adopt), numpy.std(list_final_num_adopt)
            file3.close()

            if (
                    numpy.mean(list_dist_at_ending_point_fixed_parameters)
            ) <= delta_end:  # i only consider situations close enough at the ending point

                dict_filenames_tot_distance[
                    output_file2] = list_pair_dist_std_delta_end

            file2 = open(output_file2, 'at')
            for s in range(len(list_single_t_evolution)):
                list_fixed_t = []
                for iter in range(Niter):
                    list_fixed_t.append(list_lists_t_evolutions[iter][s])
                print >> file2, s, numpy.mean(list_fixed_t)
            file2.close()

            # file = open(output_file,'at')
            #print >> file,  prob_infection, numpy.mean(list_final_I_values_fixed_p)
            #file.close()

            print list_dist_fixed_parameters

            histograma_bines_gral.histograma_bins(
                list_dist_fixed_parameters, 50,
                "../Results/histogr_distances_indiv_infect_simus_to_the_average_curve_p"
                + str(prob_infection) + "_" + "Immune" + str(prob_Immune) +
                "_" + str(Niter) + "iter.dat")  # Nbins=100

            prob_infection += delta_prob
        prob_Immune += delta_prob_Immune

    compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(
        dict_filenames_tot_distance, "Infection", all_team, Niter)
Ejemplo n.º 11
0
def main():

    minimum_time = 180
    min_num_weigh_ins = 2

    impossible_weight_change = 80.  # plus or minus, it is a mistake

    for_testing_max_num_queries = 1000

    database = "calorie_king_social_networking_2010"
    server = "tarraco.chem-eng.northwestern.edu"
    user = "******"
    passwd = "n1ckuDB!"

    db = Connection(server, database, user, passwd)

    query1 = """select * from users"""
    result1 = db.query(query1)  # is a list of dict.

    num_impossible_weight_changes = 0  # values larger than 100 or smaller than -100
    tot_users = 0
    tot_users_2weigh_ins = 0
    tot_users_6months = 0
    tot_users_2weigh_ins_6months = 0

    list_weight_changes_before_6months = []
    list_weight_changes_after_6months = []

    list_days_before_6months = []
    list_days_after_6months = []

    contador = 0

    for r1 in result1:

        #  if contador <= for_testing_max_num_queries:
        contador += 1

        ck_id = r1['ck_id']

        tot_users += 1

        list_before_6months = []
        list_after_6months = []

        query2 = "select  * from weigh_in_history where (ck_id ='" + str(
            ck_id) + "') order by on_day"
        result2 = db.query(query2)  # is a list of dicts.

        if len(result2) > min_num_weigh_ins:
            tot_users_2weigh_ins += 1

            first_date = result2[0]['on_day']
            last_date = result2[-1]['on_day']
            time_system = (last_date - first_date).days + 1

            if time_system >= minimum_time:

                tot_users_6months += 1

                if len(result2) > min_num_weigh_ins:
                    tot_users_2weigh_ins_6months += 1
                    print tot_users, tot_users_2weigh_ins_6months

            #  print ck_id

                for r2 in result2:
                    fecha = r2['on_day']
                    weight = r2['weight']
                    num_days = (fecha - first_date).days + 1

                    lista = []
                    lista.append(num_days)
                    lista.append(weight)

                    if num_days < minimum_time:
                        list_before_6months.append(lista)
                    else:
                        list_after_6months.append(lista)

                #  print fecha, num_days,weight

                weight_change_before = list_before_6months[-1][
                    1] - list_before_6months[0][1]
                if weight_change_before < impossible_weight_change and weight_change_before > -impossible_weight_change:
                    list_weight_changes_before_6months.append(
                        weight_change_before)
                else:
                    num_impossible_weight_changes += 1

                weight_change_after = list_after_6months[-1][
                    1] - list_after_6months[0][1]
                if weight_change_after < 100. and weight_change_after > -100.:
                    list_weight_changes_after_6months.append(
                        weight_change_after)
                else:
                    num_impossible_weight_changes += 1

                days_after = list_after_6months[-1][0] - list_after_6months[0][
                    0]
                list_days_after_6months.append(days_after)

                days_before = list_before_6months[-1][0] - list_before_6months[
                    0][0]
                list_days_before_6months.append(days_before)

            # print 'weight change before 6months:',weight_change_before,'  over:',days_before,'days,  counting',len(list_before_6months),'weigh_ins'
            #print 'and after:',weight_change_after, 'over:',days_after,'days,  counting',len(list_after_6months),'weigh_ins\n'

    print "tot number of users:", len(
        result1
    ), "  ||  num users >= 2 weigh-ins:", tot_users_2weigh_ins, "  ||  num users  >= 6months:", tot_users_6months, "  ||  num users >= 2 weigh-ins and >= 6months:", tot_users_2weigh_ins_6months, "\n"

    print  "average weight change before 6months:", numpy.mean(list_weight_changes_before_6months), "SD:", numpy.std(list_weight_changes_before_6months), \
        "over:", numpy.mean(list_days_before_6months), "days on average, SD:",numpy.std(list_days_before_6months)
    print  "average weight change after 6months:", numpy.mean(list_weight_changes_after_6months), "SD:", numpy.std(list_weight_changes_after_6months), \
        "over:", numpy.mean(list_days_after_6months), "days on average, SD:",numpy.std(list_days_after_6months),"\n"

    ks = stats.ks_2samp(list_weight_changes_before_6months,
                        list_weight_changes_after_6months)
    print "KS test for list weight changes before vs after the 6months:", ks

    print "number of impossible weight changes:", num_impossible_weight_changes

    num_users_regain = 0.
    for item in list_weight_changes_after_6months:
        if item > 0.:
            num_users_regain += 1.

    print "fraction users who (re)gain weight from the 6month mark on:", num_users_regain / float(
        len(list_weight_changes_after_6months)), "(average over", len(
            list_weight_changes_after_6months), "users)"

    num_users_gain = 0.
    for item in list_weight_changes_before_6months:
        if item > 0.:
            num_users_gain += 1.

    print "fraction users who gain weight during the first 6months:", num_users_gain / float(
        len(list_weight_changes_after_6months)), "(average over", len(
            list_weight_changes_after_6months), "users)"

    histograma_bines_gral.histograma_bins(list_weight_changes_before_6months,
                                          30, "weight_change_before_6months")
    histograma_bines_gral.histograma_bins(list_weight_changes_after_6months,
                                          30, "weight_change_after_6months")
def main():




    ######## input files for the comparison
   # filename1="../Results/Cooperation_TSplane_gender_1.dat"    #columns:   T  S  <Coop>  STD
    #filename2="../Results/Cooperation_TSplane_gender_0.dat"


    #filename1="../Results/Cooperation_TSplane_All_ages_rounds1_3.dat"
    filename1="../Results/Cooperation_TSplane_All_ages_rounds4_10.dat"
    filename2="../Results/Cooperation_TSplane_All_ages_rounds11_18.dat"




    #filename1="../Results/Cooperation_TSplane_young_ages.dat"
    #filename2="../Results/Cooperation_TSplane_adult_ages.dat"



    print "comparing files:"
    print "  ", filename1
    print "  ", filename2
    



    ####### output ratio file
    if "gender" in filename1:
        filename_ratio="../Results/Ratio_Cooperation_TSplane_genders_1_div_0.dat"
        filename_diff_relat="../Results/Diff_relat_Cooperation_TSplane_genders_1_div_0.dat"


    elif "ages.dat" in filename1:
        filename_ratio="../Results/Ratio_Cooperation_TSplane_ages_young_div_adult.dat"
        filename_diff_relat="../Results/Diff_relat_Cooperation_TSplane_ages_young_div_adult.dat"

    elif "round" in filename1:
        filename_ratio="../Results/Ratio_Cooperation_TSplane_rounds_"+filename1.split("rounds")[1].split(".")[0]+"_div_"+filename2.split("rounds")[1].split(".")[0]+".dat"
        filename_diff_relat="../Results/Diff_relat_Cooperation_TSplane_rounds_"+filename1.split("rounds")[1].split(".")[0]+"_div_"+filename2.split("rounds")[1].split(".")[0]+".dat"

    else:
        exit()

    file_ratio=open(filename_ratio, 'wt')
    file_diff_relat=open(filename_diff_relat, 'wt')


    print filename_ratio
    print filename_diff_relat





    ####### i read file1 and save it
    dict_file1_avg={}
    dict_file1_std={}
   
    list_order_tuplas=[]
    file1=open(filename1,'r')    
    for line_aux in file1:  # ASI LEO LINEA POR LINEA,EN LUGAR DE CARGARLAS  TODAS EN MEMORIA PRIMERO (en lugar de:  for line in list_lines )!!  
        try:
            line=line_aux.split(" ")           
            T=line[0]
            S=line[1]
            tupla=(T, S)
            list_order_tuplas.append(tupla)

            avg_Coop=line[2]
            std_Coop=line[3]
            
            dict_file1_avg[tupla]=float(avg_Coop)
            dict_file1_std[tupla]=float(std_Coop)
        except IndexError: # empty line
             list_order_tuplas.append(" ")


  

   
    ####### i read file2 and save it
    dict_file2_avg={}
    dict_file2_std={}

    file2=open(filename2,'r')   
    for line_aux in file2:    
        try:
            line=line_aux.split(" ")
           
            T=line[0]
            S=line[1]
            tupla=(T, S)
            
            avg_Coop=line[2]
            std_Coop=line[3]
            
            dict_file2_avg[tupla]=float(avg_Coop)
            dict_file2_std[tupla]=float(std_Coop)
        except IndexError: # empty line
            pass


    list_ratios=[]

    ######### i print out the ratio file with the same block structure
    for tupla in list_order_tuplas:
        if tupla != " ":  # i added a space arificially to separate blocks
            ratio=dict_file1_avg[tupla]/dict_file2_avg[tupla]

            diff_relat=(dict_file1_avg[tupla]-dict_file2_avg[tupla])/dict_file2_avg[tupla]


         #   print tupla[0], tupla[1], dict_file1_avg[tupla], dict_file2_avg[tupla], "  ratio:",ratio
            print   >> file_ratio, tupla[0], tupla[1], ratio
            print   >> file_diff_relat, tupla[0], tupla[1], diff_relat

            list_ratios.append(ratio)
            list_ratios.append(diff_relat)

        else:          
            print   >> file_ratio
            print   >> file_diff_relat


         
       
      

    print "written file:",filename_ratio
    print "written file:",filename_diff_relat


    if "gender" in filename1:
        name_h="../Results/histogram_Ratios_Cooperation_TSplane_genders_1_div_0.dat"
    elif "ages.dat" in filename1:
        name_h="../Results/histogram_Ratios_Cooperation_TSplane_ages_young_div_adult.dat"
    elif "round" in filename1:
        name_h="../Results/histogram_Ratios_Cooperation_TSplane_rounds_"+filename1.split("rounds")[1].split(".")[0]+"_div_"+filename2.split("rounds")[1].split(".")[0]+".dat"

    histograma_bines_gral.histograma_bins(list_ratios,20, name_h)
Ejemplo n.º 13
0
def main():

    pupulation_age = "All"  #"young"  # or "adult"   or "All"

    if pupulation_age == "young":
        min_age_threshold = 0
        max_age_threshold = 15
    elif pupulation_age == "adult":
        min_age_threshold = 16
        max_age_threshold = 100
    elif pupulation_age == "All":
        min_age_threshold = 0
        max_age_threshold = 100

    else:
        print "wrong age range"
        exit()

    gender_flag = "All"  #"All"  or     1 for males     or   0 for females

    R = 10
    P = 5

    #######  to select results only from given rounds  (both ends included)
    min_round = 1
    max_round = 18

    ######### input file
    filename = "../Data/userdata.pickle"
    master_list = pickle.load(open(
        filename, 'rb'))  # es una lista: un elemento por jugador (541)
    #########

    ######### output files
    Nbins_ages = 15
    name_h_ages = "../Results/histogram_ages_gender" + str(
        gender_flag) + ".dat"

    Nbins_payoffs = 20
    name_h_payoffs = "../Results/histogram_payoffs_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"

    output_filename1 = "../Results/Cooperation_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output1 = open(output_filename1, 'wt')

    output_filename2 = "../Results/Racionality_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output2 = open(output_filename2, 'wt')

    output_filename3 = "../Results/Ambition_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output3 = open(output_filename3, 'wt')

    output_filename4 = "../Results/Payoff_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output4 = open(output_filename4, 'wt')

    output_filename5 = "../Results/Payoff_norm_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"
    output5 = open(output_filename5, 'wt')

    output_filename6 = "../Results/SEM_cooperation_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"

    output_filename7 = "../Results/Numer_actions_TSplane_" + str(
        pupulation_age) + "_ages_rounds" + str(min_round) + "_" + str(
            max_round) + "_gender" + str(gender_flag) + ".dat"

    #########

    ### master_list  tiene la forma: [{'guany_total': 110L, 'partida': 1L, 'genere': u'h', 'num_eleccions': 14, 'edat': 50L, 'rationality': 66.666666666666671, 'ambition': 100.0, 'rondes': [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0},...],      'nickname': u'Caesar', 'id': 2L}]

    #la llave key tiene a su vez como valor una lista de diccionarios (uno por ronda)
    # [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0}, ...]

    dict_TSplane_list_actions = {}
    dict_TSplane_avg_coop = {}
    dict_TSplane_std_coop = {}
    dict_TSplane_sem_coop = {}  # error of the mean  =std/ sqrt(num points)

    dict_TSplane_list_rationality = {}
    dict_TSplane_avg_rationality = {}
    dict_TSplane_std_rationality = {}

    dict_TSplane_list_ambition = {}
    dict_TSplane_avg_ambition = {}
    dict_TSplane_std_ambition = {}

    dict_TSplane_list_payoff = {}
    dict_TSplane_avg_payoff = {}
    dict_TSplane_std_payoff = {}
    dict_TSplane_sem_payoff = {}

    dict_TSplane_list_payoff_norm = {
    }  # normalized payoff by the maximun possible in that TS point
    dict_TSplane_avg_payoff_norm = {}
    dict_TSplane_std_payoff_norm = {}
    dict_TSplane_sem_payoff_norm = {}

    dict_TSplane_num_actions = {}

    list_ages = []
    list_payoff_tot = []  # calculated (by Jordi) up to round #13

    cont_diff = 0
    for dictionary in master_list:  # cada elemento de la lista es a su vez un dict

        payoff_total = float(dictionary['guany_total']
                             )  # this is calculated only up to round #13  !!
        partida = dictionary['partida']

        list_payoff_tot.append(payoff_total)

        genero = dictionary['genere']
        if genero == "h":
            genero = 1
        elif genero == "d":
            genero = 0

        if gender_flag == "All" or gender_flag == genero:

            num_elecciones = dictionary['num_eleccions']
            age = int(dictionary['edat'])
            avg_racionalidad = dictionary['rationality']
            avg_ambicion = dictionary['ambition']
            num_rondas = len(dictionary['rondes'])
            nickname = unidecode(dictionary['nickname']).replace(" ", "_")
            user_id = dictionary['id']

            if num_elecciones != num_rondas:
                cont_diff += 1

            list_dict_rondas = dictionary['rondes']

            list_ages.append(age)

            for dict_ronda in list_dict_rondas:
                ##  cada diccionario de ronda tiene: {'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}

                T = int(dict_ronda['T'])
                S = int(dict_ronda['S'])

                list_four_possible_values = [P, R, T, S]

                punto_TS = (T, S)

                try:
                    payoff = float(dict_ronda['guany'])
                    payoff_norm = float(dict_ronda['guany']) / float(
                        max(list_four_possible_values))
                except TypeError:
                    payoff = dict_ronda['guany']  # if payoff is None

                payoff_oponent = dict_ronda['guany_oponent']
                rationality = dict_ronda['rationality']
                ambition = dict_ronda['ambition']

                if rationality != None:
                    rationality = float(rationality) * 100.
                if ambition != None:
                    ambition = float(ambition) * 100.

                round_number = dict_ronda['numronda']

                action = dict_ronda['seleccio']
                if action == "C":
                    action = 1.
                elif action == "D":
                    action = 0.
                # si no ha elegido nada, es None

                num_ronda = dict_ronda['numronda']
                quadrant = dict_ronda['cuadrant'].replace(" ", "_").replace(
                    "'", "")

                action_oponent = dict_ronda['seleccio_oponent']
                if action_oponent == "C":
                    action_oponent = 1.
                elif action_oponent == "D":
                    action_oponent = 0.
                # si no ha elegido nada, es None

                oponent_id = dict_ronda['oponent']

                if punto_TS in dict_TSplane_list_actions:
                    if action != None:
                        if age >= min_age_threshold and age <= max_age_threshold:
                            if round_number <= max_round and round_number >= min_round:

                                dict_TSplane_list_actions[punto_TS].append(
                                    action)  # 1:C,  0:D
                                if rationality != None:
                                    dict_TSplane_list_rationality[
                                        punto_TS].append(rationality)
                                if ambition != None:
                                    dict_TSplane_list_ambition[
                                        punto_TS].append(ambition)

                                dict_TSplane_list_payoff[punto_TS].append(
                                    payoff)
                                dict_TSplane_list_payoff_norm[punto_TS].append(
                                    payoff_norm)
                else:
                    if action != None:

                        if age >= min_age_threshold and age <= max_age_threshold:
                            if round_number <= max_round and round_number >= min_round:

                                dict_TSplane_list_actions[punto_TS] = []
                                dict_TSplane_list_actions[punto_TS].append(
                                    action)

                                if rationality != None:
                                    dict_TSplane_list_rationality[
                                        punto_TS] = []
                                    dict_TSplane_list_rationality[
                                        punto_TS].append(rationality)

                                if ambition != None:
                                    dict_TSplane_list_ambition[punto_TS] = []
                                    dict_TSplane_list_ambition[
                                        punto_TS].append(ambition)

                                dict_TSplane_list_payoff[punto_TS] = []
                                dict_TSplane_list_payoff[punto_TS].append(
                                    payoff)

                                dict_TSplane_list_payoff_norm[punto_TS] = []
                                dict_TSplane_list_payoff_norm[punto_TS].append(
                                    payoff_norm)

#                            print "payoff:",payoff,"list:",  list_four_possible_values,"max:", max(list_four_possible_values), payoff_norm

    old_T = None
    ####### the the avg cooperation per TS point
    for punto_TS in sorted(dict_TSplane_list_actions):

        dict_TSplane_avg_coop[punto_TS] = numpy.mean(
            dict_TSplane_list_actions[punto_TS])
        dict_TSplane_std_coop[punto_TS] = numpy.std(
            dict_TSplane_list_actions[punto_TS])
        dict_TSplane_sem_coop[punto_TS] = stats.sem(
            dict_TSplane_list_actions[punto_TS]
        )  # standard error =std / sqrt(num points)

        dict_TSplane_avg_payoff[punto_TS] = numpy.mean(
            dict_TSplane_list_payoff[punto_TS])
        dict_TSplane_std_payoff[punto_TS] = numpy.std(
            dict_TSplane_list_payoff[punto_TS])
        dict_TSplane_sem_payoff[punto_TS] = stats.sem(
            dict_TSplane_list_payoff[punto_TS])

        dict_TSplane_avg_payoff_norm[punto_TS] = numpy.mean(
            dict_TSplane_list_payoff_norm[punto_TS])
        dict_TSplane_std_payoff_norm[punto_TS] = numpy.std(
            dict_TSplane_list_payoff_norm[punto_TS])
        dict_TSplane_sem_payoff_norm[punto_TS] = stats.sem(
            dict_TSplane_list_payoff_norm[punto_TS])

        dict_TSplane_num_actions[punto_TS] = len(
            dict_TSplane_list_actions[punto_TS])

        if old_T != punto_TS[0]:
            print >> output1
            print >> output4
            print >> output5

        print >> output1, punto_TS[0], punto_TS[1], dict_TSplane_avg_coop[
            punto_TS], dict_TSplane_std_coop[punto_TS], dict_TSplane_sem_coop[
                punto_TS]

        old_T = punto_TS[0]

        print >> output4, punto_TS[0], punto_TS[1], dict_TSplane_avg_payoff[
            punto_TS], dict_TSplane_std_payoff[
                punto_TS], dict_TSplane_sem_payoff[punto_TS]
        old_T = punto_TS[0]

        print >> output5, punto_TS[0], punto_TS[
            1], dict_TSplane_avg_payoff_norm[
                punto_TS], dict_TSplane_std_payoff_norm[
                    punto_TS], dict_TSplane_sem_payoff_norm[punto_TS]
        old_T = punto_TS[0]

    old_T = None
    for punto_TS in sorted(dict_TSplane_list_rationality):

        dict_TSplane_avg_rationality[punto_TS] = numpy.mean(
            dict_TSplane_list_rationality[punto_TS])
        dict_TSplane_std_rationality[punto_TS] = numpy.std(
            dict_TSplane_list_rationality[punto_TS])

        if old_T != punto_TS[0]:
            print >> output2

        print >> output2, punto_TS[0], punto_TS[
            1], dict_TSplane_avg_rationality[
                punto_TS], dict_TSplane_std_rationality[
                    punto_TS], dict_TSplane_std_rationality[
                        punto_TS] / numpy.sqrt(
                            len(dict_TSplane_list_rationality[punto_TS]))
        old_T = punto_TS[0]

    old_T = None
    for punto_TS in sorted(dict_TSplane_list_ambition):

        dict_TSplane_avg_ambition[punto_TS] = numpy.mean(
            dict_TSplane_list_ambition[punto_TS])
        dict_TSplane_std_ambition[punto_TS] = numpy.std(
            dict_TSplane_list_ambition[punto_TS])

        if old_T != punto_TS[0]:
            print >> output3

        print >> output3, punto_TS[0], punto_TS[1], dict_TSplane_avg_ambition[
            punto_TS], dict_TSplane_std_ambition[
                punto_TS], dict_TSplane_std_ambition[punto_TS] / numpy.sqrt(
                    len(dict_TSplane_list_ambition[punto_TS]))

        old_T = punto_TS[0]
        old_T = punto_TS[0]

    histograma_bines_gral.histograma_bins(
        list_ages, Nbins_ages, name_h_ages
    )  #x_position , norm_count, count, norm_cumulat_count, cumulat_count ,  float(hist[b])/float(len(lista))

    histograma_bines_gral.histograma_bins(list_payoff_tot, Nbins_payoffs,
                                          name_h_payoffs)

    print_values_dict_for_matrix_plotting(dict_TSplane_avg_coop,
                                          output_filename1)
    print_values_dict_for_matrix_plotting(dict_TSplane_sem_coop,
                                          output_filename6)

    print_values_dict_for_matrix_plotting(dict_TSplane_num_actions,
                                          output_filename7)

    output1.close()
    output2.close()
    output3.close()
    output4.close()
    output5.close()
    print "written output datafile:", output_filename1
    print "written output datafile:", output_filename2
    print "written output datafile:", output_filename3
    print "written output datafile:", output_filename4
    print "written output datafile:", output_filename5

    print "num. times that #rounds != # elections", cont_diff

    print "lenght master list.", len(master_list)
Ejemplo n.º 14
0
def main():

    pupulation_age = "All"  #"young"  # or "adult"   or "All"

    if pupulation_age == "young":
        min_age_threshold = 0
        max_age_threshold = 15
    elif pupulation_age == "adult":
        min_age_threshold = 16
        max_age_threshold = 100
    elif pupulation_age == "All":
        min_age_threshold = 0
        max_age_threshold = 100

    else:
        print "wrong age range"
        exit()

#  R=10
# P=5

#######  to select results only from given rounds  (both ends included)
    min_round = 1
    max_round = 18

    umbral_coop = 0.75  # to saparate good people from bastards in the lower Harmony
    Niter = 5000  # for the bootstrapping

    type_definition = "Harmony"  # "lowerPD"  "higherPD"# #   #"higherHarmony"   #"lowerHarmony"  #"SD"  # or "PD" "SH" "Harmony"

    print "Cooperation threshold for good people in", type_definition, umbral_coop
    print "Niter for bootstrapping:", Niter

    ######### input file
    filename = "../Data/userdata.pickle"
    master_list = pickle.load(open(
        filename, 'rb'))  # es una lista: un elemento por jugador (541)

    ######### output files
    pickle_file_good_people = "../Results/list_good_guys_" + str(
        type_definition) + "_threshold_coop" + str(umbral_coop) + ".pickle"
    pickle_file_bad_people = "../Results/list_bad_guys_" + str(
        type_definition) + "_threshold_coop" + str(umbral_coop) + ".pickle"

    pickle_file_all = "../Results/list_all_users.pickle"

    Nbins_avg_coop = 20
    name_h_avg_coop = "../Results/histogram_general_avg_coop.dat"

    #########

    ### master_list  tiene la forma: [{'guany_total': 110L, 'partida': 1L, 'genere': u'h', 'num_eleccions': 14, 'edat': 50L, 'rationality': 66.666666666666671, 'ambition': 100.0, 'rondes': [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0},...],      'nickname': u'Caesar', 'id': 2L}]

    #la llave key tiene a su vez como valor una lista de diccionarios (uno por ronda)
    # [{'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}, {'guany_oponent': 6L, 'ambition': None, 'seleccio': u'D', 'oponent': 17L, 'S': 6L, 'T': 8L, 'seleccio_oponent': u'C', 'numronda': 2L, 'guany': 8L, 'cuadrant': u'Harmony', 'rationality': 0.0}, ...]

    num_valid_actions = 0.
    num_actions_in_focus_region = 0

    num_coop_actions_in_focus_region = 0

    dict_user_list_actions_in_focus_region = {
    }  # that region is either the lower Harmony or the whole PD
    dict_user_avg_coop_in_focus_region = {}

    dict_user_list_actions = {}
    dict_user_avg_coop = {}

    num_users = float(len(master_list))

    list_cooperators_in_focus_region = [
    ]  # if the cooperate at least once in the region
    list_defectors_in_focus_region = []

    list_all_users = []

    ##### loop over different users
    for dictionary in master_list:  # cada elemento de la lista es a su vez un dict

        nickname = unidecode(dictionary['nickname']).replace(" ", "_")
        user_id = dictionary['id']

        payoff_total = float(dictionary['guany_total']
                             )  # this is calculated only up to round #13  !!
        partida = dictionary['partida']

        gender = dictionary['genere']
        if gender == "h":
            gender = 1
        elif gender == "d":
            gender = 0

        num_elecciones = int(dictionary['num_eleccions'])
        age = int(dictionary['edat'])
        avg_racionalidad = dictionary['rationality']
        avg_ambicion = dictionary['ambition']
        num_rondas = len(dictionary['rondes'])

        list_dict_rondas = dictionary['rondes']

        ######## list of rounds for a given user_id
        for dict_ronda in list_dict_rondas:
            ##  cada diccionario de ronda tiene: {'guany_oponent': 10L, 'ambition': None, 'seleccio': u'C', 'oponent': 7L, 'S': 6L, 'T': 5L, 'seleccio_oponent': u'C', 'numronda': 1L, 'guany': 10L, 'cuadrant': u'Harmony', 'rationality': 1.0}

            T = int(dict_ronda['T'])
            S = int(dict_ronda['S'])
            punto_TS = (T, S)

            round_number = dict_ronda['numronda']

            action = dict_ronda['seleccio']
            if action == "C":
                action = 1.
            elif action == "D":
                action = 0.
            # si no ha elegido nada, es None

            if action != None:
                num_valid_actions += 1

                if user_id not in list_all_users:
                    list_all_users.append(user_id)

            num_ronda = dict_ronda['numronda']
            quadrant = dict_ronda['cuadrant'].replace(" ",
                                                      "_").replace("'", "")

            action_oponent = dict_ronda['seleccio_oponent']
            if action_oponent == "C":
                action_oponent = 1.
            elif action_oponent == "D":
                action_oponent = 0.
            # si no ha elegido nada, es None

            #### for the general histogram of cooperation
            if user_id not in dict_user_list_actions:
                dict_user_list_actions[user_id] = []
            if action != None:
                dict_user_list_actions[user_id].append(action)

            if type_definition == "Harmony":

                if S >= 5 and S <= 10:
                    if T >= 5 and T <= 10:
                        if user_id not in dict_user_list_actions_in_focus_region:
                            dict_user_list_actions_in_focus_region[
                                user_id] = []

                        if action == 1:
                            if user_id not in list_cooperators_in_focus_region:
                                list_cooperators_in_focus_region.append(
                                    user_id)
                            num_coop_actions_in_focus_region += 1
                        elif action == 0:
                            if user_id not in list_defectors_in_focus_region:
                                list_defectors_in_focus_region.append(user_id)

                        if action != None:
                            dict_user_list_actions_in_focus_region[
                                user_id].append(action)
                            num_actions_in_focus_region += 1

            elif type_definition == "lowerHarmony":

                if S >= 5 and S <= 10:
                    if T >= 5 and T <= 10:
                        if S <= T:  # the lower triangle of the Harmony game:
                            if user_id not in dict_user_list_actions_in_focus_region:
                                dict_user_list_actions_in_focus_region[
                                    user_id] = []

                            if action == 1:
                                if user_id not in list_cooperators_in_focus_region:
                                    list_cooperators_in_focus_region.append(
                                        user_id)
                                num_coop_actions_in_focus_region += 1
                            elif action == 0:
                                if user_id not in list_defectors_in_focus_region:
                                    list_defectors_in_focus_region.append(
                                        user_id)

                            if action != None:
                                dict_user_list_actions_in_focus_region[
                                    user_id].append(action)
                                num_actions_in_focus_region += 1

            elif type_definition == "higherHarmony":

                if S >= 5 and S <= 10:
                    if T >= 5 and T <= 10:
                        if S > T:  # the higher triangle of the Harmony game:
                            if user_id not in dict_user_list_actions_in_focus_region:
                                dict_user_list_actions_in_focus_region[
                                    user_id] = []

                            if action == 1:
                                if user_id not in list_cooperators_in_focus_region:
                                    list_cooperators_in_focus_region.append(
                                        user_id)
                                num_coop_actions_in_focus_region += 1
                            elif action == 0:
                                if user_id not in list_defectors_in_focus_region:
                                    list_defectors_in_focus_region.append(
                                        user_id)

                            if action != None:
                                dict_user_list_actions_in_focus_region[
                                    user_id].append(action)
                                num_actions_in_focus_region += 1

            elif type_definition == "PD":
                if S >= 0 and S <= 5:
                    if T >= 10 and T <= 15:

                        if user_id not in dict_user_list_actions_in_focus_region:
                            dict_user_list_actions_in_focus_region[
                                user_id] = []

                        if action == 1:
                            if user_id not in list_cooperators_in_focus_region:
                                list_cooperators_in_focus_region.append(
                                    user_id)
                            num_coop_actions_in_focus_region += 1
                        elif action == 0:
                            if user_id not in list_defectors_in_focus_region:
                                list_defectors_in_focus_region.append(user_id)

                        if action != None:
                            dict_user_list_actions_in_focus_region[
                                user_id].append(action)
                            num_actions_in_focus_region += 1

            elif type_definition == "higherPD":
                if S >= 0 and S <= 5:
                    if T >= 10 and T <= 15:
                        if S >= -10 + T:
                            if user_id not in dict_user_list_actions_in_focus_region:
                                dict_user_list_actions_in_focus_region[
                                    user_id] = []

                            if action == 1:
                                if user_id not in list_cooperators_in_focus_region:
                                    list_cooperators_in_focus_region.append(
                                        user_id)
                                num_coop_actions_in_focus_region += 1
                            elif action == 0:
                                if user_id not in list_defectors_in_focus_region:
                                    list_defectors_in_focus_region.append(
                                        user_id)

                            if action != None:
                                dict_user_list_actions_in_focus_region[
                                    user_id].append(action)
                                num_actions_in_focus_region += 1

            elif type_definition == "lowerPD":
                if S >= 0 and S <= 5:
                    if T >= 10 and T <= 15:
                        if S < -10 + T:
                            if user_id not in dict_user_list_actions_in_focus_region:
                                dict_user_list_actions_in_focus_region[
                                    user_id] = []

                            if action == 1:
                                if user_id not in list_cooperators_in_focus_region:
                                    list_cooperators_in_focus_region.append(
                                        user_id)
                                num_coop_actions_in_focus_region += 1
                            elif action == 0:
                                if user_id not in list_defectors_in_focus_region:
                                    list_defectors_in_focus_region.append(
                                        user_id)

                            if action != None:
                                dict_user_list_actions_in_focus_region[
                                    user_id].append(action)
                                num_actions_in_focus_region += 1

            elif type_definition == "SH":
                if S >= 0 and S <= 5:
                    if T >= 5 and T <= 10:

                        if user_id not in dict_user_list_actions_in_focus_region:
                            dict_user_list_actions_in_focus_region[
                                user_id] = []

                        if action == 1:
                            if user_id not in list_cooperators_in_focus_region:
                                list_cooperators_in_focus_region.append(
                                    user_id)
                            num_coop_actions_in_focus_region += 1
                        elif action == 0:
                            if user_id not in list_defectors_in_focus_region:
                                list_defectors_in_focus_region.append(user_id)

                        if action != None:
                            dict_user_list_actions_in_focus_region[
                                user_id].append(action)
                            num_actions_in_focus_region += 1

            elif type_definition == "SD":
                if S >= 5 and S <= 10:
                    if T >= 10 and T <= 15:

                        if user_id not in dict_user_list_actions_in_focus_region:
                            dict_user_list_actions_in_focus_region[
                                user_id] = []

                        if action == 1:
                            if user_id not in list_cooperators_in_focus_region:
                                list_cooperators_in_focus_region.append(
                                    user_id)
                            num_coop_actions_in_focus_region += 1
                        elif action == 0:
                            if user_id not in list_defectors_in_focus_region:
                                list_defectors_in_focus_region.append(user_id)

                        if action != None:
                            dict_user_list_actions_in_focus_region[
                                user_id].append(action)
                            num_actions_in_focus_region += 1

    ###### end loop over user_ids in the main dict

    ######## obtaining the subset of user_id who cooperated > umbral_coop in the focus region

    list_avg_defectors_in_focus_region = []
    list_avg_cooperators_in_focus_region = []
    for user_id in dict_user_list_actions_in_focus_region:  #over all user_ids who played in that region
        dict_user_avg_coop_in_focus_region[user_id] = numpy.mean(
            dict_user_list_actions_in_focus_region[user_id])

        if dict_user_avg_coop_in_focus_region[user_id] > umbral_coop:
            list_avg_cooperators_in_focus_region.append(user_id)
        else:
            list_avg_defectors_in_focus_region.append(user_id)

    ###### for the histogram of general cooperation
    list_avg_coop = []
    for user_id in dict_user_list_actions:
        list_avg_coop.append(numpy.mean(dict_user_list_actions[user_id]))

    histograma_bines_gral.histograma_bins(list_avg_coop, Nbins_avg_coop,
                                          name_h_avg_coop)
    # print "avg coop this group:", numpy.mean(list_avg_coop), "median:",numpy.median(list_avg_coop), min(list_avg_coop),  max(list_avg_coop)

    print "# user_ids that play in", type_definition, len(
        dict_user_list_actions_in_focus_region
    ), " who cooperated >", umbral_coop * 100, "%:", len(
        list_avg_cooperators_in_focus_region)

    print "# items in the pickle (tot # users):", len(master_list)

    print "\n# unique defectors in", type_definition, "(defect at least once):", len(
        list_defectors_in_focus_region), "  # avg-defectors:", len(
            list_avg_defectors_in_focus_region)

    print "\n# unique coop in", type_definition, "(cooperate at least once):", len(
        list_cooperators_in_focus_region
    ), "  # actions in", type_definition, ":", num_actions_in_focus_region, " fract_coop:", num_coop_actions_in_focus_region / float(
        num_actions_in_focus_region
    ), " # avg cooperators (> coop_threshold) in", type_definition, ":", len(
        list_avg_cooperators_in_focus_region), "  # avg-cooperators:", len(
            list_avg_cooperators_in_focus_region)

    # print "\nintersection unique users cooperators and defectors in lower Harmony", len(list(set(list_cooperators_in_focus_region) & set(list_defectors_in_focus_region)))

    print "  tot # valid actions:", num_valid_actions, "  tot # users:", num_users

    pickle.dump(list_avg_cooperators_in_focus_region,
                open(pickle_file_good_people, 'wb'))
    print "written pickle:", pickle_file_good_people

    pickle.dump(list_avg_defectors_in_focus_region,
                open(pickle_file_bad_people, 'wb'))
    print "written pickle:", pickle_file_bad_people

    pickle.dump(list_all_users, open(pickle_file_all, 'wb'))
    print "written pickle:", pickle_file_all

    ####### i read the master dict again to compare levels of cooperations for some sets of users
    list_actions_all_users = []
    list_actions_coop_in_focus_region = []
    list_actions_NO_coop_in_focus_region = []

    for dictionary in master_list:  # cada elemento de la lista es a su vez un dict

        user_id = dictionary['id']
        list_dict_rondas = dictionary['rondes']

        for dict_ronda in list_dict_rondas:

            action = dict_ronda['seleccio']
            if action == "C":
                action = 1.
            elif action == "D":
                action = 0.

            if action != None:
                list_actions_all_users.append(action)
                if user_id in list_avg_cooperators_in_focus_region:
                    list_actions_coop_in_focus_region.append(action)
                elif user_id in list_avg_defectors_in_focus_region:
                    list_actions_NO_coop_in_focus_region.append(action)

    avg_real_coop_among_coop_in_focus_region = numpy.mean(
        list_actions_coop_in_focus_region)
    avg_real_coop_among_NONcoop_in_focus_region = numpy.mean(
        list_actions_NO_coop_in_focus_region)

    print "\nAvg coop all users all TS-plane:", numpy.mean(
        list_actions_all_users), "  tot # actions:", len(
            list_actions_all_users)

    print "\n Bootstrapping..."

    ########## bootstrapping  to see if the cooperators (def as coop > umbral_coop) in focus region are special
    print "\nCooperators in", type_definition, " vs all:"
    bootstrapping.zscore(list_actions_all_users,
                         len(list_actions_coop_in_focus_region), Niter,
                         avg_real_coop_among_coop_in_focus_region)
    print "  # users in", type_definition, ":", len(
        list_avg_cooperators_in_focus_region)
    print "Avg coop this set users in all TS-plane:", avg_real_coop_among_coop_in_focus_region, "  # actions:", len(
        list_actions_coop_in_focus_region)

    ########## bootstrapping  to see if the defectors (def as coop <umbral_coop)  in focus region are special
    print "\nDefectors in", type_definition, " vs all:"
    bootstrapping.zscore(list_actions_all_users,
                         len(list_actions_NO_coop_in_focus_region), Niter,
                         avg_real_coop_among_NONcoop_in_focus_region)
    print "  # users in", type_definition, ":", len(
        list_avg_defectors_in_focus_region)
    print "Avg coop this set in all TS-plane:", avg_real_coop_among_NONcoop_in_focus_region, "  # actions:", len(
        list_actions_NO_coop_in_focus_region)
# --- Ejemplo n.º 15 ---  (scraping artifact: original separator "Ejemplo n.º 15" / vote count "0";
# converted to a comment so the file remains valid Python)
def main(graph_name):
    """Run the dose/memory infection model on the hospital shift network.

    Reads the shift graph from *graph_name* (GML), runs ``Niter`` stochastic
    simulations of an accumulating-dose contagion process over the ordered
    shifts, and compares each simulated adoption curve against the empirical
    adoption time series read from
    ``../Results/Actual_evolution_adopters_from_inference.dat``.
    Writes the averaged trajectory, distance statistics, several histograms
    and (optionally) percentile envelopes under ``../Results/weight_shifts/``.

    NOTE(review): with ``for_testing_fixed_set == "YES"`` the parameter
    "sweep" below is degenerate (each min/max pair brackets a single value),
    so statistics are collected for one fixed parameter combination.
    """

    G = nx.read_gml(graph_name)

    cutting_day = 100  # i use this only for the filenames

    for_testing_fixed_set = "YES"  # when YES, fixed values param, to get all statistics on final distances etc
    # change the range for the parameters accordingly

    envelopes = "YES"  # "YES": also compute percentile envelopes over the Niter curves

    Niter = 1000  # 100 iter seems to be enough (no big diff. with respect to 1000it)

    percent_envelope = 95.  # width (in percent) of the envelope band

    list_id_weekends_T3 = look_for_T3_weekends(
        G
    )  # T3 doesnt share fellows in the weekend  (but they are the exception)
    Nbins = 200  # for the histogram of sum of distances

    all_team = "NO"  # as adopters or not

    dir_real_data = '../Results/'
    dir = "../Results/weight_shifts/infection/"  # NOTE(review): shadows the builtin `dir`

    delta_end = 3.  # >= than + or -  dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!)

    # The landscape file is only (re)created when actually sweeping parameters.
    if for_testing_fixed_set == "NO":
        output_file3 = "../Results/weight_shifts/Landscape_parameters_infection_memory_" + str(
            Niter) + "iter_A_F_inferred_middle.dat"
        file3 = open(output_file3, 'wt')

        file3.close()

######################################################################################
#  I read the file of the actual evolution of the idea spreading in the hospital:   ##
######################################################################################

    if all_team == "YES":
        print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
        exit()

    else:
        filename_actual_evol = "../Results/Actual_evolution_adopters_from_inference.dat"

    file1 = open(
        filename_actual_evol, 'r'
    )  ## i read the file:  list_dates_and_names_current_adopters.txt  (created with: extract_real_evolution_number_adopters.py)
    list_lines_file = file1.readlines()

    # One adopter count per line; the count is the second tab-separated field.
    list_actual_evol = []
    for line in list_lines_file:  # [1:]:   # i exclude the first row

        num_adopters = float(line.split("\t")[1])
        list_actual_evol.append(num_adopters)

##################################################################

    # Parameter "ranges": each pair (min, max) brackets exactly one value,
    # so every while-loop below runs a single iteration in this configuration.
    prob_min = 0.7
    prob_max = 0.701
    delta_prob = 0.1

    prob_Immune_min = 0.00
    prob_Immune_max = 0.001
    delta_prob_Immune = 0.1

    ##########  KEEP FIXED TO ONE
    infect_threshold_min = 1.00  # i can define the dose in units of the threshold
    infect_threshold_max = 1.001
    delta_infect_threshold = 0.1
    ############

    dose_min = 0.2  # of a single encounter with an infected  (starting from zero doesnt make sense)
    dose_max = 0.201
    delta_dose = 0.01

    dict_filenames_tot_distance = {
    }  # i will save the filename as key and the tot distance from that curve to the original one

    prob_Immune = prob_Immune_min
    while prob_Immune <= prob_Immune_max:

        print "prom Immune:", prob_Immune

        prob_infection = prob_min
        while prob_infection <= prob_max:

            print "  p:", prob_infection

            infect_threshold = infect_threshold_min

            dose = dose_min
            while dose <= dose_max:

                print "  dose:", dose

                # Output file for the averaged time evolution at this parameter point.
                output_file2 = dir + "Average_time_evolution_Infection_memory_p" + str(
                    prob_infection
                ) + "_Immune" + str(prob_Immune) + "_FIXED_threshold" + str(
                    infect_threshold) + "_dose" + str(dose) + "_" + str(
                        Niter) + "iter_A_F_inferred_middle.dat"

                file2 = open(output_file2, 'wt')
                file2.close()

                #  list_final_I_values_fixed_p=[]  # i dont care about the final values right now, but about the whole time evol
                list_lists_t_evolutions = []  # one simulated daily curve per iteration

                # Per-parameter-point accumulators over the Niter realizations:
                list_dist_fixed_parameters = []
                list_dist_fixed_parameters_testing_segment = []
                list_abs_dist_at_ending_point_fixed_parameters = []
                list_dist_at_ending_point_fixed_parameters = []
                list_final_num_infected = []
                list_abs_dist_point_by_point_indiv_simus_to_actual = []
                list_dist_point_by_point_indiv_simus_to_actual = []

                #   list_abs_dist_at_cutting_day=[]

                for iter in range(Niter):

                    #   print "     iter:",iter

                    ########### set I.C.
                    # Reset every node: doctors start Susceptible with zero
                    # accumulated dose; "shift" nodes give max_order (the last
                    # shift index).  Wunderink and Weiss are seeded as infected.

                    list_I = []  #list infected doctors
                    max_order = 0
                    for n in G.nodes():
                        G.node[n]["status"] = "S"  # all nodes are Susceptible
                        G.node[n][
                            "infec_value"] = 0.  # when this value goes over the infect_threshold, the dr is infected
                        if G.node[n]['type'] == "shift":
                            if G.node[n]['order'] > max_order:
                                max_order = G.node[n][
                                    'order']  # to get the last shift-order for the time loop
                        else:
                            if G.node[n]['label'] == "Wunderink" or G.node[n][
                                    "label"] == "Weiss":
                                G.node[n][
                                    "infec_value"] = infect_threshold + 1.
                                G.node[n]["status"] = "I"
                                list_I.append(G.node[n]['label'])

                    list_single_t_evolution = []
                    list_single_t_evolution.append(
                        2.0)  # I always start with TWO infected doctors!!
                    old_num_adopters = 2

                    # Randomly immunize doctors (types "A"/"F" — presumably
                    # attendings/fellows; TODO confirm) except the two seeds.
                    for n in G.nodes(
                    ):  # i make some DOCTORs INMUNE  (anyone except Weiss and Wunderink)
                        if (G.node[n]['type'] == "A") or (G.node[n]['type']
                                                          == "F"):
                            if G.node[n]['label'] != "Wunderink" and G.node[n][
                                    "label"] != "Weiss":
                                rand = random.random()
                                if rand < prob_Immune:
                                    G.node[n]["status"] = "Immune"

                    ################# the dynamics starts:

                    shift_length = 5  #i know the first shift (order 0) is of length 5

                    t = 0
                    while t <= max_order:  # loop over shifts, in order
                        for n in G.nodes():
                            if G.node[n]['type'] == "shift" and G.node[n][
                                    'order'] == t:
                                shift_length = int(G.node[n]['shift_length'])

                                if shift_length == 2 and n not in list_id_weekends_T3:
                                    shift_length = 1  # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2.  (weekend shifts for T3 are two day long, with no sharing fellows)

                                flag_possible_infection = 0
                                for doctor in G.neighbors(
                                        n
                                ):  #first i check if any doctor is infected in this shift
                                    if G.node[doctor]["status"] == "I":
                                        flag_possible_infection = 1

                                if flag_possible_infection:
                                    for doctor in G.neighbors(
                                            n
                                    ):  # then the doctors in that shift, gets infected with prob_infection

                                        # One exposure chance per day of the shift;
                                        # each success adds `dose` to the doctor's
                                        # accumulated infec_value (the "memory").
                                        for i in range(shift_length):
                                            if G.node[doctor]["status"] == "S":
                                                rand = random.random()
                                                if rand < prob_infection:  # with prob p the infection occurres

                                                    G.node[doctor][
                                                        "infec_value"] += dose  # and bumps the infection_value of that susceptible dr

                                                    if G.node[doctor][
                                                            "infec_value"] >= infect_threshold:  # becomes  infected

                                                        G.node[doctor][
                                                            "status"] = "I"
                                                        # if G.node[doctor]["type"]=="A":   # fellows participate in the dynamics, but i only consider the attendings as real adopters
                                                        list_I.append(
                                                            G.node[doctor]
                                                            ["label"])

                        new_num_adopters = len(list_I)

                        # Expand the per-shift count into a per-day series:
                        # adoption is assumed to happen mid-shift, so the first
                        # days of a shift carry the old count and the later
                        # days the new one (days beyond max_order are dropped).
                        if shift_length == 5:  # i estimate that adoption happens in the middle of the shift
                            if t + 5 < max_order:
                                list_single_t_evolution.append(
                                    old_num_adopters)
                            if t + 4 < max_order:
                                list_single_t_evolution.append(
                                    old_num_adopters)
                            if t + 3 < max_order:
                                list_single_t_evolution.append(
                                    new_num_adopters)
                            if t + 2 < max_order:
                                list_single_t_evolution.append(
                                    new_num_adopters)
                            if t + 1 < max_order:
                                list_single_t_evolution.append(
                                    new_num_adopters)
                            t += 5

                        elif shift_length == 4:
                            if t + 4 < max_order:
                                list_single_t_evolution.append(
                                    old_num_adopters)
                            if t + 3 < max_order:
                                list_single_t_evolution.append(
                                    old_num_adopters)

                            if t + 2 < max_order:
                                list_single_t_evolution.append(
                                    new_num_adopters)

                            if t + 1 < max_order:
                                list_single_t_evolution.append(
                                    new_num_adopters)
                            t += 4

                        elif shift_length == 3:
                            if t + 3 < max_order:
                                list_single_t_evolution.append(
                                    old_num_adopters)

                            if t + 2 < max_order:
                                list_single_t_evolution.append(
                                    new_num_adopters)

                            if t + 1 < max_order:
                                list_single_t_evolution.append(
                                    new_num_adopters)

                            t += 3

                        elif shift_length == 2:
                            if t + 2 < max_order:
                                list_single_t_evolution.append(
                                    old_num_adopters)

                            if t + 1 < max_order:
                                list_single_t_evolution.append(
                                    new_num_adopters)

                            t += 2

                        elif shift_length == 1:
                            if t + 1 < max_order:
                                list_single_t_evolution.append(
                                    new_num_adopters)

                            t += 1

                        old_num_adopters = new_num_adopters

                        ######## end t loop

                    list_lists_t_evolutions.append(list_single_t_evolution)

                    # Distance of this realization to the actual curve: whole
                    # trajectory, the testing segment (after cutting_day), the
                    # ending point (signed and absolute), and point-by-point.
                    list_dist_fixed_parameters.append(
                        compare_real_evol_vs_simus_to_be_called.
                        compare_two_curves(list_actual_evol,
                                           list_single_t_evolution))
                    list_dist_fixed_parameters_testing_segment.append(
                        compare_real_evol_vs_simus_to_be_called.
                        compare_two_curves_testing_segment(
                            list_actual_evol, list_single_t_evolution,
                            cutting_day))

                    list_abs_dist_at_ending_point_fixed_parameters.append(
                        abs(list_single_t_evolution[-1] - list_actual_evol[-1])
                    )  # i save the distance at the ending point between the current simu and actual evol
                    list_dist_at_ending_point_fixed_parameters.append(
                        list_single_t_evolution[-1] - list_actual_evol[-1]
                    )  # i save the distance at the ending point between the current simu and actual evol
                    list_final_num_infected.append(list_single_t_evolution[-1])

                    for index in range(len(list_single_t_evolution)):

                        list_abs_dist_point_by_point_indiv_simus_to_actual.append(
                            abs(list_single_t_evolution[index] -
                                list_actual_evol[index]))
                        list_dist_point_by_point_indiv_simus_to_actual.append(
                            list_single_t_evolution[index] -
                            list_actual_evol[index])

                ######## end loop Niter

                # (mean dist, std dist, mean |final dist|) for this parameter point.
                list_pair_dist_std_delta_end = []

                list_pair_dist_std_delta_end.append(
                    numpy.mean(list_dist_fixed_parameters)
                )  # average dist between the curves over Niter
                list_pair_dist_std_delta_end.append(
                    numpy.std(list_dist_fixed_parameters))

                list_pair_dist_std_delta_end.append(
                    numpy.mean(list_abs_dist_at_ending_point_fixed_parameters))

                if for_testing_fixed_set == "NO":
                    file3 = open(output_file3,
                                 'at')  # i print out the landscape
                    print >> file3, prob_infection, prob_Immune, numpy.mean(
                        list_abs_dist_at_ending_point_fixed_parameters
                    ), numpy.mean(list_dist_fixed_parameters), numpy.mean(
                        list_final_num_infected), numpy.std(
                            list_final_num_infected)
                    file3.close()

                if (
                        numpy.mean(
                            list_abs_dist_at_ending_point_fixed_parameters)
                ) <= delta_end:  # i only consider situations close enough at the ending point

                    dict_filenames_tot_distance[
                        output_file2] = list_pair_dist_std_delta_end

                # Write the per-day average over the Niter realizations.
                file2 = open(output_file2, 'at')
                for s in range(len(list_single_t_evolution)):
                    list_fixed_t = []
                    for iter in range(Niter):
                        list_fixed_t.append(list_lists_t_evolutions[iter][s])
                    print >> file2, s, numpy.mean(list_fixed_t)
                file2.close()

                print "printed out: ", output_file2
                # raw_input()

                if envelopes == "YES":
                    calculate_envelope_set_curves.calculate_envelope(
                        list_lists_t_evolutions, percent_envelope, "Infection",
                        [prob_infection, prob_Immune])

                if for_testing_fixed_set == "YES":

                    # Fraction of realizations whose final value lands within
                    # delta_end of the actual final value.
                    num_valid_endings = 0.
                    for item in list_abs_dist_at_ending_point_fixed_parameters:
                        if item <= delta_end:  # i count how many realizations i get close enough at the ending point
                            num_valid_endings += 1.

                    print "average distance of the optimum in the testing segment:", numpy.mean(
                        list_dist_fixed_parameters), numpy.std(
                            list_dist_fixed_parameters
                        ), list_dist_fixed_parameters, "\n"
                    print "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                        list_dist_at_ending_point_fixed_parameters
                    ), "SD final dist", numpy.std(
                        list_dist_at_ending_point_fixed_parameters
                    ), list_dist_at_ending_point_fixed_parameters, "\n"

                    # Histograms: raw ending distances, summed trajectory
                    # distances, testing-segment distances, and point-by-point
                    # (absolute and signed) distances.
                    histogram_filename = "../Results/weight_shifts/histogr_raw_distances_ending_infection_memory_p" + str(
                        prob_infection
                    ) + "_Immune" + str(prob_Immune) + "_threshold" + str(
                        infect_threshold) + "_dose" + str(dose) + "_" + str(
                            Niter) + "iter_day" + str(
                                cutting_day) + "_A_F_inferred_middle.dat"

                    histograma_gral_negv_posit.histograma(
                        list_dist_at_ending_point_fixed_parameters,
                        histogram_filename)

                    histogram_filename2 = "../Results/weight_shifts/histogr_sum_dist_traject_infection_memory_p" + str(
                        prob_infection
                    ) + "_Immune" + str(prob_Immune) + "_threshold" + str(
                        infect_threshold) + "_dose" + str(dose) + "_" + str(
                            Niter) + "iter_day" + str(
                                cutting_day) + "_A_F_inferred_middle.dat"

                    histograma_bines_gral.histograma_bins(
                        list_dist_fixed_parameters, Nbins, histogram_filename2)

                    histogram_filename3 = "../Results/weight_shifts/histogr_sum_dist_testing_segment_infection_memory_p" + str(
                        prob_infection
                    ) + "_Immune" + str(prob_Immune) + "_threshold" + str(
                        infect_threshold) + "_dose" + str(dose) + "_" + str(
                            Niter) + "iter_day" + str(
                                cutting_day) + "_A_F_inferred_middle.dat"

                    #print list_dist_fixed_parameters_testing_segment
                    histograma_bines_gral.histograma_bins_zero(
                        list_dist_fixed_parameters_testing_segment, Nbins,
                        histogram_filename3)

                    print min(list_dist_fixed_parameters_testing_segment), max(
                        list_dist_fixed_parameters_testing_segment)

                    histogram_filename4 = "../Results/weight_shifts/histogr_abs_dist_point_by_point_infection_memory_p" + str(
                        prob_infection
                    ) + "_Immune" + str(prob_Immune) + "_threshold" + str(
                        infect_threshold) + "_dose" + str(dose) + "_" + str(
                            Niter) + "iter_day" + str(
                                cutting_day) + "_A_F_inferred_middle.dat"

                    histograma_gral_negv_posit.histograma(
                        list_abs_dist_point_by_point_indiv_simus_to_actual,
                        histogram_filename4)

                    histogram_filename5 = "../Results/weight_shifts/histogr_dist_point_by_point_infection_memory_p" + str(
                        prob_infection
                    ) + "_Immune" + str(prob_Immune) + "_threshold" + str(
                        infect_threshold) + "_dose" + str(dose) + "_" + str(
                            Niter) + "iter_day" + str(
                                cutting_day) + "_A_F_inferred_middle.dat"

                    histograma_gral_negv_posit.histograma(
                        list_dist_point_by_point_indiv_simus_to_actual,
                        histogram_filename5)

                    # Human-readable summary of this parameter point.
                    output_file10 = "../Results/weight_shifts/Summary_results_infection_memory_p" + str(
                        prob_infection
                    ) + "_Immune" + str(prob_Immune) + "_threshold" + str(
                        infect_threshold) + "_dose" + str(dose) + "_" + str(
                            Niter) + "iter_day" + str(
                                cutting_day) + "_A_F_inferred_middle.dat"
                    file10 = open(output_file10, 'wt')

                    print >> file10, "Summary results from best fit infection _memory with", Niter, "iter, and with values for the parameters:  prob_inf ", prob_infection, " prob immune: ", prob_Immune, "\n"

                    print >> file10, "average distance of the optimum in the testing segment:", numpy.mean(
                        list_dist_fixed_parameters), numpy.std(
                            list_dist_fixed_parameters
                        ), list_dist_fixed_parameters, "\n"
                    print >> file10, "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                        list_dist_at_ending_point_fixed_parameters
                    ), "SD final dist", numpy.std(
                        list_dist_at_ending_point_fixed_parameters
                    ), list_dist_at_ending_point_fixed_parameters, "\n"

                    print >> file10, "written optimum best fit evolution file:", output_file2
                    print >> file10, "written histogram file: ", histogram_filename

                    file10.close()

                    print "written Summary file: ", output_file10

                dose += delta_dose
            prob_infection += delta_prob
        prob_Immune += delta_prob_Immune
def main():

    Niter = 10000  #  for bootstrapping

    ####### input network files to collect the info from
    graph_name = "./network_all_users/GC_full_network_all_users_merged_small_comm_roles_diff_layers1_roles_diff_layers1.5.gml"

    ##################

    ######### i build the networks  (remember label attribute matches id in users table)
    G = nx.read_gml(graph_name)
    G_GC = nx.connected_component_subgraphs(G)[0]

    #    print "network size:", len(G.nodes()), "GC size:", len(G_GC.nodes())  # the network IS just the GC     1910

    ################
    csv_file = "analysis_time_bins_bmi_groups/master_users_file_weight_change_first6months_2w_ins.txt"
    #############

    dict_label_ck_id = {}
    dict_ck_id_label = {}

    dict_id_label = {}
    dict_label_id = {}
    for node in G.nodes():
        label = G.node[node]["label"]
        dict_id_label[node] = label
        dict_label_id[label] = node

    print "getting user's info from csv....."
    ################# getting info from csv

    file_csv_info = open(csv_file, 'r')
    list_lines_file_csv_info = file_csv_info.readlines()

    cont = 0
    cont_2wins = 0
    cont_small_clusters = 0
    cont_networked = 0
    cont_GC = 0
    cont_one_weigh_in = 0
    cont_non_networked = 0

    list_percent_weight_changes_6months_all_2wins = []
    list_percent_weight_changes_6months_networked = []
    list_percent_weight_changes_6months_non_networked = []
    list_percent_weight_changes_6months_GC = []
    list_percent_weight_changes_6months_small_clusters = []
    list_percent_weight_changes_6months_with_R6friends = []

    list_percent_weight_changes_6months_0R6s = []
    list_percent_weight_changes_6months_1R6s = []
    list_percent_weight_changes_6months_2R6s = []
    list_percent_weight_changes_6months_3R6s = []
    list_percent_weight_changes_6months_4R6s = []
    list_percent_weight_changes_6months_5R6s = []
    list_percent_weight_changes_6months_6R6s = []

    cont_fat_fingers = 0
    list_users = []
    for line in list_lines_file_csv_info:
        if cont > 0:
            list_elements_line = line.strip("\r\n").split(" ")

            ck_id = str(list_elements_line[0])
            label = str(list_elements_line[1])

            if ck_id not in list_users:
                list_users.append(ck_id)

            dict_label_ck_id[label] = ck_id
            dict_ck_id_label[ck_id] = label

            weigh_change_6months = float(list_elements_line[2])
            percent_weight_change_6months = float(list_elements_line[3])

            if percent_weight_change_6months < 100. and percent_weight_change_6months > -100.:

                number_weigh_ins_6months = int(list_elements_line[4])
                activity_6months = int(list_elements_line[5])
                degree = int(list_elements_line[20])
                p_friends = int(list_elements_line[15])

                if number_weigh_ins_6months >= 2:
                    cont_2wins += 1
                    list_percent_weight_changes_6months_all_2wins.append(
                        percent_weight_change_6months)

                    if p_friends == 1:
                        cont_networked += 1
                        list_percent_weight_changes_6months_networked.append(
                            percent_weight_change_6months)

                        if label in dict_label_id:  #GC
                            node = dict_label_id[label]
                            list_percent_weight_changes_6months_GC.append(
                                percent_weight_change_6months)
                            cont_GC += 1

                            if G.node[node]["R6_overlap"] > 0:
                                list_percent_weight_changes_6months_with_R6friends.append(
                                    percent_weight_change_6months)

                                if G.node[node]["R6_overlap"] == 1:
                                    list_percent_weight_changes_6months_1R6s.append(
                                        percent_weight_change_6months)
                                elif G.node[node]["R6_overlap"] == 2:
                                    list_percent_weight_changes_6months_2R6s.append(
                                        percent_weight_change_6months)
                                elif G.node[node]["R6_overlap"] == 3:
                                    list_percent_weight_changes_6months_3R6s.append(
                                        percent_weight_change_6months)
                                elif G.node[node]["R6_overlap"] == 4:
                                    list_percent_weight_changes_6months_4R6s.append(
                                        percent_weight_change_6months)
                                elif G.node[node]["R6_overlap"] == 5:
                                    list_percent_weight_changes_6months_5R6s.append(
                                        percent_weight_change_6months)
                                elif G.node[node]["R6_overlap"] >= 6:
                                    list_percent_weight_changes_6months_6R6s.append(
                                        percent_weight_change_6months)

                            else:
                                list_percent_weight_changes_6months_0R6s.append(
                                    percent_weight_change_6months)

                        else:
                            list_percent_weight_changes_6months_small_clusters.append(
                                percent_weight_change_6months)
                            cont_small_clusters += 1

                    else:
                        list_percent_weight_changes_6months_non_networked.append(
                            percent_weight_change_6months)
                        cont_non_networked += 1
                else:
                    cont_one_weigh_in += 1

            else:
                cont_fat_fingers += 1

        cont += 1

    ##############

    print "number of fat fingers:", cont_fat_fingers, "(excluded)"

    print "total sample size:", len(
        list_users), "\n with >=2 w-ins:", cont_2wins

    print "networked:", cont_networked, "   non-networked:", cont_non_networked, "\nsize GC from csv:", cont_GC, "\nsmall clusters:", cont_small_clusters
    print "users with just one weigh in:", cont_one_weigh_in

    print "\navg. percent weight change:"
    print "  all with two w-ins:", numpy.mean(
        list_percent_weight_changes_6months_all_2wins), "+/-", numpy.std(
            list_percent_weight_changes_6months_all_2wins) / numpy.sqrt(
                float(len(list_percent_weight_changes_6months_all_2wins) -
                      1.)), "set size:", len(
                          list_percent_weight_changes_6months_all_2wins)
    print "  non networked:", numpy.mean(
        list_percent_weight_changes_6months_non_networked
    ), "+/-", numpy.std(
        list_percent_weight_changes_6months_non_networked) / numpy.sqrt(
            float(len(list_percent_weight_changes_6months_non_networked) -
                  1.)), "set size:", len(
                      list_percent_weight_changes_6months_non_networked)

    print "  networked:", numpy.mean(
        list_percent_weight_changes_6months_networked), "+/-", numpy.std(
            list_percent_weight_changes_6months_networked) / numpy.sqrt(
                float(len(list_percent_weight_changes_6months_networked) -
                      1.)), "set size:", len(
                          list_percent_weight_changes_6months_networked)

    print "  small clusters:", numpy.mean(
        list_percent_weight_changes_6months_small_clusters), "+/-", numpy.std(
            list_percent_weight_changes_6months_small_clusters) / numpy.sqrt(
                float(
                    len(list_percent_weight_changes_6months_small_clusters) -
                    1.)), "set size:", len(
                        list_percent_weight_changes_6months_small_clusters)

    print "  GC:", numpy.mean(
        list_percent_weight_changes_6months_GC
    ), "+/-", numpy.std(list_percent_weight_changes_6months_GC) / numpy.sqrt(
        float(len(list_percent_weight_changes_6months_GC) -
              1.)), "set size:", len(list_percent_weight_changes_6months_GC)

    print "  without R6s:", numpy.mean(
        list_percent_weight_changes_6months_0R6s
    ), "+/-", numpy.std(list_percent_weight_changes_6months_0R6s) / numpy.sqrt(
        float(len(list_percent_weight_changes_6months_0R6s) -
              1.)), "set size:", len(list_percent_weight_changes_6months_0R6s)

    print "  with R6s:", numpy.mean(
        list_percent_weight_changes_6months_with_R6friends), "+/-", numpy.std(
            list_percent_weight_changes_6months_with_R6friends) / numpy.sqrt(
                float(
                    len(list_percent_weight_changes_6months_with_R6friends) -
                    1.)), "set size:", len(
                        list_percent_weight_changes_6months_with_R6friends
                    ), "\n"

    ##########################################################
    ############  Bootstrap for comparing the different sets:

    print "\n\nComparing Networked population vs. 2-weigh-in population"
    list_synthetic_averages_for_distribution = []  # 2 w-ins vs networked pop.
    for i in range(Niter):
        synthetic_mean = numpy.mean(
            sample_with_replacement(
                list_percent_weight_changes_6months_all_2wins,
                len(list_percent_weight_changes_6months_networked)))
        list_synthetic_averages_for_distribution.append(synthetic_mean)
    #  print  synthetic_mean

    print "average all synthetic values:", numpy.mean(
        list_synthetic_averages_for_distribution), "+/-", numpy.std(
            list_synthetic_averages_for_distribution)

    print "z-score synthetic networked population weight change vs. 2weigh-in pop.:", (
        numpy.mean(list_synthetic_averages_for_distribution) -
        numpy.mean(list_percent_weight_changes_6months_networked)
    ) / numpy.std(list_synthetic_averages_for_distribution)

    histograma_bines_gral.histograma_bins(
        list_synthetic_averages_for_distribution, 50,
        "./analysis_time_bins_bmi_groups/histogram_synthetic_networked_from_2wins_weight_changes.dat"
    )

    #####
    print "\n\nComparing GC population vs. 2-weigh-in population"
    list_synthetic_averages_for_distribution = []  # 2 w-ins vs networked pop.
    for i in range(Niter):
        synthetic_mean = numpy.mean(
            sample_with_replacement(
                list_percent_weight_changes_6months_all_2wins,
                len(list_percent_weight_changes_6months_GC)))
        list_synthetic_averages_for_distribution.append(synthetic_mean)
    #  print  synthetic_mean

    print "average all synthetic values:", numpy.mean(
        list_synthetic_averages_for_distribution), "+/-", numpy.std(
            list_synthetic_averages_for_distribution)

    print "z-score synthetic GC population weight change vs. 2weigh-in population:", (
        numpy.mean(list_synthetic_averages_for_distribution) -
        numpy.mean(list_percent_weight_changes_6months_GC)
    ) / numpy.std(list_synthetic_averages_for_distribution)

    histograma_bines_gral.histograma_bins(
        list_synthetic_averages_for_distribution, 50,
        "./analysis_time_bins_bmi_groups/histogram_synthetic_GC_from_2wins_weight_changes.dat"
    )

    #####
    print "\n\nComparing R6s friends population vs. 2-weigh-in population"
    list_synthetic_averages_for_distribution = []  # 2 w-ins vs networked pop.
    for i in range(Niter):
        synthetic_mean = numpy.mean(
            sample_with_replacement(
                list_percent_weight_changes_6months_all_2wins,
                len(list_percent_weight_changes_6months_with_R6friends)))
        list_synthetic_averages_for_distribution.append(synthetic_mean)
    #  print  synthetic_mean

    print "average all synthetic values:", numpy.mean(
        list_synthetic_averages_for_distribution), "+/-", numpy.std(
            list_synthetic_averages_for_distribution)

    print "z-score synthetic R6s friends population weight change vs. 2weigh-in population:", (
        numpy.mean(list_synthetic_averages_for_distribution) -
        numpy.mean(list_percent_weight_changes_6months_with_R6friends)
    ) / numpy.std(list_synthetic_averages_for_distribution)

    histograma_bines_gral.histograma_bins(
        list_synthetic_averages_for_distribution, 50,
        "./analysis_time_bins_bmi_groups/histogram_synthetic_R6s_friends_from_2wins_weight_changes.dat"
    )

    #####
    print "\n\nComparing Small clusters population vs. 2-weigh-in population"
    list_synthetic_averages_for_distribution = []  # 2 w-ins vs networked pop.
    for i in range(Niter):
        synthetic_mean = numpy.mean(
            sample_with_replacement(
                list_percent_weight_changes_6months_all_2wins,
                len(list_percent_weight_changes_6months_small_clusters)))
        list_synthetic_averages_for_distribution.append(synthetic_mean)
    #  print  synthetic_mean

    print "average all synthetic values:", numpy.mean(
        list_synthetic_averages_for_distribution), "+/-", numpy.std(
            list_synthetic_averages_for_distribution)

    print "z-score synthetic small clusters population weight change vs. 2weigh-in population:", (
        numpy.mean(list_synthetic_averages_for_distribution) -
        numpy.mean(list_percent_weight_changes_6months_small_clusters)
    ) / numpy.std(list_synthetic_averages_for_distribution)

    histograma_bines_gral.histograma_bins(
        list_synthetic_averages_for_distribution, 50,
        "./analysis_time_bins_bmi_groups/histogram_synthetic_small_clusters_from_2wins_weight_changes.dat"
    )

    #####
    print "\n\nComparing Non-networked population vs. 2-weigh-in population"
    list_synthetic_averages_for_distribution = []  # 2 w-ins vs networked pop.
    for i in range(Niter):
        synthetic_mean = numpy.mean(
            sample_with_replacement(
                list_percent_weight_changes_6months_all_2wins,
                len(list_percent_weight_changes_6months_non_networked)))
        list_synthetic_averages_for_distribution.append(synthetic_mean)
    #  print  synthetic_mean

    print "average all synthetic values:", numpy.mean(
        list_synthetic_averages_for_distribution), "+/-", numpy.std(
            list_synthetic_averages_for_distribution)

    print "z-score synthetic Non networked population weight change vs. 2weigh-in population:", (
        numpy.mean(list_synthetic_averages_for_distribution) -
        numpy.mean(list_percent_weight_changes_6months_non_networked)
    ) / numpy.std(list_synthetic_averages_for_distribution)

    histograma_bines_gral.histograma_bins(
        list_synthetic_averages_for_distribution, 50,
        "./analysis_time_bins_bmi_groups/histogram_synthetic_non_networked_from_2wins_weight_changes.dat"
    )

    #######
    #########

    print "\n\nComparing GC population vs Networked population"
    list_synthetic_averages_for_distribution = []  # 2 w-ins vs networked pop.
    for i in range(Niter):
        synthetic_mean = numpy.mean(
            sample_with_replacement(
                list_percent_weight_changes_6months_networked,
                len(list_percent_weight_changes_6months_GC)))
        list_synthetic_averages_for_distribution.append(synthetic_mean)
    #  print  synthetic_mean

    print "average all synthetic values:", numpy.mean(
        list_synthetic_averages_for_distribution), "+/-", numpy.std(
            list_synthetic_averages_for_distribution)

    print "z-score synthetic GC population weight change vs. Networked population:", (
        numpy.mean(list_synthetic_averages_for_distribution) -
        numpy.mean(list_percent_weight_changes_6months_GC)
    ) / numpy.std(list_synthetic_averages_for_distribution)

    histograma_bines_gral.histograma_bins(
        list_synthetic_averages_for_distribution, 50,
        "./analysis_time_bins_bmi_groups/histogram_synthetic_GC_from_networked_weight_changes.dat"
    )

    #####

    print "\n\nComparing R6s friends population vs Networked population"
    list_synthetic_averages_for_distribution = []  # 2 w-ins vs networked pop.
    for i in range(Niter):
        synthetic_mean = numpy.mean(
            sample_with_replacement(
                list_percent_weight_changes_6months_networked,
                len(list_percent_weight_changes_6months_with_R6friends)))
        list_synthetic_averages_for_distribution.append(synthetic_mean)
    #  print  synthetic_mean

    print "average all synthetic values:", numpy.mean(
        list_synthetic_averages_for_distribution), "+/-", numpy.std(
            list_synthetic_averages_for_distribution)

    print "z-score synthetic R6s friends population weight change vs. Networked population:", (
        numpy.mean(list_synthetic_averages_for_distribution) -
        numpy.mean(list_percent_weight_changes_6months_with_R6friends)
    ) / numpy.std(list_synthetic_averages_for_distribution)

    histograma_bines_gral.histograma_bins(
        list_synthetic_averages_for_distribution, 50,
        "./analysis_time_bins_bmi_groups/histogram_synthetic_with_R6friends_from_networked_weight_changes.dat"
    )

    #####

    print "\n\nComparing small clusters population vs Networked population"
    list_synthetic_averages_for_distribution = []  # 2 w-ins vs networked pop.
    for i in range(Niter):
        synthetic_mean = numpy.mean(
            sample_with_replacement(
                list_percent_weight_changes_6months_networked,
                len(list_percent_weight_changes_6months_small_clusters)))
        list_synthetic_averages_for_distribution.append(synthetic_mean)
    #  print  synthetic_mean

    print "average all synthetic values:", numpy.mean(
        list_synthetic_averages_for_distribution), "+/-", numpy.std(
            list_synthetic_averages_for_distribution)

    print "z-score synthetic small clusters population weight change vs. Networked population:", (
        numpy.mean(list_synthetic_averages_for_distribution) -
        numpy.mean(list_percent_weight_changes_6months_small_clusters)
    ) / numpy.std(list_synthetic_averages_for_distribution)

    histograma_bines_gral.histograma_bins(
        list_synthetic_averages_for_distribution, 50,
        "./analysis_time_bins_bmi_groups/histogram_synthetic_small_clusters_from_networked_weight_changes.dat"
    )

    #####
    #####

    print "\n\nComparing R6s friends population vs GC population"
    list_synthetic_averages_for_distribution = []  # 2 w-ins vs networked pop.
    for i in range(Niter):
        synthetic_mean = numpy.mean(
            sample_with_replacement(
                list_percent_weight_changes_6months_GC,
                len(list_percent_weight_changes_6months_with_R6friends)))
        list_synthetic_averages_for_distribution.append(synthetic_mean)
    #  print  synthetic_mean

    print "average all synthetic values:", numpy.mean(
        list_synthetic_averages_for_distribution), "+/-", numpy.std(
            list_synthetic_averages_for_distribution)

    print "z-score synthetic R6s friends  population weight change vs. GC population:", (
        numpy.mean(list_synthetic_averages_for_distribution) -
        numpy.mean(list_percent_weight_changes_6months_with_R6friends)
    ) / numpy.std(list_synthetic_averages_for_distribution)

    histograma_bines_gral.histograma_bins(
        list_synthetic_averages_for_distribution, 50,
        "./analysis_time_bins_bmi_groups/histogram_synthetic_with_R6friends_from_GC_weight_changes.dat"
    )

    print "\n"

    ####################

    histograma_bines_gral.histograma_bins(
        list_percent_weight_changes_6months_all_2wins, 50,
        "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_2weigh_ins.dat"
    )

    histograma_bines_gral.histograma_bins(
        list_percent_weight_changes_6months_networked, 50,
        "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_networked.dat"
    )

    histograma_bines_gral.histograma_bins(
        list_percent_weight_changes_6months_non_networked, 50,
        "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_non_networked.dat"
    )

    histograma_bines_gral.histograma_bins(
        list_percent_weight_changes_6months_GC, 50,
        "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_GC.dat"
    )

    histograma_bines_gral.histograma_bins(
        list_percent_weight_changes_6months_small_clusters, 50,
        "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_small_clusters.dat"
    )

    histograma_bines_gral.histograma_bins(
        list_percent_weight_changes_6months_with_R6friends, 50,
        "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_with_R6friends.dat"
    )

    histograma_bines_gral.histograma_bins(
        list_percent_weight_changes_6months_0R6s, 50,
        "./analysis_time_bins_bmi_groups/histogram_real_weigh_change_distrib_without_0R6s_friends.dat"
    )

    #################

    print "\nRegarding weight change and having one or more R6s as friends:"
    print "  0 R6s:", numpy.mean(
        list_percent_weight_changes_6months_0R6s
    ), numpy.std(list_percent_weight_changes_6months_0R6s) / numpy.sqrt(
        float(len(list_percent_weight_changes_6months_0R6s) -
              1.)), " size:", len(list_percent_weight_changes_6months_0R6s)
    print "  1 R6s:", numpy.mean(
        list_percent_weight_changes_6months_1R6s
    ), numpy.std(list_percent_weight_changes_6months_1R6s) / numpy.sqrt(
        float(len(list_percent_weight_changes_6months_1R6s) -
              1.)), " size:", len(list_percent_weight_changes_6months_1R6s)
    print "  2 R6s:", numpy.mean(
        list_percent_weight_changes_6months_2R6s
    ), numpy.std(list_percent_weight_changes_6months_2R6s) / numpy.sqrt(
        float(len(list_percent_weight_changes_6months_2R6s) -
              1.)), " size:", len(list_percent_weight_changes_6months_2R6s)
    print "  3 R6s:", numpy.mean(
        list_percent_weight_changes_6months_3R6s
    ), numpy.std(list_percent_weight_changes_6months_3R6s) / numpy.sqrt(
        float(len(list_percent_weight_changes_6months_3R6s) -
              1.)), " size:", len(list_percent_weight_changes_6months_3R6s)
    print "  4 R6s:", numpy.mean(
        list_percent_weight_changes_6months_4R6s
    ), numpy.std(list_percent_weight_changes_6months_4R6s) / numpy.sqrt(
        float(len(list_percent_weight_changes_6months_4R6s) -
              1.)), " size:", len(list_percent_weight_changes_6months_4R6s)
    print "  5 R6s:", numpy.mean(
        list_percent_weight_changes_6months_5R6s
    ), numpy.std(list_percent_weight_changes_6months_5R6s) / numpy.sqrt(
        float(len(list_percent_weight_changes_6months_5R6s) -
              1.)), " size:", len(list_percent_weight_changes_6months_5R6s)
    print "  >= 6 R6s:", numpy.mean(
        list_percent_weight_changes_6months_6R6s
    ), numpy.std(list_percent_weight_changes_6months_6R6s) / numpy.sqrt(
        float(len(list_percent_weight_changes_6months_5R6s) -
              1.)), " size:", len(list_percent_weight_changes_6months_6R6s)
# ---- Ejemplo n.º 17 ----  (snippet separator left over from extraction;
# 0                          converted to comments so the file stays valid Python)
def main(graph_name_GC):

    H1 = nx.read_gml(graph_name_GC)  # just GC, but with Role info
    H1 = nx.connected_component_subgraphs(H1)[0]

    print len(H1.nodes())

    list_R6_labels = []
    dicc_label_node = {}
    list_network_ids = []
    for node in H1.nodes():

        if (H1.node[node]['role'] == "special_R6"):
            H1.node[node]['role'] = "R6"

        list_network_ids.append(
            int(H1.node[node]['label'])
        )  # this actually corresponds to the id from the users table in the DB
        dicc_label_node[int(H1.node[node]['label'])] = node

        if (H1.node[node]['role'] == "R6"):
            list_R6_labels.append(
                int(H1.node[node]['label'])
            )  # this actually corresponds to the id from the users table in the DB

    #print "# R6s:",len(list_R6_labels)

#  print len(dicc_label_node)

    database = "calorie_king_social_networking_2010"
    server = "tarraco.chem-eng.northwestern.edu"
    user = "******"
    passwd = "n1ckuDB!"

    db = Connection(server, database, user, passwd)

    query1 = """select * from users"""
    result1 = db.query(query1)  # is a list of dict.

    file1 = open("num_messg_to_friends_vs_Gini.dat", 'wt')
    file2 = open("num_messg_from_friends_vs_Gini.dat", 'wt')
    file3 = open("num_messg_friends_vs_Gini.dat", 'wt')

    file11 = open("num_messg_to_friends_vs_Gini_R6s.dat", 'wt')
    file12 = open("num_messg_from_friends_vs_Gini_R6s.dat", 'wt')
    file13 = open("num_messg_friends_vs_Gini_R6s.dat", 'wt')

    file111 = open("num_messg_to_friends_vs_Gini_R6overlap.dat", 'wt')
    file112 = open("num_messg_from_friends_vs_Gini_R6overlap.dat", 'wt')
    file113 = open("num_messg_friends_vs_Gini_R6overlap.dat", 'wt')

    file211 = open("num_blog_posts.dat", 'wt')
    file212 = open("num_home_page_posts.dat", 'wt')
    file213 = open("num_lesson_comments.dat", 'wt')
    file214 = open("num_forum_posts.dat", 'wt')
    file214 = open("num_tot_public_messages.dat", 'wt')

    dict_characteristics_users = {}

    dicc_ck_label = {}
    for r1 in result1:  #first i build a dicc ck_id vs. label
        ck_id = r1['ck_id']
        label = int(
            r1['id'])  # this corresponds to the 'label' in the gml files
        dicc_ck_label[ck_id] = label

        try:
            node = dicc_label_node[label]
            H1.node[node]['ck_id'] = ck_id
        #  print "\n",H1.node[node]['ck_id'], label
        except KeyError:
            pass

    print len(dicc_ck_label)

    list_sent_from_not_friends = []
    list_sent_to_not_friends = []
    list_tot_sent = []
    list_tot_received = []

    list_to_friends = []
    list_from_friends = []
    list_tot_messg_friends = []

    list_GINI_weighted_to_friends = []  # one value per USER
    list_GINI_weighted_from_friends = []
    list_GINI_weighted_tot_messg_friends = []

    list_GINI_weighted_to_friends_R6s = []  # one value per USER
    list_GINI_weighted_from_friends_R6s = []
    list_GINI_weighted_tot_messg_friends_R6s = []

    list_GINI_weighted_to_friends_R6overlap = []  # one value per USER
    list_GINI_weighted_from_friends_R6overlap = []
    list_GINI_weighted_tot_messg_friends_R6overlap = []

    list_weights_friendships = []

    list_weights_friendships_with_R6s = []
    list_weights_friendships_to_R6s = []
    list_weights_friendships_from_R6s = []

    list_blog_posts = []
    list_home_page = []
    list_forum_posts = []
    list_lesson_com = []
    list_tot_public_mess = []

    num_users = 0.
    for r1 in result1:  #loop over users
        num_users += 1.

        print int(num_users)
        ck_id = r1['ck_id']
        label = int(
            r1['id'])  # this corresponds to the 'label' in the gml files
        try:
            node = dicc_label_node[label]
        except KeyError:
            pass

        query6 = "SELECT * FROM activity_combined where activity_flag != 'WI' and  activity_flag != 'PM' and ck_id='" + str(
            ck_id) + "'   "
        result6 = db.query(query6)

        tot_public_mess = len(result6)

        blog_posts = 0
        home_page = 0
        forum_posts = 0
        lesson_com = 0
        for r6 in result6:

            if r6['activity_flag'] == 'BP':
                blog_posts += 1
            elif r6['activity_flag'] == 'HP':
                home_page += 1
            elif r6['activity_flag'] == 'FP':
                forum_posts += 1
            elif r6['activity_flag'] == 'LC':
                lesson_com += 1

        list_blog_posts.append(blog_posts)
        list_home_page.append(home_page)
        list_forum_posts.append(forum_posts)
        list_lesson_com.append(lesson_com)
        list_tot_public_mess.append(tot_public_mess)

        print ck_id, tot_public_mess, blog_posts, home_page, forum_posts, lesson_com

        # if num_users <=5000:    # JUST TO TEST THE CODE

        if label in list_network_ids:  # if the user is in the network, i check how many messages they send each other
            to_not_friends = 0
            from_not_friends = 0
            print "\n\nnode label", label, ck_id, "has degree:", H1.degree(
                node)

            query2 = "select  * from friends where (src ='" + str(
                ck_id) + "')or (dest ='" + str(ck_id) + "') "
            result2 = db.query(query2)
            degree = len(result2)

            query3 = "select  * from private_messages where (src_id ='" + str(
                ck_id
            ) + "')  "  # there are messages sent by/to people not in the Users table, that is because they join the system prior 1-jan-2009, and are not part of the 47,000 users.
            result3 = db.query(query3)
            num_sent = float(len(result3))
            list_tot_sent.append(num_sent)

            for r3 in result3:  # i count how many messages are sent to friends and non-friends

                ck_friend = r3['dest_id']
                if ck_friend in dicc_ck_label:  #  because some messages are NOT sent by users (join date prior jan.2009)
                    label_friend = dicc_ck_label[ck_friend]

                    if label_friend in dicc_label_node:
                        node_friend = dicc_label_node[label_friend]

                        flag_friend = 0
                        node_sender = dicc_label_node[
                            label]  # the user i am currently studying
                        for n in H1.neighbors(node_sender):
                            if n == node_friend:
                                flag_friend = 1

                        if flag_friend == 0:
                            to_not_friends += 1

            query4 = "select  * from private_messages where (dest_id ='" + str(
                ck_id) + "')  "
            result4 = db.query(query4)
            num_received = float(len(result4))
            list_tot_received.append(float(num_received))

            for r4 in result4:  # i count how many messages are from friends and non-friends

                ck_friend = str(r4['src_id'])
                if ck_friend in dicc_ck_label:  # i double check, because some messages are NOT sent by users...(join date prior jan.2009)
                    label_friend = dicc_ck_label[ck_friend]

                    if label_friend in dicc_label_node:
                        node_friend = dicc_label_node[label_friend]

                        flag_friend = 0
                        node_receiver = dicc_label_node[label]
                        for n in H1.neighbors(node_receiver):
                            if n == node_friend:
                                flag_friend = 1

                        if flag_friend == 0:
                            from_not_friends += 1

            query5 = "select  * from private_messages where (src_id ='" + str(
                ck_id) + "')or (dest_id ='" + str(
                    ck_id) + "') "  # all messages
            result5 = db.query(query5)
            num_tot_messg = float(len(result5))

            num_messg_friends = 0.
            num_messg_to_friends = 0.
            num_messg_from_friends = 0.
            flag_sent = 0
            flag_received = 0

            list_weighted_to_friends = [
            ]  # one value per FRIEND of a given user
            list_weighted_from_friends = []
            list_weighted_tot_messg_friends = []

            list_weighted_to_friends_norm = [
            ]  # one value per FRIEND of a given user, normalized by the tot number of messages that user sent
            list_weighted_from_friends_norm = []
            list_weighted_tot_messg_friends_norm = []

            list_weighted_to_friends_R6s_norm = [
            ]  # one value per FRIEND of a given user, normalized by the tot number of messages that user sent
            list_weighted_from_friends_R6s_norm = []
            list_weighted_tot_messg_friends_R6s_norm = []

            for f in H1.neighbors(node):

                messg_to_one_friend = 0.  #looking at a particular friend
                messg_from_one_friend = 0.
                messg_one_friend = 0.

                for r5 in result5:

                    if r5['src_id'] == ck_id and r5['dest_id'] == H1.node[f][
                            'ck_id']:
                        num_messg_to_friends += 1.
                        num_messg_friends += 1.
                        flag_sent = 1

                        messg_to_one_friend += 1.
                        messg_one_friend += 1.

                    elif r5['dest_id'] == ck_id and r5['src_id'] == H1.node[f][
                            'ck_id']:
                        num_messg_from_friends += 1.
                        num_messg_friends += 1.
                        flag_received = 1

                        messg_from_one_friend += 1.
                        messg_one_friend += 1.

                list_weighted_to_friends.append(
                    messg_to_one_friend
                )  # weight of each friendship    (not normalized)
                list_weighted_from_friends.append(messg_from_one_friend)
                list_weighted_tot_messg_friends.append(messg_one_friend)

                if H1.node[f]['role'] == 'R6':  #if the friend is an R6s
                    list_weights_friendships_to_R6s.append(
                        messg_from_one_friend)
                    list_weighted_to_friends_R6s_norm.append(
                        messg_to_one_friend)

                if H1.node[node]['role'] == 'R6':
                    list_weights_friendships_from_R6s.append(
                        messg_to_one_friend)
                    list_weighted_from_friends_R6s_norm.append(
                        messg_from_one_friend)

                if H1.node[node]['role'] == 'R6' or H1.node[f]['role'] == 'R6':
                    list_weights_friendships_with_R6s.append(messg_one_friend)
                    list_weighted_tot_messg_friends_R6s_norm.append(
                        messg_one_friend)

            for item in list_weighted_tot_messg_friends:
                if sum(list_weighted_tot_messg_friends) > 0:
                    list_weighted_tot_messg_friends_norm.append(
                        item / sum(list_weighted_tot_messg_friends))

            for item in list_weighted_to_friends:
                if sum(list_weighted_to_friends) > 0:
                    list_weighted_to_friends_norm.append(
                        item / sum(list_weighted_to_friends))

            for item in list_weighted_from_friends:
                if sum(list_weighted_from_friends) > 0:
                    list_weighted_from_friends_norm.append(
                        item / sum(list_weighted_from_friends))

            for i in range(
                    len(list_weighted_tot_messg_friends_R6s_norm)
            ):  # how important is the communication with any R6 friend, compare to the tot # messag
                if sum(list_weighted_tot_messg_friends_R6s_norm) > 0:
                    list_weighted_tot_messg_friends_R6s_norm[
                        i] = list_weighted_tot_messg_friends_R6s_norm[
                            i] / float(
                                sum(list_weighted_tot_messg_friends_R6s_norm))

            for i in range(len(list_weighted_to_friends_R6s_norm)):
                if sum(list_weighted_to_friends_R6s_norm) > 0:
                    list_weighted_to_friends_R6s_norm[
                        i] = list_weighted_to_friends_R6s_norm[i] / float(
                            sum(list_weighted_to_friends_R6s_norm))

            for i in range(len(list_weighted_from_friends_R6s_norm)):
                if sum(list_weighted_from_friends_R6s_norm) > 0:
                    list_weighted_from_friends_R6s_norm[
                        i] = list_weighted_from_friends_R6s_norm[i] / float(
                            sum(list_weighted_from_friends_R6s_norm))

# no puedo normalizar over and over again los primero elementos muchas mas veces que los ultimos agnadidos!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            for i in range(
                    len(list_weighted_tot_messg_friends)
            ):  # how important is the communication with any R6 friend, compare to the tot # messag
                list_weights_friendships_with_R6s.append(
                    list_weighted_tot_messg_friends[i])

            for i in range(len(list_weighted_to_friends)):
                list_weights_friendships_to_R6s.append(
                    list_weighted_to_friends[i])

            for i in range(len(list_weighted_from_friends)):
                list_weights_friendships_from_R6s.append(
                    list_weighted_from_friends[i])

            list_to_friends.append(num_messg_to_friends)
            list_from_friends.append(num_messg_from_friends)
            list_tot_messg_friends.append(num_messg_friends)

            #  print "norm list weighted tot friendships:",list_weighted_tot_messg_friends_norm,"with R6s:",list_weights_friendships_with_R6s

            # i calculate how skewed friendships for a given user are:
            if len(list_weighted_to_friends) > 0 and sum(
                    list_weighted_to_friends) > 0:
                Gini_to_friends = GINI_coef.calculate_GINI(
                    list_weighted_to_friends)
                list_GINI_weighted_to_friends.append(
                    Gini_to_friends)  # one value per USER

                #  print  ",to friends: ", list_weighted_to_friends, sum(list_weighted_to_friends),Gini_to_friends
                print >> file1, H1.degree(
                    node), sum(list_weighted_to_friends
                               ), sum(list_weighted_to_friends) / float(
                                   H1.degree(node)), Gini_to_friends

                if (H1.node[node]['role'] == "R6"):
                    list_GINI_weighted_to_friends_R6s.append(Gini_to_friends)
                    print >> file11, H1.degree(
                        node), sum(list_weighted_to_friends
                                   ), sum(list_weighted_to_friends) / float(
                                       H1.degree(node)), Gini_to_friends

                if (H1.node[node]['R6_overlap'] > 0):
                    list_GINI_weighted_to_friends_R6overlap.append(
                        Gini_to_friends)
                    print >> file111, H1.degree(
                        node), sum(list_weighted_to_friends
                                   ), sum(list_weighted_to_friends) / float(
                                       H1.degree(node)), Gini_to_friends

            if len(list_weighted_from_friends) > 0 and sum(
                    list_weighted_from_friends) > 0:
                Gini_from_friends = GINI_coef.calculate_GINI(
                    list_weighted_from_friends)
                list_GINI_weighted_from_friends.append(Gini_from_friends)

                # print  ",from friends: ", list_weighted_from_friends, sum(list_weighted_from_friends),Gini_from_friends
                print >> file2, H1.degree(
                    node), sum(list_weighted_from_friends
                               ), sum(list_weighted_from_friends) / float(
                                   H1.degree(node)), Gini_from_friends

                if (H1.node[node]['role'] == "R6"):
                    list_GINI_weighted_from_friends_R6s.append(
                        Gini_from_friends)
                    print >> file12, H1.degree(
                        node), sum(list_weighted_from_friends
                                   ), sum(list_weighted_from_friends) / float(
                                       H1.degree(node)), Gini_from_friends

                if (H1.node[node]['R6_overlap'] > 0):
                    list_GINI_weighted_from_friends_R6overlap.append(
                        Gini_from_friends)
                    print >> file112, H1.degree(
                        node), sum(list_weighted_from_friends
                                   ), sum(list_weighted_from_friends) / float(
                                       H1.degree(node)), Gini_from_friends

            if len(list_weighted_tot_messg_friends) > 0 and sum(
                    list_weighted_from_friends) > 0:
                Gini_friends = GINI_coef.calculate_GINI(
                    list_weighted_tot_messg_friends)
                list_GINI_weighted_tot_messg_friends.append(Gini_friends)

                #  print  ",tot: ",list_weighted_tot_messg_friends , sum(list_weighted_tot_messg_friends),Gini_friends
                print >> file3, H1.degree(
                    node), sum(list_weighted_tot_messg_friends
                               ), sum(list_weighted_tot_messg_friends) / float(
                                   H1.degree(node)), Gini_friends

                if (H1.node[node]['role'] == "R6"):
                    list_GINI_weighted_tot_messg_friends_R6s.append(
                        Gini_friends)
                    print >> file13, H1.degree(node), sum(
                        list_weighted_tot_messg_friends
                    ), sum(list_weighted_tot_messg_friends) / float(
                        H1.degree(node)), Gini_friends

                if (H1.node[node]['R6_overlap'] > 0):
                    list_GINI_weighted_tot_messg_friends_R6overlap.append(
                        Gini_friends)
                    print >> file113, H1.degree(node), sum(
                        list_weighted_tot_messg_friends
                    ), sum(list_weighted_tot_messg_friends) / float(
                        H1.degree(node)), Gini_friends

            if num_received != 0:
                list_sent_from_not_friends.append(float(from_not_friends))
            if num_sent != 0:
                list_sent_to_not_friends.append(float(to_not_friends))

    file1.close()
    file2.close()
    file3.close()

    print "average from_not_friends:", numpy.mean(list_sent_from_not_friends)
    print "average to_not_friends:", numpy.mean(list_sent_to_not_friends)

    print "average to_friends:", numpy.mean(list_to_friends)
    print "average from_friends:", numpy.mean(list_from_friends)
    print "average tot messg friends:", numpy.mean(list_tot_messg_friends)

    print "average tot sent:", numpy.mean(list_tot_sent)
    print "average tot received:", numpy.mean(list_tot_received)

    histograma_gral.histograma(list_sent_from_not_friends, "not_from_friends")
    histograma_gral.histograma(list_sent_to_not_friends, "not_to_friends")

    histograma_gral.histograma(list_tot_sent, "tot_sent")
    histograma_gral.histograma(list_tot_received, "tot_received")

    histograma_gral.histograma(
        list_to_friends, "to_friends")  #data, string_for_output_file_name
    histograma_gral.histograma(list_from_friends, "from_friends")
    histograma_gral.histograma(list_tot_messg_friends, "tot_friends")

    histograma_bines_gral.histograma_bins(list_GINI_weighted_to_friends, 75,
                                          "Gini_weight_to_friends")
    histograma_bines_gral.histograma_bins(list_GINI_weighted_from_friends, 75,
                                          "Gini_weight_from_friends")
    histograma_bines_gral.histograma_bins(list_GINI_weighted_tot_messg_friends,
                                          75, "Gini_weight_tot_friends")

    histograma_bines_gral.histograma_bins(list_GINI_weighted_to_friends_R6s,
                                          75, "Gini_weight_to_friends_R6s")
    histograma_bines_gral.histograma_bins(list_GINI_weighted_from_friends_R6s,
                                          75, "Gini_weight_from_friends_R6s")
    histograma_bines_gral.histograma_bins(
        list_GINI_weighted_tot_messg_friends_R6s, 75,
        "Gini_weight_tot_friends_R6s")

    histograma_bines_gral.histograma_bins(
        list_GINI_weighted_to_friends_R6overlap, 75,
        "Gini_weight_to_friends_R6overlap")
    histograma_bines_gral.histograma_bins(
        list_GINI_weighted_from_friends_R6overlap, 75,
        "Gini_weight_from_friends_R6overlap")
    histograma_bines_gral.histograma_bins(
        list_GINI_weighted_tot_messg_friends_R6overlap, 75,
        "Gini_weight_tot_friends_R6overlap")

    histograma_gral.histograma(list_blog_posts, "num_blog_posts")
    histograma_gral.histograma(list_home_page, "num_home_page_posts")
    histograma_gral.histograma(list_forum_posts, "num_forum_posts")
    histograma_gral.histograma(list_lesson_com, "num_lesson_com")
    histograma_gral.histograma(list_tot_public_mess, "num_tot_public_messages")
# Ejemplo n.º 18
def main():

    file2 = open("./Results/Scatter_plot_length_slope_lin.dat", 'wt')
    file3 = open("./Results/Scatter_plot_tau_deltaY_exp.dat", 'wt')

    file4 = open("./Results/Summary_results_cutting_time_series.dat", 'wt')

    database = "calorie_king_social_networking_2010"
    server = "tarraco.chem-eng.northwestern.edu"
    user = "******"
    passwd = "n1ckuDB!"

    db = Connection(server, database, user, passwd)

    query = """select * from weigh_in_cuts order by id, start_day"""
    result = db.query(query)  # is a list of dict.

    list_distinct_users = []
    list_num_segments_per_user = []

    list_quality_values_lin = []  #DW score
    list_quality_values_con = []  #DW score
    list_quality_values_exp = []  #DW score

    list_pairs_tau_deltaW = []
    list_pairs_slope_time_length = []

    num_segments = 0
    num_lin_segments = 0
    num_con_segments = 0
    num_exp_segments = 0
    num_isolates = 0

    num_segments_per_user = 0
    for line in result:  # each line is a dict, each line is a segment

        user = line['ck_id']
        fit_type = str(line['fit_type'])
        start_day = int(line['start_day'])
        stop_day = int(line['stop_day'])
        start_weight = float(line['start_weight'])
        stop_weight = float(line['stop_weight'])

        if fit_type != "isolated":  # isolated datapoint (with gaps at both sides)
            num_segments += 1

            try:
                quality = float(line['quality'])
            except TypeError:
                print user
                raw_input(
                )  #                pass  # isolated points dont have quality

    #parameters for linear:  1:cte,  2:slope. for exponential:  1:cte, 2:multiplicative_cte, 3:multipli_cte_in_the_exp
            param1 = float(line['param1'])

            try:
                param2 = float(line['param2'])

            except TypeError:
                print "\nconstant segment!", user
                param2 = 0.

            try:
                param3 = float(line['param3'])
            except TypeError:
                pass  # cos the linear segments dont have a param3

            print "\n", user

            if user not in list_distinct_users:
                list_distinct_users.append(user)
                if num_segments_per_user != 0:
                    list_num_segments_per_user.append(
                        num_segments_per_user
                    )  # i save the value from the previous user before starting the count for this one.

                num_segments_per_user = 1
            else:
                num_segments_per_user += 1

            if fit_type == "linear" or fit_type == "constant":

                if fit_type == "linear":
                    num_lin_segments += 1
                    list_quality_values_lin.append(quality)

                elif fit_type == "constant":
                    num_con_segments += 1
                    list_quality_values_con.append(quality)

                tupla = []

                tupla.append(float(stop_day - start_day + 1.))
                tupla.append(param2)
                list_pairs_slope_time_length.append(tupla)

            elif fit_type == "exponent":
                num_exp_segments += 1

                list_quality_values_exp.append(quality)

                tupla = []

                tupla.append(1. / param3)
                tupla.append(
                    float(stop_weight - start_weight)
                )  # FAKE VALUES FOR NOW!!!!  CAMBIAR ESTO POR LOS NOMBRES DE LOS CAMPOS QUE AUN NO EXISTEN: startY , stopY

                list_pairs_tau_deltaW.append(tupla)

            else:
                print fit_type, "nor lin nor exp!", type(fit_type),

        else:
            num_isolates += 1

    histograma_gral.histograma(
        list_num_segments_per_user,
        "./Results/Distribution_num_segments_per_user.dat")

    for item in list_pairs_slope_time_length:
        print >> file2, item[0], item[1]
    file2.close()

    for item in list_pairs_tau_deltaW:
        print >> file3, item[0], item[1]
    file3.close()

    print >> file4, "Summary results cutting time series:\n\n"

    print >> file4, "Number of users:", len(
        list_distinct_users), "(with at least 20 weigh-ins)"
    print >> file4, "Number of segments:", num_lin_segments + num_con_segments + num_exp_segments  #not including one-point segments
    print >> file4, "Average number of segments per individual:", num_segments / float(
        len(list_distinct_users))
    print >> file4, "Number of one-point segments:", num_isolates
    print >> file4, "Number segments by type:"
    print >> file4, "    Linear: ", num_lin_segments
    print >> file4, "    Constant: ", num_con_segments
    print >> file4, "    Exponential: ", num_exp_segments, "\n"
    print >> file4, "Regarding the goodness of the fits, DW average score:"
    print >> file4, "    Linear: ", numpy.mean(list_quality_values_lin)
    print >> file4, "    Constant: ", numpy.mean(list_quality_values_con)
    print >> file4, "    Exponential: ", numpy.mean(list_quality_values_exp)

    file4.close()

    print len(list_quality_values_lin), list_quality_values_lin
    print len(list_quality_values_con), list_quality_values_con
    print len(list_quality_values_exp), list_quality_values_exp

    histograma_bines_gral.histograma_bins(
        list_quality_values_lin, 10,
        "./Results/Distribution_DW_scores_lin_segments.dat")
    histograma_bines_gral.histograma_bins(
        list_quality_values_con, 10,
        "./Results/Distribution_DW_scores_const_segments.dat")
    histograma_bines_gral.histograma_bins(
        list_quality_values_exp, 10,
        "./Results/Distribution_DW_scores_exp_segments.dat")

    print "\n done!"
# Ejemplo n.º 19
def main(graph_name):
    """Simulate idea spreading ("infection") on the hospital shift graph.

    For each (prob_infection, prob_Immune) pair in the sweep -- here a
    degenerate single-point sweep, since the ranges only admit one value --
    run Niter stochastic realizations, average them, and compare the
    simulated adoption curve against the actual adopter evolution read from
    disk.  Depending on for_testing_fixed_set, either write per-parameter
    summary/histogram files (fixed-set testing mode) or a parameter
    landscape file (sweep mode), all under ../Results/weight_shifts/.

    graph_name -- path to a GML file readable by networkx.read_gml; shift
    nodes carry 'order' and 'shift_lenght' attributes, doctor nodes carry
    'type' and 'label'; a 'status' attribute (S / I / Immune) is written on
    every node during the simulation.
    """

    G = nx.read_gml(graph_name)

    for_testing_fixed_set = "YES"  # when YES, fixed values param, to get all statistics on final distances etc
    # change the range for the parameters accordingly

    envelopes = "YES"

    Niter = 1000  # 100 iter seems to be enough (no big diff. with respect to 1000it)

    percent_envelope = 95.

    list_id_weekends_T3 = look_for_T3_weekends(
        G
    )  # T3 doesnt share fellows in the weekend  (but they are the exception)
    Nbins = 20  # for the histogram of sum of distances

    cutting_day = 175  # i use this only for the filenames

    all_team = "NO"  # as adopters or not

    dir_real_data = '../Results/'
    dir = "../Results/weight_shifts/infection/"

    delta_end = 3.  # >= than + or -  dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!)

    # in sweep mode, truncate the landscape file now; rows are appended to it
    # inside the parameter loop below
    if for_testing_fixed_set == "NO":
        output_file3 = "../Results/weight_shifts/Landscape_parameters_infection_" + str(
            Niter) + "iter.dat"
        file3 = open(output_file3, 'wt')

        file3.close()

######################################################################################
#  I read the file of the actual evolution of the idea spreading in the hospital:   ##
######################################################################################

    if all_team == "YES":
        print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
        exit()

    else:
        filename_actual_evol = "../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"

    file1 = open(
        filename_actual_evol, 'r'
    )  ## i read the file:  list_dates_and_names_current_adopters.txt  (created with: extract_real_evolution_number_adopters.py)
    list_lines_file = file1.readlines()

    # second whitespace-separated column of each row is the adopter count
    list_actual_evol = []
    for line in list_lines_file:  # [1:]:   # i exclude the first row

        num_adopters = float(line.split(" ")[1])
        list_actual_evol.append(num_adopters)

##################################################################

#../Results/weight_shifts/infection/Average_time_evolution_Infection_training_p0.8_Immune0.3_1000iter_2012_avg_ic_day125.dat  THESE VALUES ARE THE OPTIMUM FIT FOR THE 152 DAYS
    # degenerate ranges: the while loops below visit exactly one value of
    # each parameter (0.1 for prob_infection, 0.0 for prob_Immune)
    prob_min = 0.1
    prob_max = 0.101
    delta_prob = 0.1

    prob_Immune_min = 0.00
    prob_Immune_max = 0.001
    delta_prob_Immune = 0.1

    dict_filenames_tot_distance = {
    }  # i will save the filename as key and the tot distance from that curve to the original one

    prob_Immune = prob_Immune_min
    while prob_Immune <= prob_Immune_max:

        print "prom Immune:", prob_Immune

        prob_infection = prob_min
        while prob_infection <= prob_max:

            print "  p:", prob_infection

            if for_testing_fixed_set == "YES":
                output_file2 = dir + "Average_time_evolution_Infection_train_test_p" + str(
                    prob_infection) + "_" + "Immune" + str(
                        prob_Immune) + "_" + str(Niter) + "iter_2012.dat"

            else:
                output_file2 = dir + "Average_time_evolution_Infection_p" + str(
                    prob_infection) + "_" + "Immune" + str(
                        prob_Immune) + "_" + str(Niter) + "iter_2012.dat"

            # truncate the average-evolution file; it is re-opened in append
            # mode after the Niter loop to write the averaged curve
            file2 = open(output_file2, 'wt')
            file2.close()

            #  list_final_I_values_fixed_p=[]  # i dont care about the final values right now, but about the whole time evol
            list_lists_t_evolutions = []  # one time-evolution list per realization

            list_dist_fixed_parameters = []
            list_abs_dist_at_ending_point_fixed_parameters = []
            list_dist_at_ending_point_fixed_parameters = []
            list_final_num_infected = []

            #   list_abs_dist_at_cutting_day=[]

            for iter in range(Niter):

                #print "     iter:",iter

                ####### NOTE!!! comment this out when I am sweeping the WHOLE parameter space
                #    file_name_indiv_evol=output_file2.strip("Average_").split('.dat')[0]+"_indiv_iter"+str(iter)+".dat"

                #   file4 = open(file_name_indiv_evol,'wt')
                #  file4.close()
                ##########################################

                ########### set I.C.
                # reset node statuses; seed the infection with Wunderink and
                # Weiss, and find the highest shift order for the time loop

                list_I = []  #list infected doctors
                max_order = 0
                for n in G.nodes():
                    G.node[n]["status"] = "S"  # all nodes are Susceptible
                    if G.node[n]['type'] == "shift":
                        if G.node[n]['order'] > max_order:
                            max_order = G.node[n][
                                'order']  # to get the last shift-order for the time loop
                    else:
                        if G.node[n]['label'] == "Wunderink" or G.node[n][
                                "label"] == "Weiss":
                            G.node[n]["status"] = "I"
                            list_I.append(G.node[n]['label'])

                list_single_t_evolution = []
                list_single_t_evolution.append(
                    2.0)  # I always start with TWO infected doctors!!

                for n in G.nodes(
                ):  # i make some DOCTORs INMUNE  (anyone except Weiss and Wunderink)
                    if (G.node[n]['type'] == "A") or (G.node[n]['type']
                                                      == "F"):
                        if G.node[n]['label'] != "Wunderink" and G.node[n][
                                "label"] != "Weiss":
                            rand = random.random()
                            if rand < prob_Immune:
                                G.node[n]["status"] = "Immune"

                ################# the dynamics starts:

                t = 1
                while t <= max_order:  # loop over shifts, in order
                    for n in G.nodes():
                        if G.node[n]['type'] == "shift" and G.node[n][
                                'order'] == t:

                            shift_lenght = int(G.node[n]['shift_lenght'])

                            if shift_lenght == 2 and n not in list_id_weekends_T3:
                                shift_lenght = 1  # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2.  (weekend shifts for T3 are two day long, with no sharing fellows)
                            #  print "one-day weekend", G.node[n]['label'],G.node[n]['shift_lenght']

                            flag_possible_infection = 0
                            for doctor in G.neighbors(
                                    n
                            ):  #first i check if any doctor is infected in this shift
                                if G.node[doctor]["status"] == "I":
                                    flag_possible_infection = 1

                            if flag_possible_infection:
                                for doctor in G.neighbors(
                                        n
                                ):  # then the doctors in that shift, gets infected with prob_infection

                                    for i in range(
                                            shift_lenght
                                    ):  # i repeat the infection process several times, to acount for shift lenght
                                        if G.node[doctor]["status"] == "S":
                                            rand = random.random()
                                            if rand < prob_infection:
                                                G.node[doctor]["status"] = "I"

                                                if G.node[doctor][
                                                        "type"] == "A":  # fellows participate in the dynamics, but i only consider the attendings as real adopters
                                                    list_I.append(
                                                        G.node[doctor]
                                                        ["label"])

                #  if for_testing_fixed_set=="YES":
                #    if t==cutting_day:
                #      list_abs_dist_at_cutting_day.append(abs(float(list_actual_evol[-1])-float(len(list_I))))
                #     print abs(float(list_actual_evol[-1])-float(len(list_I))), float(list_actual_evol[t]),float(len(list_I))

                    # one sample per shift-order step: cumulative adopter count
                    list_single_t_evolution.append(float(len(list_I)))

                    t += 1

                    ######## end t loop

                ######## NOTE!!! comment this out when I am sweeping the WHOLE parameter space
            # file4 = open(file_name_indiv_evol,'at')
            #for i in range(len(list_single_t_evolution)):  #time step by time step
            #  print >> file4, i,list_single_t_evolution[i], prob_infection, prob_Immune
            #file4.close()
            ########################################################

                list_lists_t_evolutions.append(list_single_t_evolution)

                list_dist_fixed_parameters.append(
                    compare_real_evol_vs_simus_to_be_called.compare_two_curves(
                        list_actual_evol, list_single_t_evolution))

                list_abs_dist_at_ending_point_fixed_parameters.append(
                    abs(list_single_t_evolution[-1] - list_actual_evol[-1])
                )  # i save the distance at the ending point between the current simu and actual evol
                list_dist_at_ending_point_fixed_parameters.append(
                    list_single_t_evolution[-1] - list_actual_evol[-1]
                )  # i save the distance at the ending point between the current simu and actual evol
                list_final_num_infected.append(list_single_t_evolution[-1])

            ######## end loop Niter

            # [mean curve distance, its SD, mean abs. final-point distance]
            list_pair_dist_std_delta_end = []

            list_pair_dist_std_delta_end.append(
                numpy.mean(list_dist_fixed_parameters)
            )  # average dist between the curves over Niter
            list_pair_dist_std_delta_end.append(
                numpy.std(list_dist_fixed_parameters))

            list_pair_dist_std_delta_end.append(
                numpy.mean(list_abs_dist_at_ending_point_fixed_parameters))

            if for_testing_fixed_set == "NO":
                file3 = open(output_file3, 'at')  # i print out the landscape
                print >> file3, prob_infection, prob_Immune, numpy.mean(
                    list_abs_dist_at_ending_point_fixed_parameters
                ), numpy.mean(list_dist_fixed_parameters), numpy.mean(
                    list_final_num_infected), numpy.std(
                        list_final_num_infected)
                file3.close()

            if (
                    numpy.mean(list_abs_dist_at_ending_point_fixed_parameters)
            ) <= delta_end:  # i only consider situations close enough at the ending point

                dict_filenames_tot_distance[
                    output_file2] = list_pair_dist_std_delta_end

            # write the Niter-averaged evolution, one (t, mean) row per step
            file2 = open(output_file2, 'at')
            for s in range(len(list_single_t_evolution)):
                list_fixed_t = []
                for iter in range(Niter):
                    list_fixed_t.append(list_lists_t_evolutions[iter][s])
                print >> file2, s, numpy.mean(list_fixed_t)
            file2.close()

            print "printed out: ", output_file2
            # raw_input()

            if envelopes == "YES":
                calculate_envelope_set_curves.calculate_envelope(
                    list_lists_t_evolutions, percent_envelope, "Infection",
                    [prob_infection, prob_Immune])

            if for_testing_fixed_set == "YES":

                # fraction of realizations ending within delta_end of the
                # real curve's final adopter count
                num_valid_endings = 0.
                for item in list_abs_dist_at_ending_point_fixed_parameters:
                    if item <= delta_end:  # i count how many realizations i get close enough at the ending point
                        num_valid_endings += 1.

                print "average distance of the optimum in the testing segment:", numpy.mean(
                    list_dist_fixed_parameters), numpy.std(
                        list_dist_fixed_parameters
                    ), list_dist_fixed_parameters, "\n"
                print "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                    list_dist_at_ending_point_fixed_parameters
                ), "SD final dist", numpy.std(
                    list_dist_at_ending_point_fixed_parameters
                ), list_dist_at_ending_point_fixed_parameters, "\n"

                histogram_filename = "../Results/weight_shifts/histogr_raw_distances_ending_infection_p" + str(
                    prob_infection) + "_" + "Immune" + str(
                        prob_Immune) + "_" + str(Niter) + "iter_day" + str(
                            cutting_day) + ".dat"
                histograma_gral_negv_posit.histograma(
                    list_dist_at_ending_point_fixed_parameters,
                    histogram_filename)

                histogram_filename2 = "../Results/weight_shifts/histogr_sum_dist_traject_infection_p" + str(
                    prob_infection) + "_" + "Immune" + str(
                        prob_Immune) + "_" + str(Niter) + "iter_day" + str(
                            cutting_day) + ".dat"

                histograma_bines_gral.histograma_bins(
                    list_dist_fixed_parameters, Nbins, histogram_filename2)

                output_file10 = "../Results/weight_shifts/Summary_results_training_segment_infection_p" + str(
                    prob_infection) + "_" + "Immune" + str(
                        prob_Immune) + "_" + str(Niter) + "iter_day" + str(
                            cutting_day) + ".dat"
                file10 = open(output_file10, 'wt')

                print >> file10, "Summary results from train-testing infection with", Niter, "iter, and with values for the parameters:  prob_inf ", prob_infection, " prob immune: ", prob_Immune, "\n"

                print >> file10, "average distance of the optimum in the testing segment:", numpy.mean(
                    list_dist_fixed_parameters), numpy.std(
                        list_dist_fixed_parameters
                    ), list_dist_fixed_parameters, "\n"
                print >> file10, "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                    list_dist_at_ending_point_fixed_parameters
                ), "SD final dist", numpy.std(
                    list_dist_at_ending_point_fixed_parameters
                ), list_dist_at_ending_point_fixed_parameters, "\n"

                print >> file10, "written optimum train_test evolution file:", output_file2
                print >> file10, "written histogram file: ", histogram_filename

                file10.close()

                print "written Summary file: ", output_file10
                print "written histogram file: ", histogram_filename
                print "written histogram file: ", histogram_filename2

            prob_infection += delta_prob
        prob_Immune += delta_prob_Immune

    if for_testing_fixed_set == "NO":  # only if i am exploring the whole landscape, i need to call this function, otherwise, i already know the optimum
        compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(
            dict_filenames_tot_distance, "Infection_weight", all_team, Niter,
            None)  # last argument doesnt apply (cutting day)

    if for_testing_fixed_set == "NO":
        print "written landscape file:", output_file3
def main(graph_name):
 

   G = nx.read_gml(graph_name)
 
   list_id_weekends_T3=look_for_T3_weekends(G)  # T3 doesnt share fellows in the weekend  (but they are the exception)



   cutting_day=175  # to separate   training-testing

   Niter_training=1000
  

   delta_end=3  # >= than + or -  dr difference at the end of the evolution

   dir_real_data='../Results/'
   dir="../Results/weight_shifts/persuasion/"  


   all_team="NO"   # as adopters or not
   Nbins=20   # for the histogram of sum of distances


   fixed_param="FIXED_mutual0.5_damping.5_"    # or ""  # for the Results file that contains the sorted list of best parameters




  # fixed_parameters="mutual_encoug0.5_threshold0.5"   # for the Landscape text file CHANGE PARAMETERS ACCORDINGLY!!!

#output_file3="../Results/weight_shifts/Landscape_parameters_persuasion_train_test_"+str(fixed_parameters)+"_"+str(Niter_training)+"iter.dat"
   output_file3="../Results/weight_shifts/Landscape_parameters_persuasion_train_FIXED_damping0.1_threshold0.7_"+str(Niter_training)+"iter_alphaA_eq_alphaF.dat"  
   file3 = open(output_file3,'wt')        
   file3.close()

 


######################################################################################
#  I read the file of the actual evolution of the idea spreading in the hospital:   ##
######################################################################################



   if all_team=="YES":    
      print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
      exit()

   else:
      filename_actual_evol="../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
  


   file1=open(filename_actual_evol,'r')         ## i read the file:  list_dates_and_names_current_adopters.txt  (created with: extract_real_evolution_number_adopters.py)
   list_lines_file=file1.readlines()
            

   list_actual_evol=[]  
   for line in list_lines_file:      # [1:]:   # i exclude the first row   
     
      num_adopters= float(line.split(" ")[1])          
      list_actual_evol.append(num_adopters)

   list_actual_evol_training=list_actual_evol[:cutting_day]

##################################################################


#../Results/network_final_schedule_withTeam3/Time_evolutions_Persuasion_alpha0.2_damping0.0_mutual_encourg0.7_threshold0.4_unif_distr_50iter_2012_seed31Oct_finalnetwork.dat

 
   alpha_F_min=0.10   #   # alpha=0: nobody changes their mind
   alpha_F_max=0.9    
   delta_alpha_F=0.10    #AVOID 1.0 OR THE DYNAMICS GETS TOTALLY STUCK AND IT IS NOT ABLE TO PREDICT SHIT!
   

   min_damping=0.500   #0.0     #its harder to go back from YES to NO again. =1 means no effect, =0.5 half the movement from Y->N than the other way around, =0 never go back from Y to N
   max_damping=0.501    #0.451
   delta_damping=0.10  
   
   


   min_mutual_encouragement=0.50   #  # when two Adopters meet, they convince each other even more
   max_mutual_encouragement=0.501   
   delta_mutual_encouragement=0.10
   
   
   threshold_min=0.10   #  # larger than, to be an Adopter
   threshold_max=0.901 
   delta_threshold=0.10   # AVOID 1.0 OR THE DYNAMICS GETS TOTALLY STUCK AND IT IS NOT ABLE TO PREDICT SHIT
 


   
   
   print "\n\nPersuasion process on network, with Niter:",Niter_training
   
   
   dict_filenames_tot_distance={}   # i will save the filename as key and the tot distance from that curve to the original one
   dict_filenames_prod_distances={}   


  

   threshold=threshold_min
   while   threshold<= threshold_max:
      print   "thershold:",threshold

      alpha_F=alpha_F_min
      while alpha_F<= alpha_F_max:            # i explore all the parameter space, and create a file per each set of values
        alpha_A=1.0*alpha_F
        print "  alpha_F:",alpha_F

        mutual_encouragement=min_mutual_encouragement  
        while  mutual_encouragement <= max_mutual_encouragement:
          print "    mutual_encouragement:",mutual_encouragement

          damping=min_damping
          while   damping <= max_damping:
            print "      damping:",damping


         
#            dir="../Results/weight_shifts/persuasion/alpha%.2f_damping%.2f/"  % (alpha_F, damping )
           
            output_file=dir+"Time_evolutions_Persuasion_training_alpha"+str(alpha_F)+"_damping"+str(damping)+"_mutual_encourg"+str(mutual_encouragement)+"_threshold"+str(threshold)+"_unif_distr_"+str(Niter_training)+"iter_alphaA_eq_alphaF"+"_"+str(cutting_day)+".dat"         


           # file = open(output_file,'wt')     # i am not saving the train file, because i will just want to know 
            #file.close()          # the optimum parameter set and go look for the whole-250-day file
            


            time_evol_number_adopters_ITER=[]  # list of complete single realizations of the dynamics
            list_dist_fixed_parameters=[]
            list_dist_at_ending_point_fixed_parameters=[]
            list_dist_abs_at_ending_point_fixed_parameters=[]

           
            list_networks_at_cutting_day=[]

            list_final_num_adopt=[]


            for iter in range(Niter_training):

               # print "         ",iter
                list_t=[]
           
                time_evol_number_adopters=[]   # for a single realization of the dynamics

               


                num_adopters , seed_shift ,max_shift= set_ic(G,threshold)   # i establish who is Adopter and NonAdopter initially, and count how many shifts i have total

                time_evol_number_adopters.append(float(num_adopters))               
                list_t.append(0)



                
               ########### the dynamics starts:                 
                t=int(seed_shift)+1   # the first time step is just IC.???


                while t< cutting_day:  # loop over shifts, in chronological order  (the order is the day index since seeding_day) 
                         
                    list_t.append(t)
                    for n in G.nodes():
                        if G.node[n]['type']=="shift" and G.node[n]['order']==t:  # i look for the shift corresponding to that time step                    

                            shift_lenght=int(G.node[n]['shift_lenght'])
                           
                            if shift_lenght==2 and n not in list_id_weekends_T3:
                               shift_lenght=1   # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2.  (weekend shifts for T3 are two day long, with no sharing fellows)



                            flag_possible_persuasion=0
                            for doctor in G.neighbors(n):                               
                                if G.node[doctor]["status"]=="Adopter":   #first i check if any doctor is an adopter in this shift         
                                    flag_possible_persuasion=1                               
                                    break

                            if flag_possible_persuasion==1:
                                list_doctors=[]
                                for doctor in G.neighbors(n):   # for all drs in that shift
                                    list_doctors.append(doctor)
                                
                                
                                pairs=itertools.combinations(list_doctors,2)    # cos the shift can be 2 but also 3 doctors 
                                for pair in pairs:
                                    doctor1=pair[0]
                                    doctor2=pair[1]
                                                                                        
                                    if G.node[doctor1]['status'] != G.node[doctor2]['status']:  # if they think differently, 
                                                                                              # there will be persuasion
                                        persuasion(G,damping,doctor1,doctor2,alpha_A,alpha_F,threshold,shift_lenght)   # i move their values of opinion                  
                                        update_opinions(G,threshold,doctor1,doctor2) #  i update status and make sure the values of the vectors stay between [0,1] 
                                  
                                    else:  # if two Adopters meet, they encourage each other (if two NonAdopters, nothing happens)
                                   
                                       mutual_reinforcement(G,mutual_encouragement,doctor1,doctor2,shift_lenght)
                                  
                               
                    list_all_Adopters=[]  #including fellows        
                    list_Adopters=[]        #NOT including fellows 
                    for n in G.nodes():              
                        try:
                            if  G.node[n]["status"]=="Adopter":                                                    
                                if G.node[n]["label"] not in list_Adopters and G.node[n]["type"]=="A":
                                    list_Adopters.append(G.node[n]["label"])
                        except: pass  # if the node is a shift, it doesnt have a 'status' attribute


        
                   


                    time_evol_number_adopters.append(float(len(list_Adopters)))

                    t+=1
   

                ############## end while loop over t
               


               
                time_evol_number_adopters_ITER.append(time_evol_number_adopters)


                list_final_num_adopt.append(time_evol_number_adopters[-1])

               
                list_dist_fixed_parameters.append(compare_real_evol_vs_simus_to_be_called.compare_two_curves( list_actual_evol_training,time_evol_number_adopters))
               
                list_dist_abs_at_ending_point_fixed_parameters.append( abs(time_evol_number_adopters[-1]-list_actual_evol_training[-1]) )

                list_dist_at_ending_point_fixed_parameters.append( time_evol_number_adopters[-1]-list_actual_evol_training[-1]) 



               
              
             

            #######################   end loop Niter for the training fase


            list_pair_dist_std_delta_end=[]
        
            list_pair_dist_std_delta_end.append(numpy.mean(list_dist_fixed_parameters) )   # average dist between the curves over Niter
            list_pair_dist_std_delta_end.append(numpy.std(list_dist_fixed_parameters) )

            list_pair_dist_std_delta_end.append(numpy.mean(list_dist_abs_at_ending_point_fixed_parameters))

         

                     
            value=numpy.mean(list_dist_fixed_parameters) *numpy.mean(list_dist_abs_at_ending_point_fixed_parameters) # if SD=0, it is a problem, because then that is the minimun value, but not the optimum i am looking for!!
        
            dict_filenames_prod_distances[output_file]=  value                  



            file3 = open(output_file3,'at')          # i print out the landscape           
            print >> file3, alpha_F, damping, mutual_encouragement, threshold,numpy.mean(list_dist_abs_at_ending_point_fixed_parameters), numpy.mean(list_dist_fixed_parameters),  numpy.mean(list_final_num_adopt),numpy.std(list_final_num_adopt),  numpy.std(list_final_num_adopt)/numpy.mean(list_final_num_adopt)
            file3.close()




            histogram_filename="../Results/weight_shifts/histogr_raw_distances_ending_test_train_alpha"+str(alpha_F)+"_damping"+str(damping)+"_mutual_encourg"+str(mutual_encouragement)+"_threshold"+str(threshold)+"_unif_distr_"+str(Niter_training)+"iter_alphaA_eq_alphaF"+"_"+str(cutting_day)+".dat"     
            histograma_gral_negv_posit.histograma(list_dist_at_ending_point_fixed_parameters,histogram_filename)
            
            histogram_filename2="../Results/weight_shifts/histogr_sum_dist_traject_infection_training_alpha"+str(alpha_F)+"_damping"+str(damping)+"_mutual_encourg"+str(mutual_encouragement)+"_threshold"+str(threshold)+"_unif_distr_"+str(Niter_training)+"iter_alphaA_eq_alphaF"+"_"+str(cutting_day)+".dat"     
            
            histograma_bines_gral.histograma_bins(list_dist_fixed_parameters,Nbins,histogram_filename2)


            print  "written histogram file: ",histogram_filename
            print  "written histogram file: ",histogram_filename2


            if (numpy.mean(list_dist_abs_at_ending_point_fixed_parameters)) <= delta_end:  # i only consider situations close enough at the ending point   

               dict_filenames_tot_distance[output_file]=list_pair_dist_std_delta_end 


             



   
          #  file = open(output_file,'wt')        
           # for i in range(len(time_evol_number_adopters)):  #time step by time step
            #    list_fixed_t=[]
             #   for iteracion in range (Niter_training): #loop over all independent iter of the process
              #      list_fixed_t.append(time_evol_number_adopters_ITER[iteracion][i])  # i collect all values for the same t, different iter  

               # print >> file, list_t[i],numpy.mean(list_fixed_t),numpy.std(list_fixed_t), alpha_F,damping,mutual_encouragement       
            #file.close()

           

          
            damping += delta_damping
          mutual_encouragement += delta_mutual_encouragement
        alpha_F += delta_alpha_F
      threshold  += delta_threshold
    



   list_order_dict=  compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(dict_filenames_tot_distance,"Persuasion_training_land_weight",all_team,Niter_training,cutting_day)


  
   string_name="_persuasion_training_"+fixed_param+str(Niter_training)+"iter_"+str(cutting_day)+".dat"            # for the "Results" file with the sorted list of files
   
   list_order_dict2= compare_real_evol_vs_simus_to_be_called.pick_minimum_prod_distances(dict_filenames_prod_distances,string_name,all_team,Niter_training,cutting_day)

  


#./Results/network_final_schedule_withTeam3_local/Time_evolutions_Persuasion_alpha0.4_damping0.4_mutual_encourg0.6_threshold0.5_unif_distr_2iter_2012_seed31Oct_finalnetwork.dat


   optimum_filename=list_order_dict[0][0]


   print optimum_filename   
   alpha_F=float(list_order_dict[0][0].split("_alpha")[1].split("_")[0])
   alpha_A=0.5*alpha_F
   damping=float(list_order_dict[0][0].split("_damping")[1].split("_")[0])
   mutual_encouragement=float(list_order_dict[0][0].split("_mutual_encourg")[1].split("_")[0])
   threshold=float(list_order_dict[0][0].split("_threshold")[1].split("_")[0])
  
  
            
                

  
   print "Optimum (old method) alpha=", alpha_F, " damping=",damping," mutual encourag=",mutual_encouragement," threshold",threshold
   
  
  
   optimum_filename=list_order_dict2[0][0]

   print optimum_filename   
   alpha_F=float(list_order_dict2[0][0].split("_alpha")[1].split("_")[0])
   alpha_A=0.5*alpha_F
   damping=float(list_order_dict2[0][0].split("_damping")[1].split("_")[0])
   mutual_encouragement=float(list_order_dict2[0][0].split("_mutual_encourg")[1].split("_")[0])
   threshold=float(list_order_dict2[0][0].split("_threshold")[1].split("_")[0])
  
  
            
                

  
   print "Optimum (product distances and SDs) alpha=", alpha_F, " damping=",damping," mutual encourag=",mutual_encouragement," threshold",threshold
   
  
  





   output_file10="../Results/weight_shifts/Summary_results_train_test_persuasion_alpha"+str(alpha_F)+"_FIXED_damping"+str(damping)+"_mutual_encourg"+str(mutual_encouragement)+"_FIXED_threshold"+str(threshold)+"_"+str(Niter_training)+"iter_alphaA_eq_alphaF_day"+str(cutting_day)+".dat"         
   file10 = open(output_file10,'wt')    

   print >> file10, "Summary results from train-testing persuasion with",Niter_training, "iter, using the avg of the cutting points as IC, and with values for the parameters:  alpha ",alpha_F," damping: ",damping," mutual_encourg: ",mutual_encouragement," threshold:",threshold


   print >> file10,  "Look for optimum the file set of parameters (or run those simulations):",optimum_filename
  

   file10.close()




   print "Look for optimum the file set of parameters (or run those simulations):",optimum_filename
  

   print "printed out landscape file:",output_file3
Ejemplo n.º 21
0
def main(graph_name):
    """Run the persuasion dynamics over the full shift schedule for one fixed
    parameter set (alpha_F, damping, mutual_encouragement, threshold), over
    Niter independent realizations, while collecting the per-event opinion
    shifts ("delta positions").  Writes a histogram of all delta positions and
    a per-timestep listing of them.

    Parameters
    ----------
    graph_name : str
        Path to the GML file with the doctor/shift network.
    """

    G = nx.read_gml(graph_name)

    Niter = 10000  # independent stochastic realizations of the dynamics

    dir_real_data = '../Results/'

    Nbins = 100  # bins for the delta-position histogram

    all_team = "NO"  # as adopters or not

    # output_file3=dir_real_data+"Landscape_parameters_persuasion_"+str(Niter)+"iter.dat"
    #file3 = open(output_file3,'wt')

    ######################################################################################
    #  I read the file of the actual evolution of the idea spreading in the hospital:   ##
    ######################################################################################

    if all_team == "YES":
        filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_all_team_as_adopters_SIMPLER.csv"

    else:
        filename_actual_evol = dir_real_data + "HospitalModel_august1_adoption_counts_SIMPLER.csv"
    # I no longer need to ALSO change the file name in the code to compare curves

    # real adoption curve; only its length is used below (to size the
    # per-timestep dictionary)
    list_actual_evol = []
    result_actual_file = csv.reader(open(filename_actual_evol, 'rb'),
                                    delimiter=',')
    cont = 0
    for row in result_actual_file:
        if cont > 0:  # i ignore the first line with the headers

            num_adopters = row[3]  # 4th CSV column: adopter count

            list_actual_evol.append(float(num_adopters))

        cont += 1

##################################################################

#../Results/network_final_schedule_withTeam3/Time_evolutions_Persuasion_alpha0.1_damping0.3_mutual_encourg0.3_threshold0.2_unif_distr_50iter_2012_seed31Oct_finalnetwork.dat

    alpha_F = 0.10  # alpha=0: nobody changes their mind

    alpha_A = 0.5 * alpha_F  # attendings persuade at half the fellows' rate here

    damping = 0.3  #its harder to go back from YES to NO again. =1 means no effect, =0.5 half the movement from Y->N than the other way around, =0 never go back from Y to N

    mutual_encouragement = 0.3  # when two Adopters meet, they convince each other even more

    threshold = 0.20  # larger than, to be an Adopte

    print "\n\nPersuasion process on network, with Niter:", Niter

    # timestep index -> list of opinion shifts recorded at that timestep
    # (filled inside persuasion()/mutual_reinforcement())
    dict_timestep_list_delta_positions = {}
    for i in range(len(list_actual_evol)):
        dict_timestep_list_delta_positions[i] = []

    dir = "../Results/network_final_schedule_withTeam3_local/"
    output_file = dir + "Time_evolutions_Persuasion_alpha" + str(
        alpha_F) + "_damping" + str(damping) + "_mutual_encourg" + str(
            mutual_encouragement) + "_threshold" + str(
                threshold) + "_unif_distr_" + str(
                    Niter) + "iter_2012_seed31Oct_finalnetwork.dat"
    # NOTE(review): this file is truncated here but never written afterwards
    file = open(output_file, 'wt')
    file.close()

    # flat list of every opinion shift across all iterations (for the histogram)
    list_delta_position = []

    time_evol_number_adopters_ITER = [
    ]  # list of complete single realizations of the dynamics

    for iter in range(Niter):

        print "         ", iter
        list_t = []

        time_evol_number_adopters = [
        ]  # for a single realization of the dynamics

        num_adopters, seed_shift, max_shift = set_ic(
            G, threshold
        )  # i establish who is Adopter and NonAdopter initially, and count how many shifts i have total

        time_evol_number_adopters.append(float(num_adopters))
        # print "initial number of adopters:", num_adopters
        list_t.append(0)

        # the dynamics starts:
        t = int(seed_shift) + 1  # the first time step is just IC.???

        while t <= max_shift:  # loop over shifts, in chronological order  (the order is the day index since seeding_day)

            list_t.append(t)
            for n in G.nodes():
                if G.node[n]['type'] == "shift" and G.node[n][
                        'order'] == t:  # i look for the shift corresponding to that time step
                    flag_possible_persuasion = 0
                    for doctor in G.neighbors(n):
                        if G.node[doctor][
                                "status"] == "Adopter":  #first i check if any doctor is an adopter in this shift
                            flag_possible_persuasion = 1
                            break

                    if flag_possible_persuasion == 1:
                        list_doctors = []
                        for doctor in G.neighbors(
                                n):  # for all drs in that shift
                            list_doctors.append(doctor)

                        pairs = itertools.combinations(
                            list_doctors,
                            2)  # cos the shift can be 2 but also 3 doctors
                        for pair in pairs:
                            doctor1 = pair[0]
                            doctor2 = pair[1]

                            if G.node[doctor1]['status'] != G.node[doctor2][
                                    'status']:  # if they think differently,
                                # there will be persuasion
                                persuasion(G, damping, doctor1, doctor2,
                                           alpha_A, alpha_F, threshold,
                                           list_delta_position, t,
                                           dict_timestep_list_delta_positions
                                           )  # i move their values of opinion
                                update_opinions(
                                    G, threshold, doctor1, doctor2
                                )  #  i update status and make sure the values of the vectors stay between [0,1]

                            else:  # if two Adopters meet, they encourage each other (if two NonAdopters, nothing happens)

                                mutual_reinforcement(
                                    G, mutual_encouragement, doctor1, doctor2,
                                    list_delta_position, t,
                                    dict_timestep_list_delta_positions)

            list_Adopters = []  #count how many i have at this time
            for n in G.nodes():
                try:
                    if G.node[n]["status"] == "Adopter":
                        if G.node[n]["label"] not in list_Adopters:
                            list_Adopters.append(G.node[n]["label"])
                except:
                    pass  # if the node is a shift, it doesnt have a 'status' attribute

            time_evol_number_adopters.append(float(len(list_Adopters)))

            t += 1

        ############## end while loop over t

        time_evol_number_adopters_ITER.append(time_evol_number_adopters)

    ##############end loop Niter

    # average the Niter realizations into one mean adoption curve
    # (computed but only kept in memory; nothing below writes it out)
    average_time_evol_number_adopters = []
    for i in range(len(time_evol_number_adopters)):  #time step by time step
        list_fixed_t = []
        for iteracion in range(
                Niter):  #loop over all independent iter of the process
            list_fixed_t.append(
                time_evol_number_adopters_ITER[iteracion]
                [i])  # i collect all values for the same t, different iter

        average_time_evol_number_adopters.append(
            numpy.mean(list_fixed_t))  # i create the mean time evolution

# print list_delta_position

    # histogram of every individual opinion shift observed across all runs
    histograma_bines_gral.histograma_bins(
        list_delta_position, Nbins,
        "../Results/histogr_delta_positions_alpha" + str(alpha_F) +
        "_damping" + str(damping) + "_mutual_encourg" +
        str(mutual_encouragement) + "_threshold" + str(threshold) + "_" +
        str(Niter) + "iter_" + str(Nbins) + "bins.dat")

    dir = "../Results/"
    output_file3 = dir + "List_delta_positions_vs_timestep" + str(
        alpha_F) + "_damping" + str(damping) + "_mutual_encourg" + str(
            mutual_encouragement) + "_threshold" + str(threshold) + "_" + str(
                Niter) + "iter.dat"
    file3 = open(output_file3, 'wt')

    # one line per timestep: the timestep index followed by all its deltas
    # (also echoed to stdout)
    for key in dict_timestep_list_delta_positions:
        print "\n", key,
        print >> file3, "\n", key,
        for item in dict_timestep_list_delta_positions[key]:
            print item,
            print >> file3, item,

    file3.close()

    print "\n written:", output_file3
def main(graph_name):

    G = nx.read_gml(graph_name)

    for_testing_fixed_set = "YES"  # when YES, fixed values param, to get all statistics on final distances etc
    # change the range for the parameters accordingly

    envelopes = "NO"

    Niter = 1000

    percent_envelope = 95.

    list_id_weekends_T3 = look_for_T3_weekends(
        G
    )  # T3 doesnt share fellows in the weekend  (but they are the exception)

    cutting_day = 175

    all_team = "NO"  # as adopters or not

    dir_real_data = '../Results/'

    dir = "../Results/weight_shifts/infection/"

    delta_end = 3.  # >= than + or -  dr difference at the end of the evolution (NO realization ends up closer than this!!!! if 2, i get and empty list!!!)
    Nbins = 20  # for the histogram of sum of distances

    if for_testing_fixed_set == "NO":
        output_file3 = "../Results/weight_shifts/Landscape_parameters_infection_memory_fixed_dose_thr_" + str(
            Niter) + "iterFIXED_Thr0.2_Imm0.0.dat"
        file3 = open(output_file3, 'wt')
        file3.close()

######################################################################################
#  I read the file of the actual evolution of the idea spreading in the hospital:   ##
######################################################################################

    if all_team == "YES":
        print "remember that now i use the file of adopters without fellows\n../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"
        exit()

    else:
        filename_actual_evol = "../Results/Actual_evolution_adopters_NO_fellows_only_attendings.dat"

    file1 = open(
        filename_actual_evol, 'r'
    )  ## i read the file:  list_dates_and_names_current_adopters.txt  (created with: extract_real_evolution_number_adopters.py)
    list_lines_file = file1.readlines()

    list_actual_evol = []
    for line in list_lines_file:  # [1:]:   # i exclude the first row

        num_adopters = float(line.split(" ")[1])
        list_actual_evol.append(num_adopters)

################################################################################

    prob_min = 0.3
    prob_max = 0.301
    delta_prob = 0.1

    prob_Immune_min = 0.00
    prob_Immune_max = 0.001
    delta_prob_Immune = 0.1

    dose_min = 0.7  # of a single encounter with an infected  (starting from zero doesnt make sense)
    dose_max = 0.701
    delta_dose = 0.01

    ##########  KEEP FIXED TO ONE
    infect_threshold_min = 1.00  # i can define the dose in units of the threshold
    infect_threshold_max = 1.001
    delta_infect_threshold = 0.1
    ############

    dict_filenames_tot_distance = {
    }  # i will save the filename as key and the tot distance from that curve to the original one

    prob_Immune = prob_Immune_min
    while prob_Immune <= prob_Immune_max:

        print "prom Immune:", prob_Immune

        prob_infection = prob_min
        while prob_infection <= prob_max:

            print "  p:", prob_infection

            infect_threshold = infect_threshold_min
            while infect_threshold <= infect_threshold_max:

                print "  threshold:", infect_threshold

                dose = dose_min
                while dose <= dose_max:

                    print "  dose:", dose

                    if for_testing_fixed_set == "YES":
                        output_file2 = dir + "Average_time_evolution_Infection_memory_train_test_p" + str(
                            prob_infection) + "_Immune" + str(
                                prob_Immune) + "_FIXED_threshold" + str(
                                    infect_threshold) + "_dose" + str(
                                        dose) + "_" + str(Niter) + "iter.dat"
                    else:
                        output_file2 = dir + "Average_time_evolution_Infection_memory_p" + str(
                            prob_infection) + "_Immune" + str(
                                prob_Immune) + "_FIXED_threshold" + str(
                                    infect_threshold) + "_dose" + str(
                                        dose) + "_" + str(Niter) + "iter.dat"

                    file2 = open(output_file2, 'wt')
                    file2.close()

                    num_shifts = 0
                    for n in G.nodes():
                        G.node[n]["status"] = "S"
                        G.node[n][
                            "infec_value"] = 0.  # when this value goes over the infect_threshold, the dr is infected
                        if G.node[n]['type'] == "shift":
                            num_shifts += 1

                    list_lists_t_evolutions = [
                    ]  # i create the empty list of list for the Niter temporal evolutions

                    list_dist_fixed_parameters = []
                    list_abs_dist_at_ending_point_fixed_parameters = []
                    list_dist_at_ending_point_fixed_parameters = []
                    list_final_num_infected = []

                    for iter in range(Niter):

                        #  print "     iter:",iter

                        list_I = []  #list infected doctors
                        list_ordering = []
                        list_s = []

                        ########### set I.C.

                        max_order = 0
                        for n in G.nodes():
                            G.node[n][
                                "status"] = "S"  # all nodes are Susceptible
                            if G.node[n]['type'] == "shift":
                                list_s.append(n)
                                if G.node[n]['order'] > max_order:
                                    max_order = G.node[n]['order']
                            else:
                                if G.node[n]['label'] == "Wunderink" or G.node[
                                        n]["label"] == "Weiss":
                                    G.node[n]["status"] = "I"
                                    G.node[n][
                                        "infec_value"] = infect_threshold + 1.
                                    list_I.append(G.node[n]['label'])

                        list_single_t_evolution = []
                        list_single_t_evolution.append(
                            2.0)  # I always start with TWO infected doctors!!

                        for n in G.nodes(
                        ):  # i make some DOCTORs INMUNE  (anyone except Weiss and Wunderink)
                            if (G.node[n]['type'] == "A") or (G.node[n]['type']
                                                              == "F"):
                                if G.node[n]['label'] != "Wunderink" and G.node[
                                        n]["label"] != "Weiss":
                                    rand = random.random()
                                    if rand < prob_Immune:
                                        G.node[n]["status"] = "Immune"

                        ################# the dynamics starts:
                        # One time step t = one shift order. For every shift node
                        # active at time t, susceptible doctors on that shift may
                        # receive infection "doses"; a doctor converts to adopter
                        # ("I") once the accumulated infec_value crosses the
                        # threshold (memory/dose model).

                        t = 1
                        while t <= max_order:  # loop over shifts, in order
                            for n in G.nodes():
                                if G.node[n]['type'] == "shift" and G.node[n][
                                        'order'] == t:
                                    shift_lenght = int(
                                        G.node[n]['shift_lenght'])

                                    # Non-T3 weekend shifts count as one exposure day:
                                    # the fellow splits the weekend between the two
                                    # attendings. Only T3 weekends (list_id_weekends_T3)
                                    # keep the full 2-day exposure.
                                    if shift_lenght == 2 and n not in list_id_weekends_T3:
                                        shift_lenght = 1  # because during weekends, the fellow does rounds one day with Att1 and the other day with Att2.  (weekend shifts for T3 are two day long, with no sharing fellows)

                                    flag_possible_infection = 0
                                    for doctor in G.neighbors(
                                            n
                                    ):  #first i check if any doctor is infected in this shift
                                        if G.node[doctor]["status"] == "I":
                                            flag_possible_infection = 1

                                    if flag_possible_infection:
                                        for doctor in G.neighbors(
                                                n
                                        ):  # then the doctors in that shift, gets infected with prob_infection

                                            # One independent infection attempt per
                                            # day of the shift (shift_lenght trials).
                                            for i in range(shift_lenght):
                                                if G.node[doctor][
                                                        "status"] == "S":
                                                    rand = random.random()
                                                    if rand < prob_infection:  # with prob p the infection occurres

                                                        G.node[doctor][
                                                            "infec_value"] += dose  # and bumps the infection_value of that susceptible dr

                                                        if G.node[doctor][
                                                                "infec_value"] >= infect_threshold:  # becomes  infected

                                                            G.node[doctor][
                                                                "status"] = "I"
                                                            if G.node[doctor][
                                                                    "type"] == "A":  # fellows participate in the dynamics, but i only consider the attendings as real adopters
                                                                list_I.append(
                                                                    G.node[
                                                                        doctor]
                                                                    ["label"])

                        # for node in G.nodes():
                        #   if G.node[node]['type']!="shift":
                        #     print t, G.node[node]['label'], G.node[node]["infec_value"]
                        #raw_input()
                            # Record the cumulative adopter count once per time step
                            # (this append is inside the while loop, after the shift scan).
                            list_single_t_evolution.append(float(len(list_I)))

                            t += 1
                            ######## end t loop

                        # Store this realization's full trajectory, plus its
                        # distance-to-real-data statistics used for model selection.
                        list_lists_t_evolutions.append(list_single_t_evolution)

                        list_dist_fixed_parameters.append(
                            compare_real_evol_vs_simus_to_be_called.
                            compare_two_curves(list_actual_evol,
                                               list_single_t_evolution))

                        list_abs_dist_at_ending_point_fixed_parameters.append(
                            abs(list_single_t_evolution[-1] -
                                list_actual_evol[-1])
                        )  # i save the distance at the ending point between the current simu and actual evol

                        # Signed version of the same end-point distance (kept
                        # separately for the signed histogram below).
                        list_dist_at_ending_point_fixed_parameters.append(
                            list_single_t_evolution[-1] - list_actual_evol[-1]
                        )  # i save the distance at the ending point between the current simu and actual evol

                        list_final_num_infected.append(
                            list_single_t_evolution[-1])

                        ######## end loop Niter

                    # Aggregate the Niter realizations for this parameter combo:
                    # [mean curve distance, std of curve distance, mean |end-point gap|].
                    list_pair_dist_std_delta_end = []

                    list_pair_dist_std_delta_end.append(
                        numpy.mean(list_dist_fixed_parameters)
                    )  # average dist between the curves over Niter
                    list_pair_dist_std_delta_end.append(
                        numpy.std(list_dist_fixed_parameters))

                    list_pair_dist_std_delta_end.append(
                        numpy.mean(
                            list_abs_dist_at_ending_point_fixed_parameters))

                    # Landscape exploration mode: append one summary row per
                    # parameter combination to the global landscape file.
                    if for_testing_fixed_set == "NO":
                        file3 = open(output_file3,
                                     'at')  # i print out the landscape
                        print >> file3, prob_infection, prob_Immune, numpy.mean(
                            list_abs_dist_at_ending_point_fixed_parameters
                        ), numpy.mean(list_dist_fixed_parameters), numpy.mean(
                            list_final_num_infected), numpy.std(
                                list_final_num_infected
                            ), numpy.std(list_final_num_infected) / numpy.mean(
                                list_final_num_infected)
                        file3.close()

                    # Only parameter sets whose mean end-point gap is within
                    # delta_end are candidates for the optimum.
                    if (
                            numpy.mean(
                                list_abs_dist_at_ending_point_fixed_parameters)
                    ) <= delta_end:  # i only consider situations close enough at the ending point

                        dict_filenames_tot_distance[
                            output_file2] = list_pair_dist_std_delta_end

                    # Write the Niter-averaged trajectory: one line per time step s
                    # with the mean adopter count across realizations.
                    file2 = open(output_file2, 'at')
                    for s in range(len(list_single_t_evolution)):
                        list_fixed_t = []
                        for iter in range(Niter):
                            list_fixed_t.append(
                                list_lists_t_evolutions[iter][s])
                        print >> file2, s, numpy.mean(list_fixed_t)
                    file2.close()

                    print "printed out: ", output_file2

                    # Testing mode (fixed optimal parameters): report how often
                    # realizations end within delta_end of the real curve, and
                    # dump the raw end-point / trajectory-distance histograms.
                    if for_testing_fixed_set == "YES":

                        num_valid_endings = 0.
                        for item in list_abs_dist_at_ending_point_fixed_parameters:
                            if item <= delta_end:  # i count how many realizations i get close enough at the ending point
                                num_valid_endings += 1.

                        print "average distance of the optimum in the testing segment:", numpy.mean(
                            list_dist_fixed_parameters), numpy.std(
                                list_dist_fixed_parameters
                            ), list_dist_fixed_parameters, "\n"
                        print "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                            list_dist_at_ending_point_fixed_parameters
                        ), "SD final dist", numpy.std(
                            list_dist_at_ending_point_fixed_parameters
                        ), list_dist_at_ending_point_fixed_parameters, "\n"

                        # Histogram of signed end-point distances (can be negative).
                        histogram_filename = "../Results/weight_shifts/histogr_raw_distances_ending_infection_memory_p" + str(
                            prob_infection
                        ) + "_Immune" + str(prob_Immune) + "_threshold" + str(
                            infect_threshold) + "_dose" + str(
                                dose) + "_" + str(Niter) + "iter_day" + str(
                                    cutting_day) + ".dat"
                        histograma_gral_negv_posit.histograma(
                            list_dist_at_ending_point_fixed_parameters,
                            histogram_filename)

                        # Binned histogram (Nbins) of whole-trajectory distances.
                        histogram_filename2 = "../Results/weight_shifts/histogr_sum_dist_traject_infection_memory_p" + str(
                            prob_infection
                        ) + "_Immune" + str(prob_Immune) + "_threshold" + str(
                            infect_threshold) + "_dose" + str(
                                dose) + "_" + str(Niter) + "iter_day" + str(
                                    cutting_day) + ".dat"
                        histograma_bines_gral.histograma_bins(
                            list_dist_fixed_parameters, Nbins,
                            histogram_filename2)

                    # Human-readable summary file for this parameter combination.
                    output_file10 = "../Results/weight_shifts/Summary_results_training_segment_infection_memory_distrib_p" + str(
                        prob_infection) + "_" + "FIXED_Immune" + str(
                            prob_Immune) + "_FIXED_threshold" + str(
                                infect_threshold
                            ) + "_dose" + str(dose) + "_" + str(
                                Niter) + "iter_day" + str(cutting_day) + ".dat"
                    file10 = open(output_file10, 'wt')

                    print >> file10, "Summary results from train-testing infection with", Niter, "iter, and with values for the parameters:  prob_inf ", prob_infection, " prob immune: ", prob_Immune, "infect. threshold:", infect_threshold, "dose:", dose, "\n"

                    print >> file10, "average distance of the optimum in the testing segment:", numpy.mean(
                        list_dist_fixed_parameters), numpy.std(
                            list_dist_fixed_parameters
                        ), list_dist_fixed_parameters, "\n"
                    # NOTE(review): num_valid_endings (and histogram_filename below)
                    # are only assigned inside the for_testing_fixed_set == "YES"
                    # branch above; if this code runs with "NO" they look undefined
                    # here (NameError) unless set earlier in the file — TODO confirm.
                    print >> file10, "fraction of realizations that end within delta_doctor:", num_valid_endings / Niter, "mean ending dist:", numpy.mean(
                        list_dist_at_ending_point_fixed_parameters
                    ), "SD final dist", numpy.std(
                        list_dist_at_ending_point_fixed_parameters
                    ), list_dist_at_ending_point_fixed_parameters, "\n"

                    print >> file10, "written optimum train_test evolution file:", output_file2
                    print >> file10, "written histogram file: ", histogram_filename

                    file10.close()

                    print "written Summary file: ", output_file10
                    print "written histogram file: ", histogram_filename

                    # Optionally compute the percent envelope around the set of
                    # Niter trajectories for this parameter combination.
                    if envelopes == "YES":
                        calculate_envelope_set_curves.calculate_envelope(
                            list_lists_t_evolutions, percent_envelope,
                            "Infection_memory_fixed", [
                                prob_infection, prob_Immune, infect_threshold,
                                dose
                            ])

                    # Advance the four nested parameter sweeps (innermost first):
                    # dose -> infect_threshold -> prob_infection -> prob_Immune.
                    dose += delta_dose
                infect_threshold += delta_infect_threshold
            prob_infection += delta_prob
        prob_Immune += delta_prob_Immune

        # After the full landscape sweep, pick the parameter set whose averaged
        # curve best matches the real evolution (only needed in exploration mode;
        # in testing mode the optimum is already known).
        if for_testing_fixed_set == "NO":  # only if i am exploring the whole landscape, i need to call this function, otherwise, i already know the optimum
            compare_real_evol_vs_simus_to_be_called.pick_minimum_same_end(
                dict_filenames_tot_distance, "Infection_memory", all_team,
                Niter, None)
            print "written landscape file:", output_file3