Example #1
def calc_hubs_neighbour_panels(graph, is_save):
    ''' this method gets a graph, calculates its hubs and for each hub
    creates a distribution of its neighbours' panels'''

    diff_treshold.print_log("calc_hubs_neighbour_panels - start")

    hubs_lst = network.get_hubs(graph)

    diff_treshold.print_log("calc_hubs_neighbour_panels - looping each hub")

    for hub in hubs_lst:
        name = hub[0]
        panels_dict = {}
        for nei in graph.neighbors(name):
            curr_panel = graph.node[nei]["panel_id"]
            panels_dict[curr_panel] = panels_dict.get(curr_panel, 0) + 1

        filename = "panels_hist_for_hub_" + str(name).replace(":",
                                                              "_") + ".png"
        title = "panels distribution of hub " + name + " neighbours"
        xlabel = "panel id"
        ylabel = "count"
        color = "red"

        histogram_dict(panels_dict, filename, title, xlabel, ylabel, color,
                       is_save)
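
For reference, a minimal standalone sketch of the neighbour-panel counting step on a toy graph (the node names and panel IDs below are made up, and get_hubs/histogram_dict are left out; graph.nodes[...] is the networkx 2.x spelling of graph.node[...]):

import networkx as nx

g = nx.Graph()
g.add_node("A:1", panel_id="P1")
g.add_node("B:1", panel_id="P1")
g.add_node("C:2", panel_id="P2")
g.add_node("hub:0", panel_id="P1")
g.add_edges_from([("hub:0", "A:1"), ("hub:0", "B:1"), ("hub:0", "C:2")])

panels_dict = {}
for nei in g.neighbors("hub:0"):
    curr_panel = g.nodes[nei]["panel_id"]  # graph.node[...] on networkx 1.x
    panels_dict[curr_panel] = panels_dict.get(curr_panel, 0) + 1

print(panels_dict)  # {'P1': 2, 'P2': 1}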
Example #2
File: network.py Project: ronlevy6/labov
def find_all_triplets(graph,edge_attr_name):    
    ''' this method runs on the graph and finds triplets of nodes x,i,j such that
    the edges x-i and x-j exist, the edge i-j doesn't, and i,j come from different panels'''
    
    
    diff_treshold.print_log("find all triplets start")
    triplets_lst = []
    
    for node_x in graph.nodes():
        #diff_treshold.print_log("find all triplets new node - " + str(node_x))
        nei_lst = graph.neighbors(node_x)
        for i in range(0 , len(nei_lst)):
            node_i = nei_lst[i]
            panel_i = diff_treshold.get_panel_id_substr(node_i)
            node_i_neis = graph.neighbors(node_i)
            corr_i = graph.get_edge_data(node_x,node_i)[edge_attr_name]
            for j in range(i + 1, len(nei_lst)):
                if nei_lst[j] not in node_i_neis and panel_i != diff_treshold.get_panel_id_substr(nei_lst[j]):
                    #node_x is a neighbour of both node_i and nei_lst[j],
                    #but those two are not connected and come from different panels
                    corr_j = graph.get_edge_data(node_x,nei_lst[j])[edge_attr_name]
                    var = (node_x,node_i,nei_lst[j],corr_i,corr_j)                    
                    triplets_lst.append(var)
                    
    
    diff_treshold.print_log("find all triplets after loop return list")        
    return triplets_lst
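
A standalone sketch of the same open-triplet scan on a toy graph, omitting the panel check (the names are hypothetical; neighbors returns an iterator on networkx 2.x, so it is wrapped in list here):

import networkx as nx

g = nx.Graph()
g.add_edge("x", "i", corr=0.9)
g.add_edge("x", "j", corr=0.8)  # the i-j edge is intentionally absent

triplets = []
for node_x in g.nodes():
    nei_lst = list(g.neighbors(node_x))  # list(...) needed on networkx >= 2.0
    for a in range(len(nei_lst)):
        for b in range(a + 1, len(nei_lst)):
            if not g.has_edge(nei_lst[a], nei_lst[b]):
                triplets.append((node_x, nei_lst[a], nei_lst[b]))

print(triplets)  # [('x', 'i', 'j')]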
Example #3
File: network.py Project: ronlevy6/labov
def find_trios_in_graph_same_panel_also(graph,trait_vals_path):
    ''' gets all trios, this time also including trios whose unconnected nodes come from the same panel'''

    diff_treshold.print_log("find_trios_in_graph_same_panel_also - start")
    trios_lst = []
    
    df = pd.read_excel(trait_vals_path, header = 0, index_col = "FlowJo Subject ID")

    corr_df = diff_treshold.create_corr_between_all(df)    
    
    for node_x in graph.nodes():
        
        nei_lst = graph.neighbors(node_x)
        for i in range(0 , len(nei_lst)):
            node_i = nei_lst[i]
            node_i_neis = graph.neighbors(node_i)
            corr_i = graph.get_edge_data(node_x,node_i)["corr"]
            for j in range(i + 1, len(nei_lst)):
                if nei_lst[j] not in node_i_neis :
                    #node_x is a neighbour of both node_i and nei_lst[j],
                    #but those two are not connected (same-panel pairs are included here)
                    corr_j = graph.get_edge_data(node_x,nei_lst[j])["corr"]
                    unconnected_corr = corr_df.loc[node_i,nei_lst[j]]
                    var = (node_x,node_i,nei_lst[j],corr_i,corr_j,unconnected_corr)                    
                    trios_lst.append(var)
    
    diff_treshold.print_log("find_trios_in_graph_same_panel_also - after loop return list")        
    return trios_lst    
Example #4
File: network.py Project: ronlevy6/labov
def get_hubs(graph):
    ''' this method gets a graph and returns a list of nodes whose number of
    neighbours is higher than avg + 2 * sd'''

    diff_treshold.print_log("get hubs start - before nodes loop")
    nodes_nei_lst = []  
    only_nei_num = []
    
    
    for node in graph.nodes():
        deg = graph.degree(node)
        nodes_nei_lst.append((node, deg))
        only_nei_num.append(deg)
        
    
    diff_treshold.print_log("get hubs after nodes loop")    
    
    # calculate threshold
    nei_avg = sum(only_nei_num) / len(only_nei_num)
    sd = np.std(only_nei_num)
    treshold = nei_avg + sd * 2
    nodes_nei_lst.sort(key = lambda x: x[1], reverse = True)
    
    hubs_lst = []
    i = 0
    while i < len(nodes_nei_lst) and nodes_nei_lst[i][1] > treshold:
        hubs_lst.append(nodes_nei_lst[i])
        i += 1
    
    return hubs_lst
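
A compressed sketch of the avg + 2 * sd cutoff on a toy star graph (nothing here comes from the project; numpy does the statistics):

import networkx as nx
import numpy as np

g = nx.star_graph(10)  # node 0 has degree 10, the ten leaves have degree 1
degrees = [g.degree(n) for n in g.nodes()]
treshold = np.mean(degrees) + 2 * np.std(degrees)  # same spelling as the module
hubs = [(n, g.degree(n)) for n in g.nodes() if g.degree(n) > treshold]
print(hubs)  # [(0, 10)]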
Example #5
def stats_of_connected_components(graph, field_to_check, name_field, is_save):
    ''' this method gets a graph and creates a bar plot for each connected component
    with the count of the required field over the nodes in that component'''

    diff_treshold.print_log("stats_of_connected_components - start")

    connected_components = list(nx.connected_components(graph))
    for i in range(0, len(connected_components)):
        group = connected_components[i]
        data_lst = []
        for node_group in group:
            val = graph.node[node_group][field_to_check]
            if type(val) == list:
                data_lst += graph.node[node_group][field_to_check]
            else:
                data_lst.append(val)

        #create a dictionary from the list and plot it
        diff_treshold.print_log(
            "stats_of_connected_components - before plotting")
        d = create_dict_from_list(data_lst)
        label = name_field + " per connected component with " + str(
            len(group)) + " cells"
        xlabel = name_field
        ylabel = "count"
        file_name = name_field + "_for_connected_component_number_" + str(
            i) + ".png"
        histogram_dict(d, file_name, label, xlabel, ylabel, 'yellow', is_save)
Example #6
File: network.py Project: ronlevy6/labov
def create_sub_panel_id_connections(treshold_dir,sub_panel_id,index_column,filter_col):
    ''' this method gets a directory and a sub-panel ID whose files contain the
    traits whose correlation is higher than the threshold. It concatenates all
    of them into a single DF and creates a dictionary'''
    
    diff_treshold.print_log("create_sub_panel_id_connections start")

    is_first = True
    did_something = False
    #check which pattern to take
    if sub_panel_id is not None:
        pattern = sub_panel_id + r'_P\d\d_treshold.*corr.csv'
    else:
        #no sub panel id given - run on all data
        pattern = PATTERN
    
    for filename in os.listdir(treshold_dir):
        if re.match(pattern,filename) is not None:
            #matching file
            full_path = treshold_dir + filename
            diff_treshold.print_log("create_sub_panel_id_connections - before read file")
            #TODO: check whether this file should be read with an index column
            if is_first:
                df = pd.read_csv(full_path,header = 0, index_col = index_column)
                is_first = False
            else:
                tmp = pd.read_csv(full_path,header = 0, index_col = index_column)
                df = df.append(tmp, ignore_index = False)
            diff_treshold.print_log("create_sub_panel_id_connections - after read file")
            did_something = True
    #now the df is full with all the data
    diff_treshold.print_log("create_sub_panel_id_connections - created united DF, before mega dict")
    
    if not did_something:
        #no data for specific sub_panel_id
        return
        
    d = {} #init empty dict
    df.sort_values(by = "corr", ascending = False, inplace = True)
    idx_lst = remove_dups(df.index)
    for idx in idx_lst:
        vals = df.loc[idx][filter_col] #get trait2 values for specific trait
        
        if isinstance(vals, str):
            vals = [vals] #single-value list
        else:
            vals = list(vals)
        d[idx] = vals
    
    diff_treshold.print_log("create_sub_panel_id_connections - after dict - return it")           
    
    return d
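
A sketch of the file-selection and concatenation step using pd.concat, the maintained replacement for the df.append call above (the directory, sub-panel id and index column are placeholders):

import os
import re
import pandas as pd

treshold_dir = "/path/to/treshold_files/"    # hypothetical directory
pattern = r"XX_P\d\d_treshold.*corr.csv"     # hypothetical sub-panel id "XX"

frames = []
for filename in os.listdir(treshold_dir):
    if re.match(pattern, filename) is not None:
        frames.append(pd.read_csv(os.path.join(treshold_dir, filename),
                                  header=0, index_col="trait1"))

df = pd.concat(frames) if frames else None   # None mirrors the early return above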
Example #7
File: network.py Project: ronlevy6/labov
def create_p_val_network(p_val_file, max_p_val):
    ''' creates the full network by combining both edge directions'''
    
    diff_treshold.print_log("create_p_val_network - start")           
    final_dict = {}
    
    d = create_p_val_network_one_side(p_val_file, max_p_val, "trait1","trait2")
    final_dict = combine_dicts(final_dict,d)
    
    d = create_p_val_network_one_side(p_val_file, max_p_val, "trait2","trait1")
    final_dict = combine_dicts(final_dict,d)
    
    return final_dict
Example #8
File: network.py Project: ronlevy6/labov
def get_file_suffix(filename):
    ''' This method gets a filename and returns its suffix'''
    
    diff_treshold.print_log("get_file_suffix start")
    
    rev = filename[::-1] # reverse the filename
    pos = rev.find(".") #last "." in original filename
    rev_ret = rev[:pos]
    ret = rev_ret[::-1]
    
    diff_treshold.print_log("get_file_suffix end")    
    
    return ret
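
The standard library gives the same result; a short alternative sketch (one edge case differs: with no dot in the name, find returns -1 and the version above returns the name minus its last character, while splitext returns an empty suffix):

import os

def get_file_suffix_stdlib(filename):
    # os.path.splitext splits at the last dot: ('treshold_0.9_corr', '.csv')
    return os.path.splitext(filename)[1].lstrip(".")

print(get_file_suffix_stdlib("treshold_0.9_corr.csv"))  # csv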
Example #9
def calc_hub_and_neighbours_markers(graph, is_save):
    ''' this method gets a graph, uses get_hubs to find the center nodes
    and calculates the markers of the neighbours of those nodes'''

    diff_treshold.print_log("calc_hub_and_neighbours_markers start")

    hubs_lst = network.get_hubs(graph)

    diff_treshold.print_log(
        "calc_hub_and_neighbours_markers after get hubs before loop")
    for hub in hubs_lst:
        hub_name = hub[0]
        markers = []  # init empty list to be populated with all markers
        for neighbour in graph.neighbors(hub_name):
            markers += graph.node[neighbour]["markers_lst"]

        # create a dictionary from the list when key is marker and value is count
        diff_treshold.print_log(
            "calc_hub_and_neighbours_markers in loop after list creation")
        d = create_dict_from_list(markers)
        label = "histogram of markers of the neighbours of the hub " + hub_name
        xlabel = "markers"
        ylabel = "count"
        file_name = "markers_of_neigbours_of_hub_" + hub_name.replace(
            ":", "_") + ".png"
        histogram_dict(d, file_name, label, xlabel, ylabel, 'green', is_save)

    diff_treshold.print_log("calc_hub_and_neighbours_markers finished")
Example #10
def shuffle_triplets(triplets_df_path, trait_vals_path, ret_df_path, num_of_iterations = 1000, is_return = False):    
    ''' this method checks whether a triplet is robust by shuffling its data num_of_iterations
    times and creating that many shuffled triplets of the same cells, each time saving the triplet's score'''
    
    diff_treshold.print_log("Triplet.shuffle_triplets - start, before read files")
    
    trait_vals_df = pd.read_csv(trait_vals_path, header=0,index_col="FlowJo Subject ID")
    
    df_no_zero =  trait_vals_df.replace(0,np.nan)
    
    
    triplet_lst = []
    #this list will contain the results of the permutations and the original data
    data_lst = []
    
    triplets_df = pd.read_excel(triplets_df_path, header = 0)
    needed_df = triplets_df[["center","node1","node2","corr1","corr2","low corr", "triplet score"]]
    
    diff_treshold.print_log("Triplet.shuffle_triplets - after read file,before putting real data")
    for i in range(0, len(needed_df)):
        var = tuple(needed_df.iloc[i])
        var = (0,) + var #perm num 0 marks the original, unshuffled data
        data_lst.append(var)
        triplet = (var[1],var[2],var[3])
        triplet_lst.append(triplet)
            
    diff_treshold.print_log("Triplet.shuffle_triplets - before loop")
    
    for i in range(0, num_of_iterations):
        if i % 10 == 0:
            diff_treshold.print_log("Triplet.shuffle_triplets - in loop, i = " + str(i))
        shuf_df = shuffle_df(df_no_zero, "row")
        corr_df = shuf_df.corr(method="pearson",min_periods = 90) #changed from 100!!
        for c,n1,n2 in triplet_lst:
            corr1 = abs(corr_df[c][n1])
            corr2 = abs(corr_df[c][n2])
            low_corr = abs(corr_df[n1][n2])
            score = min(corr1,corr2) / low_corr
            perm_var = (i+1,c,n1,n2,corr1,corr2,low_corr,score)
            data_lst.append(perm_var)
    
    diff_treshold.print_log("Triplet.shuffle_triplets - after loop")
    ret_df = pd.DataFrame(data = data_lst,columns = ["perm num","center","node1","node2","corr1","corr2","low corr","triplet score"])
    
    ret_df.to_csv(ret_df_path,index = False)
    
    if is_return:
        return ret_df
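
A compressed sketch of a single permutation round on toy data; shuffle_df is not shown in this listing, so an independent per-column shuffle stands in for it here, and the score is min(|corr1|, |corr2|) / |low corr| as in the loop above:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(50, 3)), columns=["c", "n1", "n2"])  # toy trait values

# shuffle each column independently to break the subject pairing
shuf = df.copy()
for col in shuf.columns:
    shuf[col] = rng.permutation(shuf[col].to_numpy())

corr = shuf.corr(method="pearson")
score = min(abs(corr.loc["c", "n1"]), abs(corr.loc["c", "n2"])) / abs(corr.loc["n1", "n2"])
print(score)  # one randomized triplet score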
Example #11
File: network.py Project: ronlevy6/labov
def combine_dicts(dict1, dict2):
    ''' This method gets 2 dictionaries and combines them'''
    
    if dict1 is None:
        return dict2
    if dict2 is None:
        return dict1
    
    diff_treshold.print_log("combine dicts - start")
    combined = {}
    keys = list(dict1.keys()) + list(dict2.keys())
    # remove duplicates using the set datatype    
    keys_non_dup = remove_dups(keys) 
    #gather values for each key and remove duplicates
    for key in keys_non_dup:
        vals = dict1.get(key, []) + dict2.get(key, [])
        vals_non_dup = remove_dups(vals)
        combined[key] = vals_non_dup
        
    diff_treshold.print_log("combine dicts - after gathering all data")
    return combined
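
A toy demonstration of the combine semantics (the keys and values are made up; dict.fromkeys stands in for remove_dups as an order-preserving de-duplication):

d1 = {"t1": ["a", "b"], "t2": ["c"]}
d2 = {"t1": ["b", "d"], "t3": ["e"]}

combined = {}
for key in list(d1) + [k for k in d2 if k not in d1]:
    vals = d1.get(key, []) + d2.get(key, [])
    combined[key] = list(dict.fromkeys(vals))  # order-preserving de-duplication

print(combined)  # {'t1': ['a', 'b', 'd'], 't2': ['c'], 't3': ['e']}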
Example #12
File: network.py Project: ronlevy6/labov
def add_markers_for_traits(graph, trait_analysis_path):
    ''' this method adds the markers of each trait (node) in the graph'''
    
    diff_treshold.print_log("add_markers_for_traits start before read file")
    
    trait_analysis_df = pd.read_excel(trait_analysis_path, header = 0, index_col = "Trait ID")
    
    diff_treshold.print_log("add_markers_for_traits after read file before loop")
    # run on all traits, for each one, check if has a node in the graph. 
    
    for trait in trait_analysis_df.index:
        if trait in graph:
            #trait has a node - collect its "+" and "-" markers and add them as attributes
            trait_data = trait_analysis_df.loc[trait]
            plus_filtered_data = trait_data[trait_data == "+"]
            plus_markers = list(plus_filtered_data.index)
            minus_filtered_data = trait_data[trait_data == "-"]
            minus_markers = list(minus_filtered_data.index)
            markers = [m + "+" for m in plus_markers]
            markers += [m + "-" for m in minus_markers]
                
            markers.sort()
            graph.add_node(trait, markers_lst = markers)
    
    diff_treshold.print_log("add_markers_for_traits finished")
Example #13
File: network.py Project: ronlevy6/labov
def create_p_val_network_one_side(p_val_file, max_p_val,index_col,filter_col):
    ''' this method gets the output file of an R script and creates a network from it,
    using index_col as edge start and filter_col as edge end'''
    
    diff_treshold.print_log("create_p_val_network_one_side - start")    

    network_dict = {}   
    
    df = pd.read_csv(p_val_file,header = 0,index_col = index_col)
    
    good_p_vals = df[df["needed_p_val"] < max_p_val]
    
    good_p_vals.sort_values(by = "needed_p_val", ascending = True, inplace = True)
    
    diff_treshold.print_log("create_p_val_network_one_side - after read file and filter DF")    
    
    idx_lst = remove_dups(good_p_vals.index)
    for idx in idx_lst:
        vals = good_p_vals.loc[idx][filter_col] #get trait2 values for specific trait
        
        if isinstance(vals, str):
            vals = [vals] #single-value list
        else:
            vals = list(vals)
        network_dict[idx] = vals
    
    diff_treshold.print_log("create_p_val_network_one_side - after dict - return it")           
    
    return network_dict
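
A standalone run of the filter-and-group step on a toy p-value table (the column names follow the code above, the values are invented):

import pandas as pd

df = pd.DataFrame({"trait1": ["A", "A", "B"],
                   "trait2": ["B", "C", "C"],
                   "needed_p_val": [0.001, 0.04, 0.2]}).set_index("trait1")

good = df[df["needed_p_val"] < 0.05].sort_values(by="needed_p_val")

network_dict = {}
for idx in good.index.unique():
    vals = good.loc[idx, "trait2"]  # a str for one row, a Series for several
    network_dict[idx] = [vals] if isinstance(vals, str) else list(vals)

print(network_dict)  # {'A': ['B', 'C']}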
Example #14
def degrees_of_nodes_in_connected_coponents(graph, is_save):
    ''' this method gets a graph and for each connected component creates a histogram
    of the degrees of the nodes in it'''

    diff_treshold.print_log("degrees_of_nodes_in_connected_coponents - start")

    connected_components = list(nx.connected_components(graph))
    for i in range(0, len(connected_components)):
        group = connected_components[i]
        degree_lst = []
        for node in group:
            degree_lst.append(graph.degree(node))

        #create a dictionary from the list and plot it
        diff_treshold.print_log(
            "degrees_of_nodes_in_connected_coponents - before plotting")
        d = create_dict_from_list(degree_lst)
        label = "degrees per connected component with " + str(
            len(group)) + " cells"
        xlabel = "degree"
        ylabel = "count"
        file_name = "degree_for_connected_component_number_" + str(i) + ".png"
        histogram_dict(d, file_name, label, xlabel, ylabel, 'pink', is_save)
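
A compressed equivalent using collections.Counter in place of create_dict_from_list, on a toy two-component graph:

from collections import Counter
import networkx as nx

g = nx.Graph([("a", "b"), ("b", "c"), ("d", "e")])  # two components
for i, group in enumerate(nx.connected_components(g)):
    degree_counts = Counter(g.degree(node) for node in group)
    print(i, dict(degree_counts))  # e.g. 0 {1: 2, 2: 1} then 1 {1: 2}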
Example #15
File: network.py Project: ronlevy6/labov
def create_full_graph(p_val_file, max_p_val, united_corr_pass_path, trait_analysis_path):
    ''' this method runs all the needed steps to build the full graph'''
    
    diff_treshold.print_log("create_full_graph - before create network")    
    
    network = create_p_val_network(p_val_file,max_p_val)
    
    diff_treshold.print_log("create_full_graph - before create graph")        
    
    graph = create_graph_from_network(network)
    
    diff_treshold.print_log("create_full_graph - before add corr")    
    
    add_corr_to_graph(graph,united_corr_pass_path)
    
    diff_treshold.print_log("create_full_graph - before add markers")        
    
    add_markers_for_traits(graph,trait_analysis_path)
    
    return graph       
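
A hypothetical end-to-end call of this pipeline; every path below is a placeholder, not a file from the project:

graph = create_full_graph(
    p_val_file="p_vals.csv",                      # placeholder path
    max_p_val=0.05,
    united_corr_pass_path="united_corr_pass.csv", # placeholder path
    trait_analysis_path="trait_analysis.xlsx",    # placeholder path
)
print(graph.number_of_nodes(), graph.number_of_edges())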
Example #16
def check_triplets_significance(triplets_shuffled_data_path, pct_to_check ,result_lst_path, is_return = False):
    ''' this method gets as input the output of shuffle_triplets and checks for each real
    triplet (perm num == 0) whether its score is bigger than the randomized score at the pct_to_check position'''
    
    diff_treshold.print_log("Triplet.check_triplets_significance - start, before read file")
    triplets_shuffled_data = pd.read_csv(triplets_shuffled_data_path,header = 0, index_col = ["center","node1","node2"])
    
    result_lst = []
    all_triplets = list(set(triplets_shuffled_data.index))
    
    
    diff_treshold.print_log("Triplet.check_triplets_significance - after read file, before loop")    
    
    for i in range(0, len(all_triplets)):
        if i % 15 == 0:
            diff_treshold.print_log("Triplet.check_triplets_significance - in loop, i = " + str(i))                
            
        center,node1,node2 = triplet = all_triplets[i]
        triplet_data = triplets_shuffled_data.loc[triplet]
        #get the triplet real score
        orig_score = triplet_data[triplet_data["perm num"] == 0]
        orig_score = float(orig_score["triplet score"])
        #get all the randomized scores
        triplet_data = triplet_data[triplet_data["perm num"] != 0]     
        rand_scores = triplet_data["triplet score"]
        rand_scores_not_nan = rand_scores.dropna()
        rand_scores_good = list(rand_scores_not_nan)
        #check if the triplet is significant
        rand_scores_good.sort()
        n = len(rand_scores_good)
        needed_pos = int(n * pct_to_check)
        if orig_score >= rand_scores_good[needed_pos]:
            is_sig = True
        else:
            is_sig = False
        real_pct = get_real_pct(rand_scores_good,orig_score)
        var = (center, node1,node2, orig_score,is_sig,real_pct)
        result_lst.append(var)
        
    df = pd.DataFrame(data = result_lst, columns = ["center","node1","node2","triplet score", "is significant","real pct"])
    
    df.to_csv(result_lst_path, index = False)
    
    if is_return:
        return df
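
A standalone sketch of the percentile test itself on synthetic scores (get_real_pct is not shown in this listing, so np.searchsorted stands in for the empirical percentile):

import numpy as np

rand_scores = sorted(np.random.default_rng(1).uniform(0.5, 1.5, size=100))
orig_score = 1.45
pct_to_check = 0.95

needed_pos = int(len(rand_scores) * pct_to_check)
is_sig = orig_score >= rand_scores[needed_pos]
real_pct = np.searchsorted(rand_scores, orig_score) / len(rand_scores)  # empirical percentile
print(is_sig, real_pct)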
Example #17
File: network.py Project: ronlevy6/labov
def add_corr_to_graph(graph, united_pass_path):
    ''' this method gets the graph and a path to a file that contains all
    pairs of traits in the graph, and adds the correlation as edge data'''
    
    diff_treshold.print_log("add_corr_to_graph start")    
    df = pd.read_csv(united_pass_path, header = 0, index_col = ["trait1","trait2"])
    diff_treshold.print_log("add_corr_to_graph after read file, before loop on edges")    
    
    for idx_tup in df.index:
        u = idx_tup[0]
        v = idx_tup[1]
        attr = df.loc[idx_tup]["corr"]
        if graph.has_edge(u, v):
            graph.add_edge(u, v, corr = attr)
    
    diff_treshold.print_log("add_corr_to_graph after loop - finish")    
Example #18
File: network.py Project: ronlevy6/labov
def merge_graph_nodes(graph,trait_val_path, min_unconneced_treshold, min_common_markers):
    ''' this method unites nodes in the graph when the following conditions are met:
    1 - they have at least min_common_markers (3) common markers and above 66% common markers
    2 - they have the same neighbours, OR for each uncommon neighbour the correlation
    between the non-connected traits is above min_unconneced_treshold (0.85).
    When merging 2 nodes the following attributes are changed:
        markers_lst - combine both traits' markers
        correlation - for each edge the new correlation is the average of the correlations
        merged_with - list of the previous nodes before the merge'''
    
    diff_treshold.print_log("merge_graph_nodes - start, before read file")
    
    traits_val_df = pd.read_csv(trait_val_path, header = 0, index_col = "FlowJo Subject ID")
    
    df_no_zero =  traits_val_df.replace(0,np.nan)
    
    to_corr_df = df_no_zero.astype('float32')
    
    corr_df = to_corr_df.corr(method="pearson",min_periods = 100)
    
    diff_treshold.print_log("merge_graph_nodes - after read file and calc corr, before loop")
    
    merged_nodes_lst = [] # a list of tuples to contain all merged nodes
    
    all_nodes = list(graph.nodes())
    n = len(all_nodes)
    
    for i in range(0, n):
        if i % 100 == 0:
            diff_treshold.print_log("merge_graph_nodes - in loop, i = " + str(i))            
        curr_node = all_nodes[i]
        if (curr_node in graph):
            #curr node wasn't merged
            j = i + 1
            while j < n:
                #second node wasn't merged as well
                if (all_nodes[j] in graph):
                    if can_merge_nodes(graph,curr_node,all_nodes[j], corr_df, min_unconneced_treshold, min_common_markers):
                        merged_tup = (curr_node, all_nodes[j])
                        merged_nodes_lst.append(merged_tup)
                        merge_2_nodes(graph, curr_node, all_nodes[j], corr_df)
                        #remove when doing real merging
                        j = n #exit loop and start merging the next node
                j += 1
                        
    return merged_nodes_lst
Example #19
File: network.py Project: ronlevy6/labov
def get_network_hubs(network):
    ''' This method gets a dictionary network and returns a list of nodes that have 
    more than avg + 2sd neighbours'''
    
    diff_treshold.print_log("get network hubs start - before get_num_of_neis_per_key")
    (key_num_of_neis,lst_num_of_neis) = get_num_of_neis_per_key(network)
    
    avg_of_nei = sum(lst_num_of_neis) / len(lst_num_of_neis)
    sd = np.std(lst_num_of_neis)
    treshold = avg_of_nei + 2 * sd
    diff_treshold.print_log("get network hubs - after get_num_of_neis_per_key and calc of statistical data")
    
    hub_lst = []
    i = 0
    #key_num_of_neis is ordered
    while i < len(key_num_of_neis) and key_num_of_neis[i][1] > treshold:
        hub_lst.append(key_num_of_neis[i])
        i += 1
    
    diff_treshold.print_log("get network hubs - after appending hub list - return it")
    return hub_lst
Example #20
File: network.py Project: ronlevy6/labov
def create_graph_from_network(network):
    ''' This method gets a network and creates a graph from it'''
    
    diff_treshold.print_log("create graph - start")
    
    g = nx.Graph()
    for key in network.keys():
        g.add_node(key, panel_id = diff_treshold.get_panel_id_substr(key))
    
    diff_treshold.print_log("create graph - before adding edges")
    #create the list of edges
    neigbours_lst = []    
    for edge in network.keys():
        for val in network[edge]:
            neigbours_lst.append((edge, val))
        
    g.add_edges_from(neigbours_lst)
    
    diff_treshold.print_log("create graph - after adding edges, return graph")
    return g
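
A toy run of the dict-to-graph conversion (the names are made up; the panel_id attribute is omitted because get_panel_id_substr is not shown in this listing):

import networkx as nx

network = {"A_P01": ["B_P02"], "B_P02": ["A_P01", "C_P03"]}  # toy dict

g = nx.Graph()
g.add_nodes_from(network)
for key, neighbours in network.items():
    g.add_edges_from((key, val) for val in neighbours)

print(sorted(g.edges()))  # [('A_P01', 'B_P02'), ('B_P02', 'C_P03')]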
Example #21
    def get_graph_Triplets_heatmap(graph, trait_values_path, heatmap_save_path, is_save):
        ''' this method gets a graph (usually a subgraph - a connected component),
        finds its triplets and creates a heatmap with the X axis as Triplet centers and the
        Y axis as all nodes in the graph. The value in each place in the matrix is the
        Triplet's score, or 0 when the nodes don't form a Triplet'''
        
        diff_treshold.print_log("Triplet.get_graph_Triplets_heatmap - start, before create triplets")
        triplets_lst = Triplet.create_all_triplets(graph, trait_values_path)
        if len(triplets_lst) == 0:
            #no trios..
            print("no trios here..")
            return triplets_lst,None,None
        diff_treshold.print_log("Triplet.get_graph_Triplets_heatmap - after create triplets")
        
        #get all centers and nodes to create a proper DF
        centers = [triplet.center for triplet in triplets_lst]
        centers_no_dups = network.remove_dups(centers)
        all_nodes = list(graph.nodes())
        #create a dataframe of zeros with triplets centers as indexs and nodes as columns
        df = pd.DataFrame(0.0,index = centers_no_dups, columns = all_nodes)
        
        diff_treshold.print_log("Triplet.get_graph_Triplets_heatmap - df ready, fill it")
                
        for triplet in triplets_lst:
            #check the score exists and there is a correlation result between the nodes
            curr_score = triplet.score
            if curr_score != curr_score:
                #NaN is the only value not equal to itself - mark missing scores with -1
                curr_score = -1
            
            df_node1_score = df.loc[triplet.center, triplet.node1]
            if df_node1_score != 0:
                #more than one triplet with these cells, do average
                new_score = (curr_score + df_node1_score) / 2
            else:
                new_score = curr_score
            df.at[triplet.center, triplet.node1] = new_score #set_value was removed from pandas, .at is the replacement

            #same for node2
            df_node2_score = df.loc[triplet.center, triplet.node2]
            if df_node2_score != 0:
                new_score = (curr_score + df_node2_score) / 2
            else:
                new_score = curr_score
            
            df.at[triplet.center, triplet.node2] = new_score
        
        diff_treshold.print_log("Triplet.get_graph_Triplets_heatmap - after updating DF, create heatmap")
        
        
        #Way number 1: draw the heatmap with seaborn
        #now df is filled, create a heatmap from it
        plt.new_figure_manager(0)
        heatmap = sns.heatmap(df)
        heatmap.set_title("heatmap of scores between triplets in the graph")
        heatmap_fig = heatmap.get_figure()
        heatmap_fig.show()
        
        '''
        #Way number 2 - alternative using plt.imshow
        plt.imshow(df)
        plt.colorbar()
        plt.show()     
        heatmap_fig = plt.figure()
        '''
        if is_save:
            #way number 1
            heatmap_fig.savefig(heatmap_save_path, format = "png", dpi = 1200)
            
        #return for number1
        plt.close(0)
        return triplets_lst,heatmap_fig,df
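
A minimal self-contained heatmap sketch on toy data, using df.at (as noted above, DataFrame.set_value was removed from pandas):

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.DataFrame(0.0, index=["c1", "c2"], columns=["n1", "n2", "n3"])  # toy scores
df.at["c1", "n2"] = 1.7
df.at["c2", "n3"] = 0.4

ax = sns.heatmap(df)
ax.set_title("heatmap of scores between triplets in the graph")
ax.get_figure().savefig("triplets_heatmap.png", format="png", dpi=300)
plt.close()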
Example #22
def create_all_triplets(graph, trait_values_path,is_abs = True):
    ''' this method gets a graph and creates all the triplets in it.
    The path of trait_values is needed to get the correlation between
    the unconnected nodes.
    trait_values_path must point to the ORIGINAL file!!!'''

    diff_treshold.print_log("create_all_triplets - start")

    graph_triplets = network.find_all_triplets(graph, "corr")

    #create a list with all unconnected traits
    unconnected_traits = [triplet[1] for triplet in graph_triplets]
    unconnected_traits += [triplet[2] for triplet in graph_triplets]

    distinct_unconnected_traits = list(set(unconnected_traits))

    triplets_lst = [] # init empty list to contain all triplet objects

    diff_treshold.print_log("create_all_triplets - after find_all_triplets, before read file")

    df = pd.read_excel(trait_values_path, header = 0, index_col = "FlowJo Subject ID")

    diff_treshold.print_log("create_all_triplets - after read file before creating good DF")

    filtered_df = df.loc[distinct_unconnected_traits]

    corr_df = diff_treshold.create_corr_between_all(filtered_df)

    diff_treshold.print_log("create_all_triplets - DF is good, before loop")

    for i in range(0 , len(graph_triplets)):
        if i % 800 == 0:
            diff_treshold.print_log("create_all_triplets - in loop, i = " + str(i))

        (center,node1,node2,corr1,corr2) = graph_triplets[i]
        unconnected_corr = corr_df.loc[node1,node2]
        if is_abs:
            unconnected_corr = abs(unconnected_corr)
        curr_triplet = Triplet(center,node1, node2, corr1, corr2, unconnected_corr)
        triplets_lst.append(curr_triplet)

    diff_treshold.print_log("create_all_triplets - after loop - finished")
    return triplets_lst
Example #23
File: network.py Project: ronlevy6/labov
def create_network(treshold_dir):
    ''' This method gets a dir of thresholded files, creates the connections
    for each sub panel id and returns a united dictionary'''
    diff_treshold.print_log("create_network - start")           
    final_dict = {}
    for sub_panel_id in SUB_PANELS_LST:
        diff_treshold.print_log("create_network - now for sub panel id " + sub_panel_id)           
        d = create_sub_panel_id_connections(treshold_dir,sub_panel_id,"trait1","trait2")
        diff_treshold.print_log("create_network - after first create connections, before combine")           
        final_dict = combine_dicts(final_dict,d)
        diff_treshold.print_log("create_network - after first combine")
        
        ## second run on the same sub panel id with different traits as index
        diff_treshold.print_log("create_network - second run for sub panel id " + sub_panel_id)
        d = create_sub_panel_id_connections(treshold_dir,sub_panel_id,"trait2","trait1")
        diff_treshold.print_log("create_network - after second create connections, before combine")
        final_dict = combine_dicts(final_dict,d)
        diff_treshold.print_log("create_network - after second combine")
    
    diff_treshold.print_log("create_network - finish, before return")           
    return final_dict