Create custom Loss of MSE loss (weighted MSE loss) and L1 penality (L1 regulizer). weight - of size input, weight of class for each value """ if isinstance(weighted, torch.Tensor): return torch.sum(weighted * (input - target)**2) else: return crit(input, target) # Get data backup_dir = "backup_cpkl/" med_name = "PsychroMesoThermHyperMedium" #Get the scope matrix by running scope_kegg_prk.py simplified_matrix = pk.load(open( joinP(backup_dir, "simplified_matrix_scope{}_prk.cpk".format(med_name)), "rb"), encoding='latin1') #Scope matrix df_species = pd.read_csv("species_metadata.csv", index_col=0) df_species.loc[df_species.sp_codes.isnull(), "sp_codes"] = "nan" # Otherwise interpreted as NaN # Standardisation growth_temp_crT = (df_species.temp_def.dropna().values - np.min(df_species.temp_def.dropna().values)) / ( np.max(df_species.temp_def.dropna().values) - np.min(df_species.temp_def.dropna().values)) matrix_tempT = simplified_matrix[:, df_species.temp_def.notnull()] classes_T = df_species.temp_range_deduced[ df_species.temp_range_deduced.notnull()].values
import shutil  # local dependency: removes the directory of a species that failed

# Build the reaction graph of every species listed in df_species and remember
# the row positions of the species for which the construction succeeded.
valid_species_index = []
for i, code in enumerate(df_species.sp_codes):
    old_dir = None
    # Same output as the Python 2 statement `print "\n", i, values`
    # (blank line, then "<i> <values>"), but also valid Python 3 syntax.
    print("\n%s %s" % (i, df_species.iloc[i, [0, 1]].values))
    species_name = df_species.iloc[i, 1]
    try:
        obj = MetabolicGraph(species_name, "",  # no need for fasta for brite reconstruction
                             code=code, work_dir=org_dir,
                             through_brite=through_bri)
    # The tuple is required: `except SystemExit, IOError:` is Python 2 syntax
    # for `except SystemExit as IOError`, which never catches IOError and
    # rebinds the name IOError to the caught exception.
    except (SystemExit, IOError):
        continue

    graph_name = "metabolites_reaction_" + code + ".graphml"
    pickle_name = "metabolites_reactions_graph_" + code + ".cpkl"
    try:
        obj.directory = joinP(
            org_dir,
            species_name.replace("/", ".").replace(" ", "_")
            + "_" + df_species.iloc[i, 0])
        obj.get_reaction_graph(gname=graph_name,
                               pklname=pickle_name,
                               dir_ec="EC_global/",
                               rn_dir="reaction_files/",
                               through_brite=through_bri)
        obj.build_reaction_graph(filtr=False,
                                 gname=graph_name,
                                 pklname=pickle_name)
        valid_species_index.append(i)
    except (SystemExit, IOError, TypeError):  # Problem when constructing
        # Remove directory. shutil.rmtree avoids passing a path built from
        # metadata text through a shell; ignore_errors=True mirrors the
        # "never fail" behaviour of `rm -rf`.
        shutil.rmtree(obj.directory, ignore_errors=True)
        logger.error("Species %s %s will not be handled", species_name, code)
        continue
work_dir = "graph_species/"  # working directory : directory where we can find our graphs
backup_dir = "backup_cpkl/"

# Both directories must already exist; otherwise log the problem and stop.
if not os.path.exists(work_dir):
    logger.error("Directory with graphs %s could not be found", work_dir)
    raise SystemExit()
if not os.path.exists(backup_dir):
    # Formats to the same message as before while following backup_dir.
    logger.error("Backup directory %s could not be found", backup_dir)
    raise SystemExit()

df_species = pd.read_csv("species_metadata.csv", index_col=0)
df_species.loc[df_species.sp_codes.isnull(), "sp_codes"] = "nan"  # Otherwise interpreted as NaN

# Either get backup or generate scope.
# The two backup paths are built once and reused for the existence check and
# for loading.
matrix_backup = joinP(backup_dir,
                      "simplified_matrix_scope{}_prk.cpk".format(med_name))
nodes_backup = joinP(backup_dir,
                     "simplified_nodes_scope{}_prk.cpk".format(med_name))
if os.path.exists(matrix_backup) and os.path.exists(nodes_backup):
    # `with` closes each handle after loading (the inline open() calls
    # previously left both files open).
    with open(matrix_backup, "rb") as backup_file:
        simplified_matrix = cpk.load(backup_file)
    with open(nodes_backup, "rb") as backup_file:
        nodes_simplified = cpk.load(backup_file)
else:
    all_scope = []
    all_all_nodes = []
"#a15d90", "#a7bc74", "#5c6a9f", "#a65d22", "#8a4768", "#d6ac66", "#ee90a1", "#758348", "#c7726c", "#746015", "#e89871", "#8e651e", "#944d32", "#896f3b", "#b78050"] colours3 = ["blue", "#f32a30", "#de81d3", "#98cd28", "#278300","#fb2376", "#6adc90","#45124e", "#0082ad", "#7c4f00", "#5a8fff","#dbc672", "#bd9eff","#ffa988", "#535254","#fdb0cd", "#b20085", "#54d3ff",] #red b90a06, red i like #fc4d45 violeta 8a335b, blue 0049c3 backup_dir = "backup_cpkl/" med_name = "PsychroMesoThermHyperMedium" #Get species metadata df_species = pd.read_csv("species_metadata.csv", index_col=0) df_species.loc[df_species.sp_codes.isnull(), "sp_codes"] = "nan" # Otherwise interpreted as NaN #Get the scope matrix by running scope_kegg_prk.py : simplified_matrix = pk.load(open(joinP(backup_dir, "simplified_matrix_scope{}_prk.cpk".format(med_name)), "rb"), encoding='latin1') plotNull = True #Whether to plot species without metadata or not perp = 40 #t-SNE perplexity distance_tsne = "jaccard" #Distance to use for t-SNE #Evaluating distance matrix if distance_tsne == "jaccard": if exists("backup_cpkl/jaccard_simi.pk"): similarity = pk.load(open("backup_cpkl/jaccard_simi.pk", "rb")) else: similarity = sim_matrix(simplified_matrix.T) #takes some time dist = 1 - similarity elif distance_tsne == "manhattan": dist = manhattan_distances(simplified_matrix.T)
]) break if plot: plt.ylabel("Accuracy") plt.xlabel("Position of first compound in mask") plt.title("Mask size: {}".format(step)) plt.plot() return accs if __name__ == '__main__': backup_dir = "backup_cpkl/" med_name = "PsychroMesoThermHyperMedium" #The following files are generated by scope_kegg_prk.py simplified_matrix = cpk.load(open(joinP(backup_dir, "simplified_matrix_scope{}_prk.cpk".format(med_name)), "rb")) nodes_simplified = cpk.load(open(joinP(backup_dir, "simplified_nodes_scope{}_prk.cpk".format(med_name)), "rb")) df_species = pd.read_csv("species_metadata.csv", index_col=0) df_species.loc[df_species.sp_codes.isnull(), "sp_codes"] = "nan" # Otherwise interpreted as NaN # ============================================================================= # # TEMPERATURE CLASS PREDICTION # # ============================================================================= matrix_temp = simplified_matrix[:, df_species.temp_range_deduced.notnull()]