Beispiel #1
0
    Create a custom loss from an MSE loss (weighted MSE loss) and an L1 penalty (L1 regularizer).

    weighted - of the same size as input, the class weight for each value
    """
    if isinstance(weighted, torch.Tensor):
        return torch.sum(weighted * (input - target)**2)
    else:
        return crit(input, target)


# Get data
backup_dir = "backup_cpkl/"
med_name = "PsychroMesoThermHyperMedium"

# Scope matrix, obtained by running scope_kegg_prk.py. The file is opened in a
# `with` block so the handle is closed as soon as unpickling is done.
# NOTE(review): pickle executes arbitrary code on load; only use it on
# trusted, locally generated backups.
with open(
        joinP(backup_dir, "simplified_matrix_scope{}_prk.cpk".format(med_name)),
        "rb") as _matrix_file:
    simplified_matrix = pk.load(_matrix_file, encoding='latin1')  #Scope matrix

# Species metadata table.
df_species = pd.read_csv("species_metadata.csv", index_col=0)
df_species.loc[df_species.sp_codes.isnull(),
               "sp_codes"] = "nan"  # Otherwise interpreted as NaN

# Standardisation: min-max scale the known growth temperatures.
# The non-null values and their bounds are computed once instead of being
# re-derived for every term of the formula.
_known_temps = df_species.temp_def.dropna().values
_temp_min = np.min(_known_temps)
_temp_max = np.max(_known_temps)
growth_temp_crT = (_known_temps - _temp_min) / (_temp_max - _temp_min)

# Keep only the matrix columns whose species has a defined temperature.
matrix_tempT = simplified_matrix[:, df_species.temp_def.notnull()]
# Temperature-range labels of the species that have one.
classes_T = df_species.temp_range_deduced[
    df_species.temp_range_deduced.notnull()].values
Beispiel #2
0
 import shutil  # local import: only needed to clean up after a failed build

 # Positions (in df_species) of the species whose reaction graph was built.
 valid_species_index = []
 for i, code in enumerate(df_species.sp_codes):
     old_dir = None

     # A single pre-formatted string prints the same text under Python 2 and 3.
     print("\n%s %s" % (i, df_species.iloc[i, [0, 1]].values))

     try:
         obj = MetabolicGraph(df_species.iloc[i, 1], "",  # no need for fasta for brite reconstruction
                              code=code, work_dir=org_dir,
                              through_brite=through_bri)
     except (SystemExit, IOError):
         # The tuple is required: `except SystemExit, IOError:` caught only
         # SystemExit and bound the exception to the name IOError.
         continue

     try:
         obj.directory = joinP(org_dir,
                               df_species.iloc[i, 1].replace("/", ".").replace(" ", "_")
                               + "_" + df_species.iloc[i, 0])

         obj.get_reaction_graph(gname="metabolites_reaction_" + code + ".graphml",
                                pklname="metabolites_reactions_graph_" + code + ".cpkl",
                                dir_ec="EC_global/", rn_dir="reaction_files/",
                                through_brite=through_bri)
         obj.build_reaction_graph(filtr=False,
                                  gname="metabolites_reaction_" + code + ".graphml",
                                  pklname="metabolites_reactions_graph_" + code + ".cpkl")
         valid_species_index.append(i)
     except (SystemExit, IOError, TypeError):  # Problem when constructing
         # Remove the species directory without going through a shell, so
         # names containing quotes, parentheses, etc. cannot break the command.
         # ignore_errors mirrors the silent behaviour of `rm -rf`.
         shutil.rmtree(obj.directory, ignore_errors=True)
         logger.error("Species %s %s will not be handled" % (df_species.iloc[i, 1], code))
         continue
Beispiel #3
0
    # Directories this step relies on: the graphs to read and the pickle backups.
    work_dir = "graph_species/"
    backup_dir = "backup_cpkl/"

    # Abort (after logging) as soon as one of the required directories is missing.
    for required_dir, missing_msg in (
            (work_dir, "Directory with graphs %s could not be found" % work_dir),
            (backup_dir, "Backup directory backup_cpkl/ could not be found"),
    ):
        if os.path.exists(required_dir):
            continue
        logger.error(missing_msg)
        raise SystemExit()

    # Species metadata; a missing species code is replaced by the string "nan"
    # (it would otherwise be interpreted as NaN).
    df_species = pd.read_csv("species_metadata.csv", index_col=0)
    code_is_missing = df_species.sp_codes.isnull()
    df_species.loc[code_is_missing, "sp_codes"] = "nan"

    # Either reload the backed-up scope results or generate the scope.
    matrix_backup = joinP(backup_dir,
                          "simplified_matrix_scope{}_prk.cpk".format(med_name))
    nodes_backup = joinP(backup_dir,
                         "simplified_nodes_scope{}_prk.cpk".format(med_name))
    if os.path.exists(matrix_backup) and os.path.exists(nodes_backup):
        # Both backups are present: unpickle them. Each file is opened in a
        # `with` block so its handle is closed right after loading.
        with open(matrix_backup, "rb") as matrix_file:
            simplified_matrix = cpk.load(matrix_file)
        with open(nodes_backup, "rb") as nodes_file:
            nodes_simplified = cpk.load(nodes_file)
    else:
        all_scope = []
        all_all_nodes = []
Beispiel #4
0
            "#a15d90", "#a7bc74", "#5c6a9f", "#a65d22", "#8a4768", "#d6ac66", "#ee90a1", "#758348", "#c7726c",
            "#746015", "#e89871", "#8e651e", "#944d32", "#896f3b", "#b78050"]

# Palette of 18 colours (one named colour followed by hex codes).
# Other shades noted by the author: red #b90a06, a preferred red #fc4d45,
# violet #8a335b, blue #0049c3.
colours3 = [
    "blue",
    "#f32a30",
    "#de81d3",
    "#98cd28",
    "#278300",
    "#fb2376",
    "#6adc90",
    "#45124e",
    "#0082ad",
    "#7c4f00",
    "#5a8fff",
    "#dbc672",
    "#bd9eff",
    "#ffa988",
    "#535254",
    "#fdb0cd",
    "#b20085",
    "#54d3ff",
]


backup_dir = "backup_cpkl/"
med_name = "PsychroMesoThermHyperMedium"

# Get species metadata
df_species = pd.read_csv("species_metadata.csv", index_col=0)
df_species.loc[df_species.sp_codes.isnull(), "sp_codes"] = "nan"  # Otherwise interpreted as NaN

# Get the scope matrix (generated by running scope_kegg_prk.py). The `with`
# block closes the file handle once the matrix has been unpickled.
# NOTE(review): pickle executes arbitrary code on load; only use it on
# trusted, locally generated backups.
with open(joinP(backup_dir, "simplified_matrix_scope{}_prk.cpk".format(med_name)), "rb") as _matrix_file:
    simplified_matrix = pk.load(_matrix_file, encoding='latin1')

plotNull = True  # Whether to plot species without metadata or not
perp = 40  # t-SNE perplexity
distance_tsne = "jaccard"  # Distance to use for t-SNE


# Evaluating the distance matrix selected by `distance_tsne`.
if distance_tsne == "jaccard":
    if exists("backup_cpkl/jaccard_simi.pk"):
        # Reuse the cached similarity matrix; the `with` block closes the
        # file handle once it has been unpickled.
        with open("backup_cpkl/jaccard_simi.pk", "rb") as _simi_file:
            similarity = pk.load(_simi_file)
    else:
        similarity = sim_matrix(simplified_matrix.T)  # takes some time
    # Turn the similarity into a distance.
    dist = 1 - similarity
elif distance_tsne == "manhattan":
    dist = manhattan_distances(simplified_matrix.T)
                         ])
                break
        if plot:
            plt.ylabel("Accuracy")
            plt.xlabel("Position of first compound in mask")
            plt.title("Mask size: {}".format(step))
            plt.plot()
        return accs



if __name__ == '__main__':
    backup_dir = "backup_cpkl/"
    med_name = "PsychroMesoThermHyperMedium"
    # The following files are generated by scope_kegg_prk.py. Each one is
    # opened in a `with` block so its handle is closed right after loading.
    with open(joinP(backup_dir, "simplified_matrix_scope{}_prk.cpk".format(med_name)), "rb") as matrix_file:
        simplified_matrix = cpk.load(matrix_file)
    with open(joinP(backup_dir, "simplified_nodes_scope{}_prk.cpk".format(med_name)), "rb") as nodes_file:
        nodes_simplified = cpk.load(nodes_file)


    # Species metadata table.
    df_species = pd.read_csv("species_metadata.csv", index_col=0)
    df_species.loc[df_species.sp_codes.isnull(), "sp_codes"] = "nan"  # Otherwise interpreted as NaN


    # =============================================================================
    #
    #                   TEMPERATURE CLASS PREDICTION
    #
    # =============================================================================


    # Keep only the matrix columns whose species has a deduced temperature range.
    matrix_temp = simplified_matrix[:, df_species.temp_range_deduced.notnull()]