def main(family):
    """Compute per-family reaction frequencies, evenly-sampled model
    frequencies and used environments, then pickle them.

    Parameters
    ----------
    family : str
        Family name; locates the reactome/model inputs under ``data/`` and
        names the output pickle ``data/pickles/<family>.pkl``.

    Side effects: prints progress indices and writes one pickle file.
    """
    # reference from the git script
    data_folder = os.path.join(Path(os.getcwd()).parents[1], 'data')

    # Obtain all data from the irreducible set.
    fam_mfs = MFS_family(family,
                         data_folder + '/reactomes/all_families/',
                         data_folder + '/models/all_models')

    # ----- reaction frequency -----
    full_freq_m = fam_mfs.freq_m.copy()
    # Keep only reactions that should be included in the analysis.
    full_freq_m = full_freq_m.T[fam_mfs.include_reactome].T
    av_freq_m = np.mean(full_freq_m, axis=0)

    # ----- model reaction frequency -----
    # 1000 resamples of evenly-spaced model subsets (get_even_distances is
    # presumably stochastic -- each iteration draws a fresh subset).
    model_sample = np.zeros((1000, len(av_freq_m)))
    for i in range(1000):
        print(i)
        s1 = get_even_distances(fam_mfs.model_reactomes)
        mf = np.sum(fam_mfs.model_reactomes[s1], axis=0) / len(s1)
        model_sample[i] = mf[fam_mfs.include_reactome]

    # ----- environment -----
    ev = Env_ball(1000)
    transporter = ev.transporters[:]
    # External metabolites; water and oxygen are excluded.
    # water_index = transporter.index('EX_cpd00001_e')
    transporter.remove('EX_cpd00001_e')  # water
    # oxygen_index = transporter.index('EX_cpd00007_e')
    transporter.remove('EX_cpd00007_e')  # oxygen
    transporter = np.array(transporter)

    mc = fam_mfs.model.copy()
    # FIX: width derived from the actual transporter list instead of the
    # hard-coded 290 -- same value today, but robust to changes in the
    # environment ball's transporter set.
    used_environment = np.zeros((1000, len(transporter)))
    for i in range(1000):
        gc.collect()  # free the previous iteration's model copies
        v = fam_mfs.mfs[str(i)][fam_mfs.include_reactome].T
        used_environment[i] = get_environment_sample(
            mc, ev.matrix[i], ev.transporters,
            fam_mfs.reactome[fam_mfs.include_reactome], v, transporter, 1000)
        print(i)

    store = {'used_env': used_environment.copy(),
             'model_sample': model_sample.copy(),
             'full_freq_m': full_freq_m.copy(),
             'reactome': fam_mfs.reactome[fam_mfs.include_reactome],
             'transporter': transporter.copy()}
    pickle.dump(store,
                open(data_folder + '/pickles/' + family + '.pkl', 'wb'))
def add_transporter(self, model):
    """Add one exchange reaction per environment-ball transporter to
    *model*, then repair and re-optimize it.

    Metabolites already present in the model are reused; missing ones are
    created in the extracellular ('e') compartment.
    """
    env = Env_ball(1000)
    for rxn_id in env.transporters:
        cpd_id = rxn_id.replace('EX_', '')
        cpd_name = env.metabolites_d[cpd_id]

        exchange = Reaction(rxn_id)
        exchange.name = 'export of ' + cpd_name
        exchange.lower_bound = -1000.  # This is the default
        exchange.upper_bound = 1000.   # This is the default

        # Reuse the model's metabolite when it exists, otherwise create it.
        if model.metabolites.has_id(cpd_id):
            metabolite = model.metabolites.get_by_id(cpd_id)
        else:
            metabolite = Metabolite(cpd_id, name=cpd_name, compartment='e')
        exchange.add_metabolites({metabolite: -1.0})
        model.add_reactions([exchange])

    model.repair()
    model.optimize()
for i, name in enumerate(self.reactome): if self.gene_counts[i] > 0: #condition 1) #condition 3) if fva.loc[name]['minimum'] < 0: index[i] = 1 elif fva.loc[name]['maximum'] > 0: index[i] = 1 #condition 2) if self.model.reactions.get_by_id(name).reversibility: if nx.has_path(g, source=name + '_f', target='bio1'): index[i] = 1 elif nx.has_path(g, source=name + '_r', target='bio1'): index[i] = 1 else: if nx.has_path(g, source=name, target='bio1'): index[i] = 1 self.include_reactome = index.astype(np.bool) ev = Env_ball(1000) #families = os.listdir(pathToFamilyEFMs) #fam_mfs={} #for i in [families[0]]: # fam_panEFM[i]=panEFM_family(i, pathToFamilyEFMs,pathToModels)
def main(family):
    """Build the secondary data structures for one family from its panEFM
    and associate pickles and write them to
    ``data/pickles/<family>.secondaryDS.pkl``.

    Bracketed tags in the comments denote axes: [E] environments,
    [N1]/[N2] sample counts, [r] reactions, [M] metabolites, [s] models,
    [d] evenly-distanced model sample.

    NOTE(review): the loops ``for i in niche:`` below are only coherent if
    ``fam_associate['used_env']`` iterates over integer keys/indices
    (dict-like); if it is a plain 3-D ndarray this would iterate 2-D rows.
    Confirm against the producing script.
    """
    # reference from the git script
    data_folder = os.path.join(Path(os.getcwd()).parents[1], 'data')

    # load pickles
    fam_panEFM = pickle.load(
        open(data_folder + '/pickles/' + family + '.panEFM.pkl', 'rb'))
    fam_associate = pickle.load(
        open(data_folder + '/pickles/' + family + '.associate.pkl', 'rb'))

    # obtain all the primary data structures (see: Measures & data structures)
    # Reactome: [r]
    reactome = fam_associate['reactome'].copy()
    # Metabolome: [M]
    metabolome = fam_associate['transporter'].copy()
    # Environment Ball: [N1][M]
    environment_ball_class = Env_ball(1000)
    # exclude oxygen and water (metabolome was saved without them)
    met_idx = np.array(
        [environment_ball_class.transporters.index(i) for i in metabolome])
    environment_ball = environment_ball_class.matrix.T[met_idx].T

    # FIRS: [E][N1][r] -- get only the included reactions
    firs = {
        i: fam_panEFM.panEFM[i][fam_panEFM.include_reactome].T
        for i in fam_panEFM.panEFM
    }

    # Niche: [E][N1][M]
    niche = fam_associate['used_env'].copy()

    # shuffle sample orders (decorrelate FIRS and niche sample order;
    # note firs is keyed by str(i), niche indexed by int i)
    for i in range(1000):
        r = np.arange(1000)
        np.random.shuffle(r)
        firs[str(i)] = firs[str(i)][r]
        niche[i] = niche[i][r]

    # Niche binary: [E][N1][M] -- 1.0 wherever flux is nonzero after
    # rounding to 10 decimals (suppresses solver noise)
    niche_binary = {}
    for i in niche:
        niche_binary[i] = niche[i].copy()
        niche_binary[i][np.round(niche_binary[i], 10) != 0] = 1.0

    # Models: [s]
    models = np.array(
        [i.replace('.sbml', '') for i in fam_panEFM.model_files])

    # Model reactomes: [s][r]
    model_reactomes = fam_panEFM.model_reactomes.copy()
    model_reactomes = model_reactomes.T[fam_panEFM.include_reactome].T

    # Model sample: [d][r] -- evenly-spaced (Hamming) subset of models
    model_sample_idx = np.array(
        get_even_distances(model_reactomes, metric='hamming'))
    model_sample = model_reactomes[model_sample_idx]

    # FIRS growth rate: [E][N1] -- total used flux per sample
    firs_growth_rate = np.zeros((1000, 1000))
    for i in range(1000):
        firs_growth_rate[i] = np.sum(niche[i], axis=1)

    # remove CO2 and H+ from the metabolite axis everywhere
    met_idx = (metabolome != 'EX_cpd00011_e') & (metabolome != 'EX_cpd00067_e')
    metabolome = metabolome[met_idx]
    environment_ball = environment_ball.T[met_idx].T
    for i in niche:
        niche[i] = niche[i].T[met_idx].T
    for i in niche_binary:
        niche_binary[i] = niche_binary[i].T[met_idx].T

    ###### Secondary Data Structures ###
    # Size of FIRS: [E][N1]
    size_of_firs = np.zeros((1000, 1000))
    for i in range(1000):
        size_of_firs[i] = np.sum(firs[str(i)], axis=1)

    # Size of Niche: [E][N1]
    size_of_niches = np.zeros((1000, 1000))
    for i in range(1000):
        size_of_niches[i] = np.sum(niche_binary[i], axis=1)

    # Size of models: [s]
    size_of_models = np.zeros(len(models))
    for i, v in enumerate(model_reactomes):
        size_of_models[i] = sum(v > 0)

    # Fluidity of FIRS within environments: [E]
    fluidity_firs_within = np.zeros(1000)
    for i in range(1000):
        fluidity_firs_within[i] = get_fluidity_index(firs[str(i)], 1000)

    # Fluidity of FIRS across environments: [N2] -- fluidity between two
    # random samples drawn from two random environments:
    # (symmetric difference) / (union) of their reaction sets
    fluidity_firs_across = np.zeros(10000)
    for i in range(10000):
        rintA = np.random.randint(0, 1000, size=2)
        rintB = np.random.randint(0, 1000, size=2)
        s1 = sum(
            np.clip(
                firs[str(rintA[0])][rintB[0]] - firs[str(rintA[1])][rintB[1]],
                0, 1))
        s2 = sum(
            np.clip(
                firs[str(rintA[1])][rintB[1]] - firs[str(rintA[0])][rintB[0]],
                0, 1))
        fluidity_firs_across[i] = (s1 + s2) / sum(
            (firs[str(rintA[0])][rintB[0]]
             + firs[str(rintA[1])][rintB[1]]) > 0)

    # Fluidity of niches: [E]
    fluidity_niche_within = np.zeros(1000)
    for i in range(1000):
        fluidity_niche_within[i] = get_fluidity_index(niche_binary[i], 1000)

    # Fluidity across niches: [N2] -- same measure on binary niches
    fluidity_niche_across = np.zeros(10000)
    for i in range(10000):
        rintA = np.random.randint(0, 1000, size=2)
        rintB = np.random.randint(0, 1000, size=2)
        s1 = sum(
            np.clip(
                niche_binary[rintA[0]][rintB[0]]
                - niche_binary[rintA[1]][rintB[1]], 0, 1))
        s2 = sum(
            np.clip(
                niche_binary[rintA[1]][rintB[1]]
                - niche_binary[rintA[0]][rintB[0]], 0, 1))
        fluidity_niche_across[i] = (s1 + s2) / sum(
            (niche_binary[rintA[0]][rintB[0]]
             + niche_binary[rintA[1]][rintB[1]]) > 0)

    # Fluidity of models: [N2]
    # NOTE(review): identical arguments each iteration -- presumably
    # get_fluidity_index samples 2 random rows internally; confirm.
    fluidity_models = np.zeros(10000)
    for i in range(10000):
        print(i)
        fluidity_models[i] = get_fluidity_index(model_reactomes, 2)

    # Fluidity of model samples: [N2]
    fluidity_model_samples = np.zeros(10000)
    for i in range(10000):
        print(i)
        fluidity_model_samples[i] = get_fluidity_index(model_sample, 2)

    # Frequency of reactions: [E][r]
    freq_reactions = np.zeros((1000, len(reactome)))
    for i in range(1000):
        freq_reactions[i] = np.sum(firs[str(i)], axis=0) / 1000

    # Residual reactions frequency: [E][r]
    freq_reactions_m = np.mean(freq_reactions, axis=0)
    residual_reaction_freq = freq_reactions - freq_reactions_m

    # niche driven score for reactions: [r] -- std of residuals across
    # environments, rounded so near-constant reactions score exactly 0
    niche_score_reactions = np.round(
        np.std(residual_reaction_freq, axis=0), 5)

    # Reaction frequency in models: [r]
    freq_mod_reactions = np.sum(model_reactomes, axis=0) / len(models)

    # Reaction frequency in model sample [r]
    freq_mod_samp_reactions = np.sum(model_sample, axis=0) / len(model_sample)

    # Metabolite usage frequency: [E][M]
    freq_metabolite_use = np.zeros((1000, len(metabolome)))
    for i in range(1000):
        freq_metabolite_use[i] = np.sum(niche_binary[i], axis=0) / 1000
    freq_metabolite_use_m = np.mean(freq_metabolite_use, axis=0)
    residual_metabolite_freq = freq_metabolite_use - freq_metabolite_use_m

    # metabolite usage flux [E][M]
    metabolite_usage_flux = np.zeros((1000, len(metabolome)))
    for i in range(1000):
        metabolite_usage_flux[i] = np.sum(niche[i], axis=0) / 1000

    # residual metabolite usage flux: [E][M]
    metabolite_usage_flux_m = np.mean(metabolite_usage_flux, axis=0)
    residual_metabolite_usage_flux = (
        metabolite_usage_flux - metabolite_usage_flux_m)

    # niche driven score for metabolites: [M]
    niche_score_metabolites = np.round(
        np.std(residual_metabolite_usage_flux, axis=0), 5)

    ##### x, y: non zero reactions frequencies and metabolites usage flux ####
    x_reactome = reactome[niche_score_reactions != 0]
    x_reac_freq = freq_reactions.T[niche_score_reactions != 0].T
    y_metabolome = metabolome[niche_score_metabolites != 0]
    y_met_usage_flux = metabolite_usage_flux.T[niche_score_metabolites != 0].T
    y_met_freq = freq_metabolite_use.T[niche_score_metabolites != 0].T

    # correlation: [r][M] -- Pearson r between each reaction's frequency
    # profile and each metabolite's usage-flux profile across environments
    correlation = np.zeros((len(x_reactome), len(y_metabolome)))
    for i, reac in enumerate(x_reac_freq.T):
        correlation[i] = np.array([
            sts.pearsonr(reac.flatten(), metab.flatten())[0]
            for metab in y_met_usage_flux.T
        ])

    # correlation metabolite frequency
    correlation_met_freq = np.zeros((len(x_reactome), len(y_metabolome)))
    for i, reac in enumerate(x_reac_freq.T):
        correlation_met_freq[i] = np.array([
            sts.pearsonr(reac.flatten(), metab.flatten())[0]
            for metab in y_met_freq.T
        ])

    # Reaction pairwise distance: [E][E]
    reaction_pairwise_distance = sps.distance.squareform(
        sps.distance.pdist(freq_reactions))

    # FIRS pairwise distance: [E] -- mean Hamming distance within each env
    firs_pairwise_distance = np.zeros(1000)
    for i in range(1000):
        firs_pairwise_distance[i] = np.mean(
            sps.distance.pdist(firs[str(i)], metric='hamming'))

    # Niche binary pairwise distance: [E]
    niche_binary_pairwise_distance = np.zeros(1000)
    for i in range(1000):
        niche_binary_pairwise_distance[i] = np.mean(
            sps.distance.pdist(niche_binary[i], metric='hamming'))

    # Niche pairwise distance: [E]
    niche_pairwise_distance = np.zeros(1000)
    for i in range(1000):
        niche_pairwise_distance[i] = np.mean(sps.distance.pdist(niche[i]))

    # Niche distance: [E][E]
    niche_distance = sps.distance.squareform(
        sps.distance.pdist(metabolite_usage_flux))

    # DNDS_reaction: [N1] -- "synonymous" flips (same env, shuffled sample
    # order) vs "non-synonymous" flips (two different envs)
    dn_reactions = np.zeros(1000)
    ds_reactions = np.zeros(1000)
    rand_idx = np.arange(1000)
    for i in range(1000):
        f1 = np.random.randint(0, 1000, size=2)
        np.random.shuffle(rand_idx)
        ds_reactions[i] = flip_p(firs[str(f1[0])],
                                 firs[str(f1[0])][rand_idx], 1000)
        dn_reactions[i] = flip_p(firs[str(f1[0])], firs[str(f1[1])], 1000)

    # Bundle everything and write the secondary-data-structures pickle.
    store ={'size_of_firs': size_of_firs, 'size_of_niches': size_of_niches,\
        'size_of_models': size_of_models, 'fluidity_firs_within': fluidity_firs_within,\
        'fluidity_firs_across':fluidity_firs_across, 'fluidity_niche_within':fluidity_niche_within,\
        'fluidity_niche_across': fluidity_niche_across, 'fluidity_models': fluidity_models,\
        'fluidity_model_samples': fluidity_model_samples, 'freq_reactions': freq_reactions, \
        'residual_reaction_freq': residual_reaction_freq, 'niche_score_reactions': niche_score_reactions,\
        'freq_mod_reactions': freq_mod_reactions, 'freq_mod_samp_reactions': freq_mod_samp_reactions,\
        'freq_metabolite_use': freq_metabolite_use, 'metabolite_usage_flux': metabolite_usage_flux,\
        'metabolite_usage_flux_m': metabolite_usage_flux_m, 'niche_score_metabolites':niche_score_metabolites,\
        'x_reactome': x_reactome, 'x_reac_freq':x_reac_freq,\
        'y_metabolome' : y_metabolome, 'y_met_usage_flux':y_met_usage_flux,\
        'y_met_freq': y_met_freq, 'correlation': correlation, \
        'correlation_met_freq':correlation_met_freq, 'reaction_pairwise_distance':reaction_pairwise_distance,\
        'firs_pairwise_distance': firs_pairwise_distance, 'niche_binary_pairwise_distance': niche_binary_pairwise_distance,\
        'niche_pairwise_distance': niche_pairwise_distance, 'niche_distance': niche_distance,\
        'dn_reactions': dn_reactions, 'ds_reactions':ds_reactions}
    pickle.dump(
        store,
        open(data_folder + '/pickles/' + family + '.secondaryDS.pkl', 'wb'))
        # NOTE(review): fragment -- the enclosing function's `def` (and the
        # loop this first statement belongs to) are outside this chunk;
        # indentation here is reconstructed and should be confirmed.
        add_reaction(orig_obj, model, reactions[samples[i]], up_low_b_dict)
        print(orig_obj, np.round(float(model.slim_optimize()), decimals=12))
    # Reaction profile: 1.0 for every reaction, 0.0 where both bounds are
    # closed (the reaction has been knocked out).
    prof_r = np.ones(len(reactions))
    for i, v in enumerate(reactions):
        if (model.reactions.get_by_id(v).upper_bound == 0) and (model.reactions.get_by_id(v).lower_bound == 0):
            prof_r[i] = 0.0
    # Transporter profile: 1.0 only where flux is negative (uptake in the
    # cobra exchange-reaction sign convention -- TODO confirm).
    prof_t = np.ones(len(transporters))
    for i, v in enumerate(transporters):
        if model.reactions.get_by_id(v).flux >= 0:
            prof_t[i] = 0.0
    return prof_r, prof_t


# --- module-level script setup ---
eb = Env_ball(1000)
transporters = eb.transporters[:]
random_environments = eb.matrix.copy()
# NOTE(review): hard-coded absolute Windows path and the commercial
# 'gurobi' solver make this script machine-specific; consider
# parameterizing both.
model = cobra.io.read_sbml_model(
    'C:/Users/danie/Documents/random_work_stuff/home/home/Files/Aeromonadaceae.ensembl.sbml'
)
model.solver = 'gurobi'
# All non-exchange model reactions (ModelSEED ids contain 'rxn').
reactions = [i.id for i in model.reactions if 'rxn' in i.id]
# Remember original bounds so knocked-out reactions can be restored.
up_low_b_dict = {}
for reaction in reactions:
    up_low_b_dict[reaction] = (model.reactions.get_by_id(reaction).upper_bound,
                               model.reactions.get_by_id(reaction).lower_bound)