def species_production_reaction(data_dir, spe='OH', top_n=50, norm=False):
    """
    Rank the reactions that produce species `spe`, weighted by pathway
    probability.

    Reads "output/pathway_name_candidate.csv" and "output/pathway_prob.csv"
    under `data_dir`. For every pathway the production map returned by
    parse_pattern.parse_species_production_reaction is multiplied by the
    pathway probability and summed per reaction key.

    Two files are written to the "output" folder of `data_dir`, each holding
    the `top_n` largest entries as "reaction,frequency" rows, no header:
      * spe + "_production_reaction_index.csv", reaction keys as produced
        by the parser
      * spe + "_production_reaction_name.csv", keys converted with
        psri.reaction_name_to_real_reaction

    Parameters
    ----------
    data_dir : str
        directory that contains the "output" folder
    spe : str
        species name, looked up in the name->index mapping returned by
        psri.parse_spe_info
    top_n : int
        number of leading rows written to each file
    norm : bool
        when exactly True, frequencies are divided by their sum before
        being written
    """
    print(data_dir)
    out_dir = os.path.join(data_dir, "output")
    f_n_n = os.path.join(out_dir, "pathway_name_candidate.csv")
    f_n_p = os.path.join(out_dir, "pathway_prob.csv")

    # genfromtxt hands back a 0-d array for a single-row file, which cannot
    # be iterated; atleast_1d keeps the one-pathway case working
    pathway_name = np.atleast_1d(
        np.genfromtxt(f_n_n, dtype=str, delimiter='\n'))
    pathway_prob = np.atleast_1d(
        np.genfromtxt(f_n_p, dtype=float, delimiter='\n'))

    _, spe_name_ind_dict = psri.parse_spe_info(data_dir)
    # species tag as it is passed to the pathway parser, "S" + index
    spe_tag = 'S' + spe_name_ind_dict[spe]

    reaction_map = {}
    for p_n, p_p in zip(pathway_name, pathway_prob):
        map_tmp = parse_pattern.parse_species_production_reaction(
            p_n, spe_tag)
        for key, value in map_tmp.items():
            if key in reaction_map:
                reaction_map[key] += value * p_p
            else:
                reaction_map[key] = value * p_p

    # largest contribution first
    ranked = sorted(reaction_map.items(), key=lambda x: x[1], reverse=True)
    d_f = pd.DataFrame(ranked, columns=['reaction', 'frequency'])
    if norm is True:
        total = sum(d_f['frequency'])
        d_f['frequency'] /= total

    f_n_out1 = os.path.join(out_dir, spe + "_production_reaction_index.csv")
    d_f[0:top_n].to_csv(f_n_out1, header=False, index=False,
                        sep=',', columns=['reaction', 'frequency'])

    # load reaction info
    _, new_ind_reaction_dict = psri.parse_reaction_and_its_index(data_dir)

    # convert species reaction index to real species and reactions
    d_f['reaction'] = d_f['reaction'].apply(
        lambda x: psri.reaction_name_to_real_reaction(
            new_ind_reaction_dict, x).strip())

    f_n_out2 = os.path.join(out_dir, spe + "_production_reaction_name.csv")
    d_f[0:top_n].to_csv(f_n_out2, header=False, index=False,
                        sep=',', columns=['reaction', 'frequency'])
def convert_concentration_to_path_prob(data_dir, atom_followed="C",
                                       spe_conc=None, renormalization=True):
    """
    Convert concentrations to the corresponding total pathway probability.

    Each concentration is multiplied by the number of `atom_followed` atoms
    in that species. For example with [C3H8] = 1.0 and following "C", the
    total pathway probability is 1.0 * 3 because each C3H8 carries 3 "C"
    atoms. Species that do not contain the atom get coefficient 0.0.

    Warning: spe_conc is expected to come from the dlsode calculation; the
    caller guarantees that its dimensions match the mechanism.

    Parameters
    ----------
    data_dir : str
        directory holding "input/spe_composition.json"
    atom_followed : str
        atom symbol used as key in each species composition
    spe_conc : sequence or None
        a 1D sequence with one value per species, or a 2D array whose
        columns are species; it is MODIFIED IN PLACE
    renormalization : bool
        when exactly True, a 1D input is divided by its sum and every row
        of a 2D input is divided by that row's sum (this in-place division
        needs a numpy array)

    Returns
    -------
    None when spe_conc is None, otherwise the same spe_conc object; an
    empty input is handed back untouched.
    """
    if spe_conc is None:
        return None

    _, spe_n_i_d = psri.parse_spe_info(data_dir)
    spe_composition = psri.read_spe_composition(
        os.path.join(data_dir, "input", "spe_composition.json"))

    # number of followed atoms per species, keyed by species index
    spe_idx_coefficient = {}
    for s_name in spe_composition:
        atoms = spe_composition[s_name]
        if atom_followed in atoms:
            spe_idx_coefficient[spe_n_i_d[s_name]] = float(
                atoms[atom_followed])
        else:
            spe_idx_coefficient[spe_n_i_d[s_name]] = 0.0

    if np.shape(spe_conc)[0] == 0:
        # nothing to scale; the former "spe_conc is []" test could never
        # match, so empty input has always been returned unchanged
        return spe_conc

    # compare shapes by value, identity of tuples is an implementation detail
    if np.shape(spe_conc[0]) == ():
        print("1D array", "shape:\t", len(spe_conc))
        for idx, _ in enumerate(spe_conc):
            spe_conc[idx] *= float(spe_idx_coefficient[str(idx)])
        if renormalization is True:
            spe_conc /= np.sum(spe_conc)
    else:
        print("2D array", "shape:\t", np.shape(spe_conc))
        for idx in range(np.shape(spe_conc)[1]):
            spe_conc[:, idx] *= float(spe_idx_coefficient[str(idx)])
        if renormalization is True:
            for idx, _ in enumerate(spe_conc):
                spe_conc[idx, :] /= np.sum(spe_conc[idx, :])

    return spe_conc
def convert_path_prob_to_concentration(data_dir, atom_followed="C",
                                       path_prob=None, default_coef=None):
    """
    Convert total pathway probability back to concentration.

    Each entry is divided by the number of `atom_followed` atoms in the
    species. For example with C3H8 and following "C", the total pathway
    probability is concentration * 3, so concentration is the pathway
    probability divided by 3.0. Entries whose coefficient is zero are left
    as they are.

    Parameters
    ----------
    data_dir : str
        directory holding "input/spe_composition.json"
    atom_followed : str
        atom symbol used as key in each species composition
    path_prob : sequence or None
        1D sequence with one value per species, MODIFIED IN PLACE; inputs
        whose first element is not a scalar are returned untouched
    default_coef : number or None
        if not None, replaces every non-zero coefficient

    Returns
    -------
    None when path_prob is None, otherwise the same path_prob object.
    """
    if path_prob is None:
        return None

    _, spe_n_i_d = psri.parse_spe_info(data_dir)
    spe_composition = psri.read_spe_composition(
        os.path.join(data_dir, "input", "spe_composition.json"))

    # number of followed atoms per species, keyed by species index
    spe_idx_coefficient = {}
    for s_name in spe_composition:
        atoms = spe_composition[s_name]
        if atom_followed in atoms:
            spe_idx_coefficient[spe_n_i_d[s_name]] = float(
                atoms[atom_followed])
        else:
            spe_idx_coefficient[spe_n_i_d[s_name]] = 0.0

    if default_coef is not None:
        for s_idx in spe_idx_coefficient:
            if spe_idx_coefficient[s_idx] != 0:
                spe_idx_coefficient[s_idx] = default_coef

    if np.shape(path_prob)[0] == 0:
        # nothing to scale; the former "path_prob is []" test could never
        # match, so empty input has always been returned unchanged
        return path_prob

    # compare shapes by value, identity of tuples is an implementation detail
    if np.shape(path_prob[0]) == ():
        print("1D array", "shape:\t", len(path_prob))
        for idx, _ in enumerate(path_prob):
            coef = float(spe_idx_coefficient[str(idx)])
            if coef != 0:
                path_prob[idx] /= coef

    return path_prob
def parse_spe_production_along_path(data_dir, top_n=10, spe_idx=10,
                                    init_spe=62, atom_followed="C",
                                    end_t=1.0, species_path=False, axis=0,
                                    path_branching_factor=False,
                                    s_consumption=False, s_production=True):
    """
    Count net production of the given species along each candidate pathway.

    Note that a species might not be shown explicitly on a path but still be
    a side product of a reaction on the pathway.

    For every pathway name read from the candidate file, the production and
    (optionally) consumption counts of each species in `spe_idx` are summed;
    "production - consumption", multiplied by the path branching number, is
    written as one integer per pathway to
    "output/[species_]pathway_species_production_count" + suffix + ".csv",
    where suffix comes from naming.get_suffix followed by "_" and the
    species indices joined with "_".

    Parameters
    ----------
    data_dir : str
        directory holding the "output" folder
    top_n : int
        at most top_n + 1 rows of the candidate file are read
    spe_idx : int, iterable of int, or None
        species index or indices; None or an empty iterable returns at once
        without writing anything
    init_spe, atom_followed, end_t :
        forwarded to naming.get_suffix (atom_followed is also forwarded to
        the branching-number calculation)
    species_path : bool
        when exactly True, file names get the "species_" prefix
    axis : int
        column used when the pathway-name file is two dimensional
    path_branching_factor : bool
        when exactly True, each count is multiplied by
        parse_pattern.calculate_path_branching_number, otherwise by 1
    s_consumption, s_production : bool
        select which counts contribute (each must be exactly True)
    """
    if spe_idx is None:
        return
    # a lone index (python or numpy integer) is treated as a one-element list
    if isinstance(spe_idx, (int, np.integer)):
        spe_idx = [spe_idx]
    else:
        spe_idx = list(spe_idx)
    # the former "spe_idx is []" identity test never matched an empty list
    if not spe_idx:
        return
    id_tmp = "_".join(str(x_t) for x_t in spe_idx)

    suffix = naming.get_suffix(data_dir, init_spe=init_spe,
                               atom_followed=atom_followed, end_t=end_t)
    prefix = ""
    if species_path is True:
        prefix = "species_"

    f_n_path_name = os.path.join(
        data_dir, "output",
        prefix + "pathway_name_candidate" + suffix + ".csv")
    # a single-row file comes back as a 0-d array; keep it iterable
    pathname_data = np.atleast_1d(
        np.genfromtxt(f_n_path_name, dtype=str, max_rows=top_n + 1))

    # in case of two dimensional pathway name
    if len(np.shape(pathname_data)) == 2:
        pathname_data = pathname_data[:, axis]

    net_reactant = psri.parse_reaction_net_reactant(data_dir)
    net_product = psri.parse_reaction_net_product(data_dir)
    s_p_r_c = psri.parse_species_pair_reaction(data_dir)

    if path_branching_factor is True:
        atom_scheme = asch.get_atom_scheme(data_dir)
        s_idx_name, _ = psri.parse_spe_info(data_dir)

    s_p_c = []
    for p_n in pathname_data:
        spe_consumption_count = 0
        spe_production_count = 0
        for s_i in spe_idx:
            if s_consumption is True:
                spe_consumption_count += \
                    parse_pattern.parse_species_along_path_using_reaction(
                        p_n, net_reactant, s_i, s_p_r_c)
            if s_production is True:
                spe_production_count += \
                    parse_pattern.parse_species_along_path_using_reaction(
                        p_n, net_product, s_i, s_p_r_c)

        path_branching_number = 1
        if path_branching_factor is True:
            path_branching_number = \
                parse_pattern.calculate_path_branching_number(
                    pathname=p_n, net_reactant=net_reactant,
                    net_product=net_product, s_idx_name=s_idx_name,
                    atom_scheme=atom_scheme, atom_followed=atom_followed)

        s_p_c.append((spe_production_count - spe_consumption_count)
                     * path_branching_number)

    if id_tmp != "":
        suffix += "_" + id_tmp
    f_n_spe_production_count = os.path.join(
        data_dir, "output",
        prefix + "pathway_species_production_count" + suffix + ".csv")

    np.savetxt(f_n_spe_production_count, s_p_c, fmt='%d')
def plot_network(data_dir, fname="", pathname="", pathprob=1.0,
                 path_idx=None, end_t=1.0, suffix="", atom_followed="C",
                 species_path=False):
    """
    Plot one pathway on top of a manually laid-out species network.

    Node names and coordinates come from get_names_coordinates(data_dir,
    fname). All nodes are drawn faintly as background; the species found in
    `pathname` (pattern "S<digits>") are re-drawn in red and connected by
    red arrows in path order. Each step is annotated in green with its step
    number(s) and, unless species_path is True, the reaction looked up from
    the matching "R<digits>" entry. The figure is saved as
    "output/[species_]network_path_" + path_idx + suffix + ".jpg".

    Parameters
    ----------
    data_dir : str
        directory holding "input" and "output" folders
    fname : str
        passed to get_names_coordinates (also printed)
    pathname : str
        pathway string; "R-<digits>S<digits>" fragments are stripped first
    pathprob : number
        shown in the title (6 decimals when path_idx == 1, else 2)
    path_idx :
        used in the file name and the title
    end_t :
        unused in this function
    suffix : str
        appended to the file name
    atom_followed : str
        passed to global_settings.get_union_find_group
    species_path : bool
        exactly True selects the "species_" prefix; any value other than
        exactly False selects species-only step labels
    """
    print(fname)
    n_coordinate = get_names_coordinates(data_dir, fname)

    prefix = "species_" if species_path is True else ""

    # figure name; str("") is empty, so one expression covers both the
    # "no suffix" and the "with suffix" case
    fig_name = prefix + "network_path_" + str(path_idx) + str(suffix) + ".jpg"

    # specify label for lines
    labels = []
    x = []
    y = []
    name_idx_dict = {}
    for i_tmp, val in enumerate(n_coordinate):
        labels.append(val)
        name_idx_dict[val] = i_tmp
        x.append(float(n_coordinate[val][0]))
        y.append(float(n_coordinate[val][1]))

    # read in species index name
    spe_idx_name_dict, spe_name_idx_dict = psri.parse_spe_info(data_dir)
    spe_alias_latex = read_spe_alias(
        os.path.join(data_dir, "input", "spe_alias_latex.json"))
    _, new_ind_reaction_dict = psri.parse_reaction_and_its_index(data_dir)

    # modify labels; use the data_dir argument rather than a module-level
    # DATA_DIR so the function works for whichever directory it is given
    spe_union_find_group = global_settings.get_union_find_group(
        data_dir, atom_followed)
    for idx, val in enumerate(labels):
        spe_i = spe_name_idx_dict[val]
        if spe_i in spe_union_find_group:
            labels[idx] = ",".join(
                str(spe_idx_name_dict[str(member)])
                for member in spe_union_find_group[spe_i])
    print(labels)
    for idx, val in enumerate(labels):
        labels[idx] = change_spe_name(val, spe_alias_latex, None)
    print(labels)

    fig, a_x = plt.subplots(1, 1, sharex=True, sharey=False)

    # background
    a_x.scatter(x, y, color='b', marker="o", alpha=0.3)
    for i, _ in enumerate(x):
        t_h = a_x.annotate(labels[i], (x[i], y[i]))
        t_h.set_alpha(0.15)

    # get rid of R-1000003S90, don't need it here
    pathname = re.sub(r"R-\d+S\d+", r'', pathname)

    # parse pathway
    matched_spe = re.findall(r"S(\d+)", pathname)
    matched_reaction = re.findall(r"R(\d+)", pathname)
    print(matched_spe, matched_reaction)
    node_list = [
        name_idx_dict[change_spe_name(str(s_i), spe_idx_name_dict,
                                      spe_union_find_group)]
        for s_i in matched_spe
    ]
    print(node_list)

    # consecutive (previous node, current node) pairs; step numbers start at 1
    steps = list(zip(node_list, node_list[1:]))

    for pre_idx, curr_idx in steps:
        a_h = a_x.annotate('',
                           xy=(x[curr_idx], y[curr_idx]),
                           xytext=(x[pre_idx], y[pre_idx]),
                           arrowprops={'arrowstyle': '->',
                                       'lw': 4,
                                       'color': 'red'},
                           va='center')
        a_h.set_alpha(0.9)

    # re-draw points and labels on canvas
    for val in node_list:
        a_x.scatter(x[val], y[val], color='r', marker="o", alpha=0.9)
        t_h = a_x.annotate(labels[val], (x[val], y[val]))
        t_h.set_alpha(0.9)

    with_reactions = species_path is False

    # a transition that occurs several times gets one label listing all of
    # its step numbers, e.g. "2,5"
    idx_label_dict = {}
    for idx, (pre_idx, curr_idx) in enumerate(steps, start=1):
        if with_reactions:
            key = (pre_idx, curr_idx, matched_reaction[idx - 1])
        else:
            key = (pre_idx, curr_idx)
        if key in idx_label_dict:
            idx_label_dict[key] += "," + str(idx)
        else:
            idx_label_dict[key] = str(idx)

    # draw step labels along path
    for idx, (pre_idx, curr_idx) in enumerate(steps, start=1):
        y_tmp = y[pre_idx] * 0.7 + y[curr_idx] * 0.3
        if with_reactions:
            rxn_idx = matched_reaction[idx - 1]
            rxn_name = idx_label_dict[(pre_idx, curr_idx, rxn_idx)] \
                + ": " + str(new_ind_reaction_dict[rxn_idx])
            # reaction text starts at the left-most of the two nodes
            x_tmp = min(x[pre_idx], x[curr_idx])
        else:
            rxn_name = idx_label_dict[(pre_idx, curr_idx)]
            x_tmp = x[pre_idx] * 0.7 + x[curr_idx] * 0.3
        t_h = a_x.annotate(rxn_name, (x_tmp, y_tmp), color='g', size=8.0)
        t_h.set_alpha(0.5)

    # extra room on the right so labels are not cut off
    x_min = np.min(x)
    x_max = np.max(x)
    a_x.set_xlim([x_min - 0.01 * (x_max - x_min),
                  x_max + 0.25 * (x_max - x_min)])
    a_x.axis('off')
    a_x.set_frame_on(False)
    a_x.set_xticks([])  # this is needed for bbox_inches
    a_x.set_yticks([])

    if path_idx == 1:
        prob_str = "{:.6e}".format(float(pathprob))
    else:
        prob_str = "{:.2e}".format(float(pathprob))
    a_x.set_title("P$_{" + str(path_idx) + "}$" + " = " + prob_str)

    fig.savefig(os.path.join(data_dir, "output", fig_name),
                bbox_inches='tight', dpi=500)
    # close this figure explicitly instead of whichever one is current
    plt.close(fig)

    return
def init_directed_network_from_X_and_R_at_a_time(data_dir, tag="M", tau=10.0,
                                                 end_t=0.5, end_t2=None,
                                                 x_y_dict=None):
    """
    Build a directed species network (no parallel edges) from concentrations
    and reaction rates at one time snapshot.

    The snapshot row is found by interpolating the row index at time
    tau * end_t. Node weights are the rescaled concentrations of that row.
    The weight of edge (s1, s2) is the rescaled sum, over the reactions
    listed for that species pair, of rate[r_idx] * c2 / c1. If end_t2 is
    given, the network structure is unchanged but edges additionally carry
    "weight2" computed from the rates at tau * end_t2; otherwise weight2
    equals weight.

    Side effects: writes "output/edge_weight1_" + end_t + ".csv" (and
    "output/edge_weight2_" + end_t2 + ".csv" when end_t2 is given) with a
    "Source,Target,Weight..." header and one row per edge.

    Parameters
    ----------
    data_dir : str
        directory holding "input" and "output" folders
    tag : str
        tag of the concentration and reaction-rate files
    tau : float
        reference time
    end_t, end_t2 : float, float or None
        snapshot times as multiples of tau
    x_y_dict : dict or None
        optional node name -> (x, y); when given, nodes get "x" and "y"
        attributes

    Returns
    -------
    networkx.DiGraph
    """
    s_idx_2_name, _ = psri.parse_spe_info(data_dir)
    spe_alias = read_spe_alias(
        os.path.join(data_dir, "input", "spe_alias.json"))

    out_dir = os.path.join(data_dir, "output")
    # NOTE(review): the time file is always the "M" one, independent of
    # `tag` -- confirm this is intended
    time_v = np.loadtxt(os.path.join(out_dir, "time_dlsode_M.csv"),
                        dtype=float, delimiter=',')
    conc_mat = np.loadtxt(
        os.path.join(out_dir, "concentration_dlsode_" + str(tag) + ".csv"),
        delimiter=",")
    rxn_rates_mat = np.loadtxt(
        os.path.join(out_dir, "reaction_rate_dlsode_" + str(tag) + ".csv"),
        delimiter=",")

    has_second = end_t2 is not None

    # the time point where reference time tau is, use interpolation here
    idx_array = list(range(len(time_v)))

    def _row_at(ratio):
        """Row index closest to time tau * ratio, clamped to the last row."""
        row = int(round(
            interpolation.interp1d(time_v, idx_array, tau * ratio)))
        return min(row, len(time_v) - 1)

    time_axis = _row_at(end_t)
    conc_v = conc_mat[time_axis, :]
    rxn_rates_v = rxn_rates_mat[time_axis, :]
    if has_second:
        rxn_rates_v2 = rxn_rates_mat[_row_at(end_t2), :]

    species_set = set()
    species_pair_weight = {}
    species_pair_weight2 = {}

    # species pairs-reactions-coefficient
    s_p_r_c = psri.parse_species_pair_reaction(data_dir)
    for s1, s2 in s_p_r_c:
        pair = (int(s1), int(s2))
        species_set.add(pair[0])
        species_set.add(pair[1])
        species_pair_weight.setdefault(pair, 0.0)
        if has_second:
            species_pair_weight2.setdefault(pair, 0.0)

        rxn_info = s_p_r_c[(s1, s2)]
        for r_key in rxn_info:
            r_idx = int(rxn_info[r_key]['r_idx'])
            c1 = float(rxn_info[r_key]['c1'])
            c2 = float(rxn_info[r_key]['c2'])
            species_pair_weight[pair] += rxn_rates_v[r_idx] * c2 / c1
            if has_second:
                species_pair_weight2[pair] += rxn_rates_v2[r_idx] * c2 / c1

    # edge weights in the iteration order of species_pair_weight
    edge_weight_v = [float(w) for w in species_pair_weight.values()]

    # rescale concentrations and edge weights
    conc_v = rescale_array_v2(conc_v, 1.0, 5.0, 15.0, 25.0, -12)
    edge_weight_v = rescale_array_v2(edge_weight_v, 0.5, 1.0, 15.0, 25.0, -9)
    if has_second:
        edge_weight_v2 = rescale_array_v2(
            [float(w) for w in species_pair_weight2.values()],
            1.5, 2.5, 15.0, 25.0, -9)

    # final directed graph
    di_graph = nx.DiGraph()

    # add nodes first
    for val in species_set:
        node_weight = float(conc_v[int(val)])
        raw_name = s_idx_2_name[str(val)]
        node_name = change_spe_name(raw_name, spe_alias, None)
        # add a layer to control whether to show the species label name
        label_name = node_name if raw_name in spe_alias else ''
        if x_y_dict is None:
            di_graph.add_node(node_name, label=label_name,
                              weight=node_weight)
        else:
            di_graph.add_node(node_name, label=label_name,
                              weight=node_weight,
                              x=x_y_dict[node_name][0],
                              y=x_y_dict[node_name][1])

    # add edges, collecting the csv rows as we go
    rows1 = []
    rows2 = []
    for idx, key in enumerate(species_pair_weight):
        src_name = change_spe_name(s_idx_2_name[str(key[0])], spe_alias, None)
        dst_name = change_spe_name(s_idx_2_name[str(key[1])], spe_alias, None)
        name = src_name + "," + dst_name

        # the weight must be looked up BEFORE the row is recorded; the row
        # used to be written first and so carried the previous edge's weight
        weight = float(edge_weight_v[idx])
        rows1.append(src_name + "," + dst_name + "," + str(weight) + "\n")

        if has_second:
            weight2 = float(edge_weight_v2[idx])
            rows2.append(src_name + "," + dst_name + "," + str(weight2)
                         + "\n")
        else:
            weight2 = weight
        di_graph.add_edge(src_name, dst_name, name=name,
                          weight=weight, weight2=weight2)

    # edge weights write to file; "with" closes the files even on error
    e_w_fn1 = os.path.join(out_dir, "edge_weight1_" + str(end_t) + ".csv")
    with open(e_w_fn1, 'w') as f_handler1:
        f_handler1.write("Source,Target,Weight" + str(end_t) + "\n")
        f_handler1.writelines(rows1)
    if has_second:
        e_w_fn2 = os.path.join(out_dir,
                               "edge_weight2_" + str(end_t2) + ".csv")
        with open(e_w_fn2, 'w') as f_handler2:
            f_handler2.write("Source,Target,Weight" + str(end_t2) + "\n")
            f_handler2.writelines(rows2)

    return di_graph
def init_directed_network(data_dir, path_idx=None, init_spe=None,
                          atom_followed="C", end_t=None, species_path=False,
                          time_axis=0):
    """
    Build a directed species network (no parallel edges) from selected
    pathways.

    Pathway names and probabilities are read from
    "output/[species_]pathway_name_candidate" + suffix + ".csv" and
    "output/[species_]pathway_prob" + suffix + ".csv". Every species
    occurrence on a selected pathway adds the pathway probability to that
    node's weight, and every consecutive species pair adds it to the edge
    weight. Node weights are rescaled with rescale_array(., 1.0, 5.0) and
    edge weights with rescale_array(., 3.0, 15.0). Each edge "name" is the
    comma-joined, numerically sorted set of reaction names seen on that
    edge ('-1' for species-only pathways).

    Parameters
    ----------
    data_dir : str
        directory holding the "output" folder
    path_idx : iterable of int or None
        rows of the pathway files to use; None selects every row
    init_spe, atom_followed, end_t :
        forwarded to get_suffix (atom_followed is also passed to
        global_settings.get_union_find_group)
    species_path : bool
        exactly True selects the "species_" files; any value other than
        exactly False means edges carry no reaction information
    time_axis : int
        column used when the pathway files are two dimensional

    Returns
    -------
    networkx.DiGraph
    """
    spe_idx_name_dict, _ = psri.parse_spe_info(data_dir)

    suffix = get_suffix(data_dir, init_spe=init_spe,
                        atom_followed=atom_followed, end_t=end_t)

    prefix = "species_" if species_path is True else ""

    f_n_path_name = os.path.join(
        data_dir, "output",
        prefix + "pathway_name_candidate" + suffix + ".csv")
    f_n_path_prob = os.path.join(
        data_dir, "output", prefix + "pathway_prob" + suffix + ".csv")

    print(f_n_path_name, f_n_path_prob)
    # a single-entry file comes back as a 0-d array; keep it indexable
    p_n = np.atleast_1d(np.genfromtxt(f_n_path_name, dtype=str,
                                      delimiter=','))
    p_p = np.atleast_1d(np.genfromtxt(f_n_path_prob, dtype=float,
                                      delimiter=','))
    # in case of two dimensional pathway name
    if len(np.shape(p_n)) == 2:
        p_n = p_n[:, time_axis]
    if len(np.shape(p_p)) == 2:
        p_p = p_p[:, time_axis]

    # the default path_idx=None used to raise TypeError; treat it as
    # "use every pathway"
    if path_idx is None:
        path_idx = range(len(p_n))

    # retrieve pathway name and pathway probability before sort
    p_n = [p_n[i] for i in path_idx]
    p_p = [p_p[i] for i in path_idx]

    # set the data type separately
    d_f_n = pd.DataFrame(p_n, columns=['name'], dtype=str)
    d_f_p = pd.DataFrame(p_p, columns=['prob'], dtype=float)
    d_f = pd.concat([d_f_n, d_f_p], axis=1)

    d_f.sort_values(by='prob', ascending=False,
                    inplace=True, na_position='last')
    d_f.reset_index(drop=True, inplace=True)
    print(d_f.head())

    # temporary directed graph
    d_g_tmp = nx.DiGraph()

    # use the data_dir argument rather than a module-level DATA_DIR so the
    # function works for whichever directory it is given
    spe_union_find_group = global_settings.get_union_find_group(
        data_dir, atom_followed)

    def _node_name(spe):
        """Display name of a species index, merged by union-find group."""
        return change_spe_name(spe, spe_idx_name_dict,
                               union_find=spe_union_find_group)

    # record all nodes
    nodes = set()
    for _, val in d_f.iterrows():
        for spe in re.findall(r"S(\d+)", val['name']):
            nodes.add(_node_name(spe))

    # node weights are accumulated in a plain dict instead of through
    # Graph.node[...], which was removed in networkx 2.4
    node_weight_acc = {}
    for val in nodes:
        d_g_tmp.add_node(val, weight=0.0, label=str(val))
        node_weight_acc[val] = 0.0

    with_reactions = species_path is False
    for _, val in d_f.iterrows():
        prob = float(val['prob'])

        # get rid of R-1000003S90, don't need it here
        print(val['name'])
        path_name_tmp = re.sub(r"R-\d+S\d+", r'', val['name'])
        print(path_name_tmp)

        matched_spe = re.findall(r"S(\d+)", path_name_tmp)
        if with_reactions:
            # pathway contains both reaction and species
            matched_reaction = re.findall(r"R(\d+)", path_name_tmp)

        src = None
        for idx, spe in enumerate(matched_spe):
            dest = _node_name(spe)
            node_weight_acc[dest] += 1.0 * prob
            if idx > 0:
                if with_reactions:
                    rxn = change_rxn_name(matched_reaction[idx - 1])
                else:
                    rxn = '-1'
                if d_g_tmp.has_edge(src, dest):
                    d_g_tmp[src][dest]['weight'] += 1.0 * prob
                    d_g_tmp[src][dest]['reactions'].add(rxn)
                else:
                    d_g_tmp.add_edge(src, dest, reactions=set([rxn]),
                                     weight=1.0 * prob)
            # this species is the source of the next step
            src = dest

    # update directed graph, for example,
    # 1. reactions is originally a set, combine to get a string of reactions
    # 2. smooth and re-normalize node weight
    # 3. re-normalize edge weight
    node_weight = [node_weight_acc[val] for val in d_g_tmp.nodes()]
    edge_weight = [d_g_tmp[val[0]][val[1]]['weight']
                   for val in d_g_tmp.edges()]

    node_weight = rescale_array(node_weight, 1.0, 5.0)
    edge_weight = rescale_array(edge_weight, 3.0, 15.0)

    # final directed graph
    di_graph = nx.DiGraph()
    for idx, val in enumerate(d_g_tmp.nodes()):
        di_graph.add_node(val, weight=node_weight[idx])
    for idx, val in enumerate(d_g_tmp.edges()):
        src = val[0]
        dest = val[1]
        rxn_set = sorted(d_g_tmp[src][dest]['reactions'],
                         key=lambda x: int(x), reverse=False)
        name = ",".join(rxn_set)
        di_graph.add_edge(src, dest, name=name, weight=edge_weight[idx])

    return di_graph
def get_species_with_top_n_concentration(data_dir, exclude, top_n=10,
                                         traj_max_t=100.0, tau=10.0,
                                         end_t=1.0, tag="M", atoms=None):
    """
    Pick the species with the highest concentration at time tau * end_t.

    Concentrations are interpolated at time tau * end_t from the
    "time_dlsode_<tag>.csv" and "concentration_dlsode_<tag>.csv" files in
    the "output" folder. Distinct concentration values are visited from
    high to low; for each value one species is considered (the one with the
    lowest index when several species share the value) and kept if it
    contains at least one of `atoms` and its name is not in `exclude`.

    NOTE(review): this file defines a function with the same name again
    further down; at import time the later definition replaces this one.

    Parameters
    ----------
    data_dir : str
        directory holding "input" and "output" folders
    exclude : list of str or None
        species names that must not be selected
    top_n : int
        maximum number of species returned
    traj_max_t : float
        unused in this implementation
    tau, end_t : float
        concentrations are evaluated at time tau * end_t
    tag : str
        tag in the file names
    atoms : list of str or None
        atoms of interest, defaults to ["C"]

    Returns
    -------
    (spe_idx_list, spe_name_list, exclude_spe_name_list)
        selected indices (int), their names, and the names of all species
        that contain none of `atoms`
    """
    if atoms is None:
        atoms = ["C"]
    if exclude is None:
        exclude = []

    time = np.loadtxt(
        os.path.join(data_dir, "output", "time_dlsode_" + str(tag) + ".csv"),
        delimiter=",")
    conc_all = np.loadtxt(
        os.path.join(data_dir, "output",
                     "concentration_dlsode_" + str(tag) + ".csv"),
        delimiter=",")

    n_spe = np.shape(conc_all)[1]
    data = [interpolation.interp1d(time, conc_all[:, i], tau * end_t)
            for i in range(n_spe)]

    # one representative species per distinct concentration value; keeping
    # the first (lowest) index makes the choice reproducible, whereas
    # taking an arbitrary element of a set of strings is not
    first_idx_by_conc = {}
    for idx, val in enumerate(data):
        first_idx_by_conc.setdefault(val, idx)

    spe_idx_name_dict, _ = psri.parse_spe_info(data_dir)
    spe_composition = psri.read_spe_composition(
        os.path.join(data_dir, "input", "spe_composition.json"))

    spe_idx_list = []
    for val in sorted(first_idx_by_conc, reverse=True):
        if len(spe_idx_list) >= top_n:
            break
        spe_idx = str(first_idx_by_conc[val])
        s_name = spe_idx_name_dict[spe_idx]
        has_atom = any(atom in spe_composition[s_name] for atom in atoms)
        if s_name not in exclude and has_atom:
            print(val, spe_idx, s_name)
            spe_idx_list.append(int(spe_idx))

    # species doesn't contain atom we are interested in
    exclude_spe_name_list = [
        s_n_t for s_n_t in spe_composition
        if not any(atom in spe_composition[s_n_t] for atom in atoms)
    ]

    spe_name_list = [str(spe_idx_name_dict[str(x)]) for x in spe_idx_list]
    return spe_idx_list, spe_name_list, exclude_spe_name_list
def plot_spe_drc(data_dir, spe_idx=None, tau=10.0, end_t=1.0,
                 tag="fraction", reciprocal=False):
    """
    Plot species destruction rate constants against time, together with
    the temperature on a second y axis.

    Data are read from "time_dlsode_<tag>.csv", "temperature_dlsode_<tag>.csv"
    and "drc_dlsode_<tag>.csv" in the "output" folder. The figure is saved
    there as "spe_drc_<names>.jpg", or "spe_drc_reciprocal_<names>.jpg"
    when the reciprocal is plotted, where <names> are the species names
    joined with "_" and followed by "Temp".

    Parameters
    ----------
    data_dir : str
        directory holding the "output" folder
    spe_idx : sequence of int or None
        species indices to plot, defaults to [0]; the input is not modified
    tau : float
        reference time
    end_t : float
        fraction of tau up to which data are plotted
    tag : str
        tag in the file names
    reciprocal : bool
        any value other than exactly False plots 1/k instead of k
    """
    # work on a copy, the temperature pseudo-index -1 is appended below
    spe_idx_tmp = [0] if spe_idx is None else list(spe_idx)

    colors, markers, _ = get_colors_markers_linestyles()

    s_idx_n, _ = psri.parse_spe_info(data_dir)
    s_idx_n["-1"] = "Temp"
    spe_idx_tmp.append(-1)

    out_dir = os.path.join(data_dir, "output")
    time = np.loadtxt(
        os.path.join(out_dir, "time_dlsode_" + str(tag) + ".csv"),
        delimiter=",")
    temp = np.loadtxt(
        os.path.join(out_dir, "temperature_dlsode_" + str(tag) + ".csv"),
        delimiter=",")
    spe_drc = np.loadtxt(
        os.path.join(out_dir, "drc_dlsode_" + str(tag) + ".csv"),
        delimiter=",")

    plot_k = reciprocal is False

    # the time point where reference time tau is
    tau_time_point = float(tau) / time[-1] * len(time)
    end_point = int(end_t * tau_time_point)
    # roughly ten markers per curve, at least every point
    delta_n = int(end_point / 10)
    if delta_n == 0:
        delta_n = 1

    fig, a_x_left = plt.subplots(1, 1, sharex=True, sharey=False)
    counter = 0
    for s_idx in spe_idx_tmp:
        if s_idx == -1:
            a_x_right = a_x_left.twinx()
            a_x_right.plot(time[0:end_point], temp[0:end_point],
                           markevery=delta_n, color=colors[-1],
                           label=s_idx_n[str(s_idx)])
        else:
            # the last color is reserved for temperature; once the other
            # colors are used up, markers tell the curves apart
            if counter < len(colors) - 1:
                m_k = None
            else:
                m_k = markers[(counter + 1 - len(colors)) % (len(markers))]
            y_data = spe_drc[0:end_point, s_idx]
            if not plot_k:
                y_data = 1.0 / y_data
            a_x_left.semilogy(time[0:end_point], y_data,
                              marker=m_k, markevery=delta_n,
                              color=colors[counter % (len(colors) - 1)],
                              label=s_idx_n[str(s_idx)])
            counter += 1

    leg_left = a_x_left.legend(loc=9 if plot_k else 8, fancybox=True,
                               prop={'size': 10.0})
    leg_right = a_x_right.legend(loc=4, fancybox=True, prop={'size': 10.0})
    leg_left.get_frame().set_alpha(0.7)
    leg_right.get_frame().set_alpha(0.7)
    a_x_left.grid()

    # end_point equals len(time) when tau * end_t reaches the last time
    # stamp; clamp so the upper axis limit does not index past the array
    last_idx = min(end_point, len(time) - 1)
    a_x_left.set_xlim([0.05, time[last_idx]])

    a_x_left.set_xlabel("time/s")
    if plot_k:
        a_x_left.set_ylabel("k/s$^{-1}$")
    else:
        a_x_left.set_ylabel("k$^{-1}/s$")
    a_x_right.set_ylabel("T/K")

    s_n_str = "_".join(s_idx_n[str(x)] for x in spe_idx_tmp)
    if plot_k:
        fig_name = "spe_drc_" + s_n_str + ".jpg"
    else:
        fig_name = "spe_drc_reciprocal_" + s_n_str + ".jpg"
    fig.savefig(os.path.join(out_dir, fig_name), dpi=500)

    # close this figure explicitly instead of whichever one is current
    plt.close(fig)
def plot_concentrations(data_dir, spe_idx=None, tau=10.0, end_t=1.0,
                        tag="fraction", exclude_names=None,
                        renormalization=True, semilogy=False, hasTemp=True):
    """
    Plot species concentrations against time, optionally with the
    temperature on a second y axis.

    Concentrations come from trajectory.get_normalized_concentration, which
    receives `tag`, `exclude_names` and `renormalization`. The figure is
    saved as "output/trajectory_<names>.jpg", where <names> are the species
    names joined with "_" (followed by "Temp" when hasTemp is True).

    Parameters
    ----------
    data_dir : str
        directory holding the "output" folder
    spe_idx : sequence of int or None
        species indices to plot, defaults to [0]; the input is not modified
    tau : float
        reference time
    end_t : float
        fraction of tau up to which data are plotted
    tag : str
        tag in the file names
    exclude_names : list of str or None
        forwarded to the concentration loader, defaults to []
    renormalization : bool
        forwarded to the concentration loader
    semilogy : bool
        exactly True uses a logarithmic y axis
    hasTemp : bool
        exactly True adds the temperature curve and its legend
    """
    if exclude_names is None:
        exclude_names = []

    # work on a copy, the temperature pseudo-index -1 may be appended below
    spe_idx_tmp = [0] if spe_idx is None else list(spe_idx)

    colors, markers, _ = get_colors_markers_linestyles()

    s_idx_n, _ = psri.parse_spe_info(data_dir)
    if hasTemp is True:
        s_idx_n["-1"] = "Temp"
        spe_idx_tmp.append(-1)

    out_dir = os.path.join(data_dir, "output")
    time = np.loadtxt(
        os.path.join(out_dir, "time_dlsode_" + str(tag) + ".csv"),
        delimiter=",")
    temp = np.loadtxt(
        os.path.join(out_dir, "temperature_dlsode_" + str(tag) + ".csv"),
        delimiter=",")

    conc = trajectory.get_normalized_concentration(
        data_dir, tag=tag, exclude_names=exclude_names,
        renormalization=renormalization)

    # the time point where reference time tau is
    tau_time_point = float(tau) / time[-1] * len(time)
    end_point = int(end_t * tau_time_point)
    # roughly ten markers per curve, at least every point
    delta_n = int(end_point / 10)
    if delta_n == 0:
        delta_n = 1

    fig, a_x_left = plt.subplots(1, 1, sharex=True, sharey=False)
    counter = 0
    for s_idx in spe_idx_tmp:
        if s_idx == -1:
            a_x_right = a_x_left.twinx()
            a_x_right.plot(time[0:end_point], temp[0:end_point],
                           color=colors[-1], label=s_idx_n[str(s_idx)])
        else:
            # the last color is reserved for temperature; once the other
            # colors are used up, markers tell the curves apart
            if counter < len(colors) - 1:
                m_k = None
            else:
                m_k = markers[(counter + 1 - len(colors)) % (len(markers))]
            draw = a_x_left.semilogy if semilogy is True else a_x_left.plot
            draw(time[0:end_point], conc[0:end_point, s_idx],
                 marker=m_k, markevery=delta_n,
                 color=colors[counter % (len(colors) - 1)],
                 label=s_idx_n[str(s_idx)])
            counter += 1

    leg_left = a_x_left.legend(loc=8, fancybox=True, prop={'size': 10.0})
    leg_left.get_frame().set_alpha(0.7)
    a_x_left.grid()
    a_x_left.set_xlim([0, tau * end_t])
    a_x_left.xaxis.set_major_formatter(FormatStrFormatter('%.1e'))

    a_x_left.set_xlabel("Time/sec")
    a_x_left.set_ylabel("[X]")

    if hasTemp is True:
        leg_right = a_x_right.legend(loc=2, fancybox=True,
                                     prop={'size': 10.0})
        leg_right.get_frame().set_alpha(0.7)
        a_x_right.set_ylabel("T/K")

    s_n_str = "_".join(s_idx_n[str(x)] for x in spe_idx_tmp)
    fig.savefig(os.path.join(out_dir, "trajectory_" + s_n_str + ".jpg"),
                dpi=500)

    # close this figure explicitly instead of whichever one is current
    plt.close(fig)
def get_species_with_top_n_concentration(data_dir, exclude, top_n=10,
                                         traj_max_t=100.0, tau=10.0,
                                         end_t=1.0, tag="M", atoms=None):
    """
    Pick the species with the highest concentration at one trajectory row.

    The row is int(end_t * tau / traj_max_t * number_of_rows) of
    "output/concentration_dlsode_<tag>.csv", clamped to the last row.
    Distinct concentration values are visited from high to low; for each
    value one species is considered (the one with the lowest index when
    several species share the value) and kept if it contains at least one
    of `atoms` and its name is not in `exclude`.

    NOTE(review): an earlier definition with the same name exists in this
    file; being defined later, this one is the one in effect after import.

    Parameters
    ----------
    data_dir : str
        directory holding "input" and "output" folders
    exclude : list of str or None
        species names that must not be selected
    top_n : int
        maximum number of species returned
    traj_max_t : float
        time that the whole concentration file is taken to span when the
        row index is computed
    tau, end_t : float
        the row corresponds to time tau * end_t
    tag : str
        tag in the file name
    atoms : list of str or None
        atoms of interest, defaults to ["C"]

    Returns
    -------
    (spe_idx_list, spe_name_list, exclude_spe_name_list)
        selected indices (int), their names, and the names of all species
        that contain none of `atoms`
    """
    if atoms is None:
        atoms = ["C"]
    if exclude is None:
        exclude = []

    conc = np.loadtxt(
        os.path.join(data_dir, "output",
                     "concentration_dlsode_" + str(tag) + ".csv"),
        delimiter=",")

    # the time point where reference time tau is
    tau_time_point = float(tau) / traj_max_t * len(conc)
    time_idx = int(end_t * tau_time_point)
    if time_idx >= len(conc):
        time_idx = len(conc) - 1

    data = conc[time_idx, :]

    # one representative species per distinct concentration value; keeping
    # the first (lowest) index makes the choice reproducible, whereas
    # taking an arbitrary element of a set of strings is not
    first_idx_by_conc = {}
    for idx, val in enumerate(data):
        first_idx_by_conc.setdefault(val, idx)

    spe_idx_name_dict, _ = psri.parse_spe_info(data_dir)
    spe_composition = psri.read_spe_composition(
        os.path.join(data_dir, "input", "spe_composition.json"))

    spe_idx_list = []
    for val in sorted(first_idx_by_conc, reverse=True):
        if len(spe_idx_list) >= top_n:
            break
        spe_idx = str(first_idx_by_conc[val])
        s_name = spe_idx_name_dict[spe_idx]
        has_atom = any(atom in spe_composition[s_name] for atom in atoms)
        if s_name not in exclude and has_atom:
            print(val, spe_idx, s_name)
            spe_idx_list.append(int(spe_idx))

    # species doesn't contain atom we are interested in
    exclude_spe_name_list = [
        s_n_t for s_n_t in spe_composition
        if not any(atom in spe_composition[s_n_t] for atom in atoms)
    ]

    spe_name_list = [str(spe_idx_name_dict[str(x)]) for x in spe_idx_list]
    return spe_idx_list, spe_name_list, exclude_spe_name_list