Example 1
def species_production_reaction(data_dir, spe='OH', top_n=50, norm=False):
    """
    count species production reactions along each pathway, weighted by pathway probability
    """
    print(data_dir)
    f_n_n = os.path.join(data_dir, "output", "pathway_name_candidate.csv")
    f_n_p = os.path.join(data_dir, "output", "pathway_prob.csv")

    pathway_name = np.genfromtxt(f_n_n, dtype=str, delimiter='\n')
    pathway_prob = np.genfromtxt(f_n_p, dtype=float, delimiter='\n')

    _, spe_name_ind_dict = psri.parse_spe_info(data_dir)
    reaction_map = dict()
    for p_n, p_p in zip(pathway_name, pathway_prob):
        map_tmp = parse_pattern.parse_species_production_reaction(
            p_n, 'S' + spe_name_ind_dict[spe])
        for key, value in map_tmp.items():
            if key not in reaction_map:
                reaction_map[key] = value * p_p
            else:
                reaction_map[key] += value * p_p
    d_f = pd.DataFrame(list(
        sorted(reaction_map.items(), key=lambda x: x[1], reverse=True)),
                       columns=['reaction', 'frequency'])
    if norm is True:
        total = sum(d_f['frequency'])
        d_f['frequency'] /= total
    f_n_out1 = os.path.join(data_dir, "output",
                            spe + "_production_reaction_index.csv")
    d_f[0:top_n].to_csv(f_n_out1,
                        header=False,
                        index=False,
                        sep=',',
                        columns=['reaction', 'frequency'])

    # load reaction info
    _, new_ind_reaction_dict = psri.parse_reaction_and_its_index(data_dir)
    # convert species reaction index to real species and reactions
    d_f['reaction'] = d_f['reaction'].apply(
        lambda x: psri.reaction_name_to_real_reaction(
            new_ind_reaction_dict, x).strip())
    # print(d_f['reaction'])
    f_n_out2 = os.path.join(data_dir, "output",
                            spe + "_production_reaction_name.csv")
    d_f[0:top_n].to_csv(f_n_out2,
                        header=False,
                        index=False,
                        sep=',',
                        columns=['reaction', 'frequency'])
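
A minimal usage sketch (hypothetical data directory; assumes output/pathway_name_candidate.csv and output/pathway_prob.csv already exist under data_dir):

# Hypothetical call: rank the top 20 OH-producing reactions,
# with frequencies normalized to sum to 1.0.
species_production_reaction("./demo_data", spe='OH', top_n=20, norm=True)
# -> writes OH_production_reaction_index.csv and
#    OH_production_reaction_name.csv under ./demo_data/output/
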
def convert_concentration_to_path_prob(data_dir,
                                       atom_followed="C",
                                       spe_conc=None,
                                       renormalization=True):
    """
    convert concentration to the corresponding total pathway probability.
    For example, for C3H8: suppose [C3H8] = 1.0 and we are following the "C"
    atom; then the corresponding total pathway probability should be
    1.0 * 3, since each C3H8 molecule has 3 "C" atoms.
    Warning: spe_conc should be read from the dlsode calculation; the caller
    guarantees that the dimensions of spe_conc match the mechanism
    """
    if spe_conc is None:
        return None
    if len(spe_conc) == 0:
        return None

    _, spe_n_i_d = psri.parse_spe_info(data_dir)
    spe_composition = psri.read_spe_composition(
        os.path.join(data_dir, "input", "spe_composition.json"))

    spe_idx_coefficient = dict()
    for val in spe_composition:
        if atom_followed in spe_composition[val]:
            spe_idx_coefficient[spe_n_i_d[val]] = float(
                spe_composition[val][atom_followed])
        else:
            spe_idx_coefficient[spe_n_i_d[val]] = 0.0
    # print(spe_composition, spe_idx_coefficient)
    if np.shape(spe_conc)[0] > 0:
        if np.shape(spe_conc[0]) == ():
            print("1D array", "shape:\t", len(spe_conc))
            for idx, _ in enumerate(spe_conc):
                spe_conc[idx] *= float(spe_idx_coefficient[str(idx)])
            if renormalization is True:
                spe_conc /= np.sum(spe_conc)
        else:
            print("2D array", "shape:\t", np.shape(spe_conc))
            for idx in range(np.shape(spe_conc)[1]):
                spe_conc[:, idx] *= float(spe_idx_coefficient[str(idx)])
            if renormalization is True:
                for idx, _ in enumerate(spe_conc):
                    spe_conc[idx, :] /= np.sum(spe_conc[idx, :])

    return spe_conc
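
The docstring's C3H8 example can be checked with plain numpy; a standalone sketch of the weighting step, independent of the repo's helper modules (species and counts are made up):

import numpy as np

# Each concentration is multiplied by the number of followed atoms,
# then the vector is renormalized, as in the 1D branch above.
spe_conc = np.array([1.0, 2.0])        # e.g. [C3H8], [CH4]
carbon_counts = np.array([3.0, 1.0])   # "C" atoms per molecule

path_prob = spe_conc * carbon_counts   # [3.0, 2.0]
path_prob /= path_prob.sum()           # [0.6, 0.4]
print(path_prob)
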
Example 3
def convert_path_prob_to_concentration(data_dir,
                                       atom_followed="C",
                                       path_prob=None,
                                       default_coef=None):
    """
    if default_coef is not None, use it as the default coefficient.
    convert total pathway probability back to concentration.
    For example, for C3H8: suppose [C3H8] = 1.0 and we are following the "C"
    atom; then the corresponding total pathway probability should be
    1.0 * 3, since each C3H8 molecule has 3 "C" atoms. In other words, the
    concentration is the pathway probability divided by 3.0
    """
    if path_prob is None:
        return None
    if len(path_prob) == 0:
        return None

    _, spe_n_i_d = psri.parse_spe_info(data_dir)
    spe_composition = psri.read_spe_composition(
        os.path.join(data_dir, "input", "spe_composition.json"))

    spe_idx_coefficient = dict()
    for val in spe_composition:
        if atom_followed in spe_composition[val]:
            spe_idx_coefficient[spe_n_i_d[val]] = float(
                spe_composition[val][atom_followed])
        else:
            spe_idx_coefficient[spe_n_i_d[val]] = 0.0

    if default_coef is not None:
        for val in spe_idx_coefficient:
            if spe_idx_coefficient[val] != 0:
                spe_idx_coefficient[val] = default_coef

    if np.shape(path_prob)[0] > 0:
        if np.shape(path_prob[0]) == ():
            print("1D array", "shape:\t", len(path_prob))
            for idx, _ in enumerate(path_prob):
                if float(spe_idx_coefficient[str(idx)]) != 0:
                    path_prob[idx] /= float(spe_idx_coefficient[str(idx)])

    return path_prob
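
The inverse conversion divides by the same coefficients, skipping species whose coefficient is zero; a standalone check with the same made-up species as above:

import numpy as np

# Total pathway probability -> concentration.
path_prob = np.array([3.0, 2.0])
carbon_counts = np.array([3.0, 1.0])

for idx, coef in enumerate(carbon_counts):
    if coef != 0:
        path_prob[idx] /= coef
print(path_prob)  # [1.0, 2.0], the original concentrations
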
Example 4
def parse_spe_production_along_path(data_dir,
                                    top_n=10,
                                    spe_idx=10,
                                    init_spe=62,
                                    atom_followed="C",
                                    end_t=1.0,
                                    species_path=False,
                                    axis=0,
                                    path_branching_factor=False,
                                    s_consumption=False,
                                    s_production=True):
    """
    parse species production along a path; note that a species might not be
    explicitly shown on the path but can be a side product of reactions on
    the pathway.
    spe_idx may be a single index or a list of indices; if it is None, return
    immediately. The indices are appended to the output file name as a suffix
    """
    id_tmp = ""
    if spe_idx is None or spe_idx == []:
        return
    elif isinstance(spe_idx, int):
        id_tmp = str(spe_idx)
        spe_idx = [spe_idx]
    else:
        for x_t in spe_idx:
            if id_tmp == "":
                id_tmp = str(x_t)
            else:
                id_tmp += "_" + str(x_t)

    suffix = naming.get_suffix(data_dir,
                               init_spe=init_spe,
                               atom_followed=atom_followed,
                               end_t=end_t)

    prefix = ""
    if species_path is True:
        prefix = "species_"

    f_n_path_name = os.path.join(
        data_dir, "output",
        prefix + "pathway_name_candidate" + suffix + ".csv")
    pathname_data = np.genfromtxt(f_n_path_name, dtype=str, max_rows=top_n + 1)

    # in case of two dimensional pathway name
    if len(np.shape(pathname_data)) == 2:
        pathname_data = pathname_data[:, axis]

    net_reactant = psri.parse_reaction_net_reactant(data_dir)
    net_product = psri.parse_reaction_net_product(data_dir)
    s_p_r_c = psri.parse_species_pair_reaction(data_dir)

    if path_branching_factor is True:
        atom_scheme = asch.get_atom_scheme(data_dir)
        s_idx_name, _ = psri.parse_spe_info(data_dir)

    s_p_c = []
    for p_n in pathname_data:
        spe_consumption_count = 0
        spe_production_count = 0
        for s_i in spe_idx:
            if s_consumption is True:
                spe_consumption_count += parse_pattern.parse_species_along_path_using_reaction(
                    p_n, net_reactant, s_i, s_p_r_c)
            if s_production is True:
                spe_production_count += parse_pattern.parse_species_along_path_using_reaction(
                    p_n, net_product, s_i, s_p_r_c)

        path_branching_number = 1
        if path_branching_factor is True:
            path_branching_number = parse_pattern.calculate_path_branching_number(
                pathname=p_n,
                net_reactant=net_reactant,
                net_product=net_product,
                s_idx_name=s_idx_name,
                atom_scheme=atom_scheme,
                atom_followed=atom_followed)

        s_p_c.append((spe_production_count - spe_consumption_count) *
                     path_branching_number)

    if id_tmp != "":
        suffix += "_" + id_tmp
    f_n_spe_production_count = os.path.join(
        data_dir, "output",
        prefix + "pathway_species_production_count" + suffix + ".csv")

    np.savetxt(f_n_spe_production_count, s_p_c, fmt='%d')
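
A hypothetical call (all index values are for illustration only; real indices depend on the mechanism):

# Count net production of species 10 along the top 10 carbon-following
# pathways that start from species 62.
parse_spe_production_along_path("./demo_data",
                                top_n=10,
                                spe_idx=10,
                                init_spe=62,
                                atom_followed="C",
                                end_t=1.0)
# -> writes pathway_species_production_count<suffix>_10.csv
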
Example 5
def plot_network(data_dir,
                 fname="",
                 pathname="",
                 pathprob=1.0,
                 path_idx=None,
                 end_t=1.0,
                 suffix="",
                 atom_followed="C",
                 species_path=False):
    """
    plot network manually
    """
    print(fname)
    n_coordinate = get_names_coordinates(data_dir, fname)

    prefix = ""
    if species_path is True:
        prefix = "species_"

    # figure name
    if suffix == "":
        fig_name = prefix + "network_path_" + str(path_idx) + ".jpg"
    else:
        fig_name = prefix + "network_path_" + \
            str(path_idx) + str(suffix) + ".jpg"

    # specify label for lines
    labels = []
    x = []
    y = []
    name_idx_dict = dict()
    for i_tmp, val in enumerate(n_coordinate):
        labels.append(val)
        name_idx_dict[val] = i_tmp
        x.append(float(n_coordinate[val][0]))
        y.append(float(n_coordinate[val][1]))

    # read in species index name
    spe_idx_name_dict, spe_name_idx_dict = psri.parse_spe_info(data_dir)
    spe_alias_latex = read_spe_alias(
        os.path.join(data_dir, "input", "spe_alias_latex.json"))

    _, new_ind_reaction_dict = psri.parse_reaction_and_its_index(data_dir)

    # modify labels
    spe_union_find_group = global_settings.get_union_find_group(
        data_dir, atom_followed)
    for idx, val in enumerate(labels):
        spe_i = spe_name_idx_dict[val]
        if spe_i in spe_union_find_group:
            labels[idx] = ",".join([
                str(spe_idx_name_dict[str(x)])
                for x in spe_union_find_group[spe_i]
            ])
    print(labels)
    for idx, val in enumerate(labels):
        labels[idx] = change_spe_name(val, spe_alias_latex, None)
    print(labels)

    fig, a_x = plt.subplots(1, 1, sharex=True, sharey=False)

    # background
    a_x.scatter(x, y, color='b', marker="o", alpha=0.3)
    for i, _ in enumerate(x):
        t_h = a_x.annotate(labels[i], (x[i], y[i]))
        t_h.set_alpha(0.15)

    # get rid of R-1000003S90, don't need it here
    pathname = re.sub(r"R-\d+S\d+", r'', pathname)

    # parse pathway
    matched_spe = re.findall(r"S(\d+)", pathname)
    matched_reaction = re.findall(r"R(\d+)", pathname)
    print(matched_spe, matched_reaction)
    node_list = [
        name_idx_dict[change_spe_name(str(x), spe_idx_name_dict,
                                      spe_union_find_group)]
        for x in matched_spe
    ]
    print(node_list)
    for idx, curr_idx in enumerate(node_list):
        if idx >= 1:
            pre_idx = node_list[idx - 1]
            a_h = a_x.annotate('',
                               xy=(x[curr_idx], y[curr_idx]),
                               xytext=(x[pre_idx], y[pre_idx]),
                               arrowprops={
                                   'arrowstyle': '->',
                                   'lw': 4,
                                   'color': 'red'
                               },
                               va='center')
            a_h.set_alpha(0.9)

    # re-draw points and labels on canvas
    for val in node_list:
        a_x.scatter(x[val], y[val], color='r', marker="o", alpha=0.9)
        t_h = a_x.annotate(labels[val], (x[val], y[val]))
        t_h.set_alpha(0.9)

    # draw reaction along path
    if species_path is False:
        # check for duplicate transition
        idx_label_dict = {}
        for idx, curr_idx in enumerate(node_list):
            if idx >= 1:
                pre_idx = node_list[idx - 1]
                rxn_idx = matched_reaction[idx - 1]
                if tuple([pre_idx, curr_idx, rxn_idx]) in idx_label_dict:
                    idx_label_dict[tuple([pre_idx, curr_idx,
                                          rxn_idx])] += "," + str(idx)
                else:
                    idx_label_dict[tuple([pre_idx, curr_idx,
                                          rxn_idx])] = str(idx)

        for idx, curr_idx in enumerate(node_list):
            if idx >= 1:
                pre_idx = node_list[idx - 1]
                rxn_idx = matched_reaction[idx - 1]
                rxn_name = idx_label_dict[tuple(
                    [pre_idx, curr_idx, rxn_idx])] + ": " + str(
                        new_ind_reaction_dict[matched_reaction[idx - 1]])

                if x[pre_idx] <= x[curr_idx]:
                    x_tmp = x[pre_idx]
                else:
                    x_tmp = x[curr_idx]
                y_tmp = y[pre_idx] * 0.7 + y[curr_idx] * 0.3

                t_h = a_x.annotate(rxn_name, (x_tmp, y_tmp),
                                   color='g',
                                   size=8.0)
                t_h.set_alpha(0.5)
    else:
        # build idx->label
        idx_label_dict = {}
        for idx, curr_idx in enumerate(node_list):
            if idx >= 1:
                pre_idx = node_list[idx - 1]
                if tuple([pre_idx, curr_idx]) in idx_label_dict:
                    idx_label_dict[tuple([pre_idx,
                                          curr_idx])] += "," + str(idx)
                else:
                    idx_label_dict[tuple([pre_idx, curr_idx])] = str(idx)

        for idx, curr_idx in enumerate(node_list):
            if idx >= 1:
                pre_idx = node_list[idx - 1]
                rxn_name = idx_label_dict[tuple([pre_idx, curr_idx])]

                t_h = a_x.annotate(rxn_name,
                                   (x[pre_idx] * 0.7 + x[curr_idx] * 0.3,
                                    y[pre_idx] * 0.7 + y[curr_idx] * 0.3),
                                   color='g',
                                   size=8.0)
                t_h.set_alpha(0.5)

    a_x.set_xlim([
        np.min(x) - 0.01 * (np.max(x) - np.min(x)),
        np.max(x) + 0.25 * (np.max(x) - np.min(x))
    ])
    # a_x.grid('on')
    a_x.axis('off')
    a_x.set_frame_on(False)
    a_x.set_xticks([])  # this is needed for bbox_inches
    a_x.set_yticks([])

    if path_idx == 1:
        a_x.set_title("P$_{" + str(path_idx) + "}$" + " = " +
                      "{:.6e}".format(float(pathprob)))
    else:
        a_x.set_title("P$_{" + str(path_idx) + "}$" + " = " +
                      "{:.2e}".format(float(pathprob)))

    # fig.tight_layout()
    # plt.subplots_adjust(left=0.01, right=0.9, top=0.9, bottom=0.01)
    fig.savefig(os.path.join(data_dir, "output", fig_name),
                bbox_inches='tight',
                dpi=500)
    # bbox_inches='tight', pad_inches=0, dpi=500)
    plt.close()

    return
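
The pathway-string handling in plot_network is plain regex work; a standalone demonstration on a made-up pathway string:

import re

pathname = "S62R10S17R-1000003S90R5S2"

# Drop negative-index pseudo steps such as R-1000003S90, as above.
pathname = re.sub(r"R-\d+S\d+", r'', pathname)      # "S62R10S17R5S2"

matched_spe = re.findall(r"S(\d+)", pathname)       # ['62', '17', '2']
matched_reaction = re.findall(r"R(\d+)", pathname)  # ['10', '5']
print(matched_spe, matched_reaction)
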
Example 6
def init_directed_network_from_X_and_R_at_a_time(data_dir,
                                                 tag="M",
                                                 tau=10.0,
                                                 end_t=0.5,
                                                 end_t2=None,
                                                 x_y_dict=None):
    """
    init directed network
    without parallel edges
    return networkx.DiGraph
    builds a time-snapshot network at one time, end_t;
    a second time, end_t2, can be added, which doesn't change the network
    structure but instead adds a second attribute "weight2" to the edges
    """
    s_idx_2_name, _ = psri.parse_spe_info(data_dir)
    spe_alias = read_spe_alias(
        os.path.join(data_dir, "input", "spe_alias.json"))

    time_v = np.loadtxt(os.path.join(data_dir, "output",
                                     "time_dlsode_" + str(tag) + ".csv"),
                        dtype=float,
                        delimiter=',')
    conc_mat = np.loadtxt(os.path.join(
        data_dir, "output", "concentration_dlsode_" + str(tag) + ".csv"),
                          delimiter=",")
    rxn_rates_mat = np.loadtxt(os.path.join(
        data_dir, "output", "reaction_rate_dlsode_" + str(tag) + ".csv"),
                               delimiter=",")
    # the time point where reference time tau is
    # use interpolation here
    idx_array = list(range(len(time_v)))
    time_axis = int(
        round(interpolation.interp1d(time_v, idx_array, tau * end_t)))
    if time_axis >= len(time_v):
        time_axis = len(time_v) - 1

    conc_v = conc_mat[time_axis, :]
    rxn_rates_v = rxn_rates_mat[time_axis, :]

    if end_t2 is not None:
        time_axis2 = int(
            round(interpolation.interp1d(time_v, idx_array, tau * end_t2)))
        if time_axis2 >= len(time_v):
            time_axis2 = len(time_v) - 1
        rxn_rates_v2 = rxn_rates_mat[time_axis2, :]

    species_set = set()
    species_pair_weight = {}
    if end_t2 is not None:
        species_pair_weight2 = {}

    # species pairs-reactions-coefficient
    s_p_r_c = psri.parse_species_pair_reaction(data_dir)
    # print(s_p_r_c)
    for s1, s2 in s_p_r_c:
        species_set.add(int(s1))
        # print(s1, s2)
        species_set.add(int(s2))
        if (int(s1), int(s2)) not in species_pair_weight:
            species_pair_weight.update({(int(s1), int(s2)): 0.0})
        if end_t2 is not None:
            if (int(s1), int(s2)) not in species_pair_weight2:
                species_pair_weight2.update({(int(s1), int(s2)): 0.0})

        for idx in s_p_r_c[(s1, s2)]:
            r_idx = int(s_p_r_c[(s1, s2)][idx]['r_idx'])
            c1 = float(s_p_r_c[(s1, s2)][idx]['c1'])
            c2 = float(s_p_r_c[(s1, s2)][idx]['c2'])
            flux = rxn_rates_v[r_idx] * c2 / c1
            species_pair_weight[(int(s1), int(s2))] += flux

            if end_t2 is not None:
                flux2 = rxn_rates_v2[r_idx] * c2 / c1
                species_pair_weight2[(int(s1), int(s2))] += flux2

    # print(species_set)
    # print(species_pair_weight)

    edge_weight_v = []
    for key in species_pair_weight:
        edge_weight_v.append(float(species_pair_weight[key]))
    if end_t2 is not None:
        edge_weight_v2 = []
        for key in species_pair_weight2:
            edge_weight_v2.append(float(species_pair_weight2[key]))

    # rescale concentrations
    # conc_v = rescale_array(conc_v, 10.0, 25.0)
    conc_v = rescale_array_v2(conc_v, 1.0, 5.0, 15.0, 25.0, -12)
    # edge_weight_v = rescale_array(edge_weight_v, 2.0, 25.0)
    edge_weight_v = rescale_array_v2(edge_weight_v, 0.5, 1.0, 15.0, 25.0, -9)
    # edge weights write to file
    e_w_fn1 = os.path.join(data_dir, "output",
                           "edge_weight1_" + str(end_t) + ".csv")
    f_handler1 = open(e_w_fn1, 'w')
    f_handler1.write("Source,Target,Weight" + str(end_t) + "\n")
    # np.savetxt(e_w_fn1, edge_weight_v, fmt='%.18e', newline='\n')
    if end_t2 is not None:
        # edge_weight_v2 = rescale_array(edge_weight_v2, 2.0, 25.0)
        edge_weight_v2 = rescale_array_v2(edge_weight_v2, 1.5, 2.5, 15.0, 25.0,
                                          -9)

        e_w_fn2 = os.path.join(data_dir, "output",
                               "edge_weight2_" + str(end_t2) + ".csv")
        # np.savetxt(e_w_fn2, edge_weight_v2, fmt='%.18e', newline='\n')
        f_handler2 = open(e_w_fn2, 'w')
        f_handler2.write("Source,Target,Weight" + str(end_t2) + "\n")

    # final directed graph
    di_graph = nx.DiGraph()
    # add nodes first
    for val in species_set:
        weight = float(conc_v[int(val)])
        node_name = change_spe_name(s_idx_2_name[str(val)], spe_alias, None)
        # add a layer to control whether to show the species label name
        label_name = ''
        if s_idx_2_name[str(val)] in spe_alias:
            label_name = node_name
        if x_y_dict is None:
            di_graph.add_node(node_name, label=label_name, weight=weight)
        else:
            di_graph.add_node(node_name,
                              label=label_name,
                              weight=weight,
                              x=x_y_dict[node_name][0],
                              y=x_y_dict[node_name][1])

    # add edges
    for idx, key in enumerate(species_pair_weight):
        src = key[0]
        dst = key[1]
        src_name = change_spe_name(s_idx_2_name[str(src)], spe_alias, None)
        dst_name = change_spe_name(s_idx_2_name[str(dst)], spe_alias, None)
        name = src_name + "," + dst_name
        weight = float(edge_weight_v[idx])
        # write weight1 to file
        f_handler1.write(src_name + "," + dst_name + "," + str(weight) + "\n")
        if end_t2 is None:
            di_graph.add_edge(src_name,
                              dst_name,
                              name=name,
                              weight=weight,
                              weight2=weight)
        else:
            weight2 = float(edge_weight_v2[idx])
            di_graph.add_edge(src_name,
                              dst_name,
                              name=name,
                              weight=weight,
                              weight2=weight2)
            # write weight2 to file
            f_handler2.write(src_name + "," + dst_name + "," + str(weight2) +
                             "\n")

    f_handler1.close()
    if end_t2 is not None:
        f_handler2.close()

    return di_graph
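
The time-to-index lookup done with the repo's interpolation.interp1d helper can be sketched with numpy.interp, assuming the helper performs plain linear interpolation (an assumption; the actual implementation may differ):

import numpy as np

# Map a physical time (tau * end_t) to a fractional row index by
# interpolating over the index array, then round and clamp.
time_v = np.array([0.0, 0.1, 0.3, 0.6, 1.0])
tau, end_t = 1.0, 0.5

idx_array = np.arange(len(time_v))
time_axis = int(np.round(np.interp(tau * end_t, time_v, idx_array)))
time_axis = min(time_axis, len(time_v) - 1)
print(time_axis)  # 3 -> the row whose time (0.6) is nearest 0.5
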
Example 7
def init_directed_network(data_dir,
                          path_idx=None,
                          init_spe=None,
                          atom_followed="C",
                          end_t=None,
                          species_path=False,
                          time_axis=0):
    """
    init directed network
    without parallel edges
    return networkx.DiGraph
    """
    spe_idx_name_dict, _ = psri.parse_spe_info(data_dir)

    suffix = get_suffix(data_dir,
                        init_spe=init_spe,
                        atom_followed=atom_followed,
                        end_t=end_t)
    prefix = ""
    if species_path is True:
        prefix = "species_"

    f_n_path_name = os.path.join(
        data_dir, "output",
        prefix + "pathway_name_candidate" + suffix + ".csv")
    f_n_path_prob = os.path.join(data_dir, "output",
                                 prefix + "pathway_prob" + suffix + ".csv")

    print(f_n_path_name, f_n_path_prob)
    p_n = np.genfromtxt(f_n_path_name, dtype=str, delimiter=',')
    p_p = np.genfromtxt(f_n_path_prob, dtype=float, delimiter=',')

    # in case of two dimensional pathway name
    if len(np.shape(p_n)) == 2:
        p_n = p_n[:, time_axis]
    if len(np.shape(p_p)) == 2:
        p_p = p_p[:, time_axis]

    # retrieve pathway name and pathway probability before sort
    p_n = [p_n[i] for i in path_idx]
    p_p = [p_p[i] for i in path_idx]

    # set the data types separately
    d_f_n = pd.DataFrame(p_n, columns=['name'], dtype=str)
    d_f_p = pd.DataFrame(p_p, columns=['prob'], dtype=float)
    d_f = pd.concat([d_f_n, d_f_p], axis=1)

    d_f.sort_values(by='prob',
                    ascending=False,
                    inplace=True,
                    na_position='last')
    d_f.reset_index(drop=True, inplace=True)
    print(d_f.head())

    # temporary directed graph
    d_g_tmp = nx.DiGraph()

    # modify labels
    spe_union_find_group = global_settings.get_union_find_group(
        data_dir, atom_followed)

    # record all nodes
    nodes = set()
    for _, val in d_f.iterrows():
        matched_spe = re.findall(r"S(\d+)", val['name'])
        for spe in matched_spe:
            nodes.add(
                change_spe_name(spe,
                                spe_idx_name_dict,
                                union_find=spe_union_find_group))

    for val in nodes:
        d_g_tmp.add_node(val, weight=0.0, label=str(val))

    for _, val in d_f.iterrows():
        prob = float(val['prob'])

        # get rid of R-1000003S90, don't need it here
        print(val['name'])
        path_name_tmp = re.sub(r"R-\d+S\d+", r'', val['name'])
        print(path_name_tmp)

        # pathway contains both reaction and species
        if species_path is False:
            matched_spe = re.findall(r"S(\d+)", path_name_tmp)
            matched_reaction = re.findall(r"R(\d+)", path_name_tmp)
            for idx, spe in enumerate(matched_spe):
                d_g_tmp.nodes[change_spe_name(
                    spe, spe_idx_name_dict,
                    union_find=spe_union_find_group)]['weight'] += 1.0 * prob
                if idx > 0:
                    src = change_spe_name(matched_spe[idx - 1],
                                          spe_idx_name_dict,
                                          union_find=spe_union_find_group)
                    dest = change_spe_name(spe,
                                           spe_idx_name_dict,
                                           union_find=spe_union_find_group)
                    rxn = change_rxn_name(matched_reaction[idx - 1])
                    if d_g_tmp.has_edge(src, dest):
                        d_g_tmp[src][dest]['weight'] += 1.0 * prob
                        d_g_tmp[src][dest]['reactions'].add(rxn)
                    else:
                        d_g_tmp.add_edge(src,
                                         dest,
                                         reactions=set([rxn]),
                                         weight=1.0 * prob)
        else:
            matched_spe = re.findall(r"S(\d+)", path_name_tmp)
            for idx, spe in enumerate(matched_spe):
                d_g_tmp.nodes[change_spe_name(
                    spe, spe_idx_name_dict,
                    union_find=spe_union_find_group)]['weight'] += 1.0 * prob
                if idx > 0:
                    src = change_spe_name(matched_spe[idx - 1],
                                          spe_idx_name_dict,
                                          union_find=spe_union_find_group)
                    dest = change_spe_name(spe,
                                           spe_idx_name_dict,
                                           union_find=spe_union_find_group)
                    rxn = '-1'
                    if d_g_tmp.has_edge(src, dest):
                        d_g_tmp[src][dest]['weight'] += 1.0 * prob
                        d_g_tmp[src][dest]['reactions'].add(rxn)
                    else:
                        d_g_tmp.add_edge(src,
                                         dest,
                                         reactions=set([rxn]),
                                         weight=1.0 * prob)

    # update directed graph, for example,
    # 1. reactions is originally a set, combine to get a string of reactions
    # 2. smooth and re-normalize node weight
    # 3. re-normalize edge weight
    node_weight = []
    for val in d_g_tmp.nodes():
        node_weight.append(d_g_tmp.nodes[val]['weight'])
    edge_weight = []
    for val in d_g_tmp.edges():
        edge_weight.append(d_g_tmp[val[0]][val[1]]['weight'])
    node_weight = rescale_array(node_weight, 1.0, 5.0)
    edge_weight = rescale_array(edge_weight, 3.0, 15.0)

    # final directed graph
    di_graph = nx.DiGraph()
    for idx, val in enumerate(d_g_tmp.nodes()):
        di_graph.add_node(val, weight=node_weight[idx])
    for idx, val in enumerate(d_g_tmp.edges()):
        src = val[0]
        dest = val[1]

        rxn_set = d_g_tmp[src][dest]['reactions']
        rxn_set = sorted(rxn_set, key=int)
        name = ",".join(rxn_set)

        weight = edge_weight[idx]
        di_graph.add_edge(src, dest, name=name, weight=weight)

    return di_graph
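
The name/probability table handling above (separate dtypes, concat, sort by probability) works standalone; a small sketch with made-up pathway data:

import pandas as pd

# Keep names as str and probabilities as float by building the two
# columns separately, then sort descending by probability.
p_n = ["S62R10S17", "S62R5S2", "S62R7S9"]
p_p = [0.10, 0.55, 0.35]

d_f_n = pd.DataFrame(p_n, columns=['name'], dtype=str)
d_f_p = pd.DataFrame(p_p, columns=['prob'], dtype=float)
d_f = pd.concat([d_f_n, d_f_p], axis=1)

d_f.sort_values(by='prob', ascending=False, inplace=True,
                na_position='last')
d_f.reset_index(drop=True, inplace=True)
print(d_f)  # S62R5S2 (0.55) first, then S62R7S9, then S62R10S17
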
Example 8
def get_species_with_top_n_concentration(data_dir,
                                         exclude,
                                         top_n=10,
                                         traj_max_t=100.0,
                                         tau=10.0,
                                         end_t=1.0,
                                         tag="M",
                                         atoms=None):
    """
    get the species with the top-n concentrations at time tau * end_t,
    where tau is the reference time and end_t the fraction of it
    """
    if atoms is None:
        atoms = ["C"]
    if exclude is None:
        exclude = []

    time = np.loadtxt(os.path.join(data_dir, "output",
                                   "time_dlsode_" + str(tag) + ".csv"),
                      delimiter=",")
    conc_all = np.loadtxt(os.path.join(
        data_dir, "output", "concentration_dlsode_" + str(tag) + ".csv"),
                          delimiter=",")

    n_spe = np.shape(conc_all)[1]

    data = [0.0] * n_spe
    for i in range(n_spe):
        data[i] = interpolation.interp1d(time, conc_all[:, i], tau * end_t)

    c_idx_map = defaultdict(set)
    for idx, val in enumerate(data):
        c_idx_map[val].add(str(idx))
    c_idx_map = OrderedDict(sorted(c_idx_map.items(), reverse=True))

    spe_idx_name_dict, _ = psri.parse_spe_info(data_dir)
    spe_composition = psri.read_spe_composition(
        os.path.join(data_dir, "input", "spe_composition.json"))

    spe_idx_list = []
    counter = 0

    for val in c_idx_map:
        if counter < top_n:
            spe_idx = next(iter(c_idx_map[val]))
            indicator = False
            for atom in atoms:
                if atom in spe_composition[spe_idx_name_dict[spe_idx]]:
                    indicator = True
                    break
            if spe_idx_name_dict[spe_idx] not in exclude and indicator:
                print(val, spe_idx, spe_idx_name_dict[spe_idx])
                spe_idx_list.append(int(spe_idx))
                counter += 1

    # species that don't contain any atom we are interested in
    exclude_spe_name_list = []
    for s_n_t in spe_composition:
        indicator = False
        for atom in atoms:
            if atom in spe_composition[s_n_t]:
                indicator = True
        if indicator is False:
            exclude_spe_name_list.append(s_n_t)
    spe_name_list = [str(spe_idx_name_dict[str(x)]) for x in spe_idx_list]
    return spe_idx_list, spe_name_list, exclude_spe_name_list
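
The ranking trick used here, a value-keyed defaultdict(set) sorted in descending key order (which also groups ties), in isolation:

from collections import OrderedDict, defaultdict

# Rank species indices by concentration; equal values share one bucket.
data = [0.2, 0.9, 0.2, 0.5]

c_idx_map = defaultdict(set)
for idx, val in enumerate(data):
    c_idx_map[val].add(str(idx))
c_idx_map = OrderedDict(sorted(c_idx_map.items(), reverse=True))

print(c_idx_map)
# OrderedDict([(0.9, {'1'}), (0.5, {'3'}), (0.2, {'0', '2'})])
# (the order inside each set may vary)
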
Example 9
def plot_spe_drc(data_dir, spe_idx=None, tau=10.0, end_t=1.0, tag="fraction", reciprocal=False):
    """
    plot species destruction rate constants, given a species index list
    """
    spe_idx_tmp = deepcopy(spe_idx)
    if spe_idx_tmp is None:
        spe_idx_tmp = [0]

    colors, markers, _ = get_colors_markers_linestyles()

    s_idx_n, _ = psri.parse_spe_info(data_dir)
    s_idx_n["-1"] = "Temp"

    spe_idx_tmp.append(-1)

    time = np.loadtxt(os.path.join(
        data_dir, "output", "time_dlsode_" + str(tag) + ".csv"), delimiter=",")
    temp = np.loadtxt(os.path.join(data_dir, "output",
                                   "temperature_dlsode_" + str(tag) + ".csv"), delimiter=",")

    spe_drc = np.loadtxt(os.path.join(data_dir, "output",
                                      "drc_dlsode_" + str(tag) + ".csv"), delimiter=",")
    counter = 0
    # the time point where reference time tau is
    tau_time_point = float(tau) / time[-1] * len(time)
    end_point = int(end_t * tau_time_point)
    delta_n = int(end_point / 10)
    if delta_n == 0:
        delta_n = 1

    fig, a_x_left = plt.subplots(1, 1, sharex=True, sharey=False)
    for s_idx in spe_idx_tmp:
        if s_idx == -1:
            a_x_right = a_x_left.twinx()
            a_x_right.plot(time[0:end_point], temp[0:end_point], markevery=delta_n,
                           color=colors[-1], label=s_idx_n[str(s_idx)])
        else:
            if counter < len(colors) - 1:
                m_k = None
            else:
                m_k = markers[(counter + 1 - len(colors)) % (len(markers))]
            if reciprocal is False:
                a_x_left.semilogy(time[0:end_point], spe_drc[0:end_point, s_idx], marker=m_k, markevery=delta_n,
                                  color=colors[counter % (len(colors) - 1)], label=s_idx_n[str(s_idx)])
            else:
                a_x_left.semilogy(time[0:end_point], 1.0 / spe_drc[0:end_point, s_idx], marker=m_k, markevery=delta_n,
                                  color=colors[counter % (len(colors) - 1)], label=s_idx_n[str(s_idx)])
            counter += 1
    if reciprocal is False:
        leg_left = a_x_left.legend(loc=9, fancybox=True, prop={'size': 10.0})
    else:
        leg_left = a_x_left.legend(loc=8, fancybox=True, prop={'size': 10.0})

    leg_right = a_x_right.legend(loc=4, fancybox=True, prop={'size': 10.0})
    leg_left.get_frame().set_alpha(0.7)
    leg_right.get_frame().set_alpha(0.7)
    a_x_left.grid()
    a_x_left.set_xlim([0.05, time[end_point]])

    a_x_left.set_xlabel("time/s")
    if reciprocal is False:
        a_x_left.set_ylabel("k/s$^{-1}$")
    else:
        a_x_left.set_ylabel("k$^{-1}/s$")

    a_x_right.set_ylabel("T/K")

    s_n_str = "_".join(s_idx_n[str(x)] for x in spe_idx_tmp)
    # plt.title(s_n_str)

    if reciprocal is False:
        fig.savefig(os.path.join(data_dir, "output",
                                 "spe_drc_" + s_n_str + ".jpg"), dpi=500)
    else:
        fig.savefig(os.path.join(data_dir, "output",
                                 "spe_drc_reciprocal_" + s_n_str + ".jpg"), dpi=500)

    plt.close()
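
plot_spe_drc converts (tau, end_t) to an array index by assuming uniformly spaced time samples; the arithmetic in isolation:

import numpy as np

# With a uniform grid, tau / time[-1] * len(time) is roughly the row
# count up to the reference time tau; scaling by end_t truncates the
# plot at a fraction of tau.
time = np.linspace(0.0, 20.0, num=2001)
tau, end_t = 10.0, 1.0

tau_time_point = float(tau) / time[-1] * len(time)  # ~1000.5
end_point = int(end_t * tau_time_point)             # 1000
delta_n = max(int(end_point / 10), 1)               # marker spacing
print(end_point, delta_n)                           # 1000 100
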
Example 10
def plot_concentrations(data_dir, spe_idx=None, tau=10.0, end_t=1.0, tag="fraction", exclude_names=None,
                        renormalization=True, semilogy=False, hasTemp=True):
    """
    plot concentrations given a species index list; if exclude_names is not
    None, we renormalize the molar fractions
    """
    if exclude_names is None:
        exclude_names = []

    spe_idx_tmp = deepcopy(spe_idx)
    if spe_idx_tmp is None:
        spe_idx_tmp = [0]

    colors, markers, _ = get_colors_markers_linestyles()

    s_idx_n, _ = psri.parse_spe_info(data_dir)

    if hasTemp is True:
        s_idx_n["-1"] = "Temp"
        spe_idx_tmp.append(-1)

    time = np.loadtxt(os.path.join(
        data_dir, "output", "time_dlsode_" + str(tag) + ".csv"), delimiter=",")
    temp = np.loadtxt(os.path.join(data_dir, "output",
                                   "temperature_dlsode_" + str(tag) + ".csv"), delimiter=",")

    conc = trajectory.get_normalized_concentration(
        data_dir, tag=tag, exclude_names=exclude_names, renormalization=renormalization)

    counter = 0
    # the time point where reference time tau is
    tau_time_point = float(tau) / time[-1] * len(time)
    end_point = int(end_t * tau_time_point)
    delta_n = int(end_point / 10)
    if delta_n == 0:
        delta_n = 1

    fig, a_x_left = plt.subplots(1, 1, sharex=True, sharey=False)
    for s_idx in spe_idx_tmp:
        if s_idx == -1:
            a_x_right = a_x_left.twinx()
            a_x_right.plot(time[0:end_point], temp[0:end_point],
                           color=colors[-1], label=s_idx_n[str(s_idx)])
        else:
            if counter < len(colors) - 1:
                m_k = None
            else:
                m_k = markers[(counter + 1 - len(colors)) % (len(markers))]
            if semilogy is True:
                a_x_left.semilogy(time[0:end_point], conc[0:end_point, s_idx], marker=m_k, markevery=delta_n,
                                  color=colors[counter % (len(colors) - 1)], label=s_idx_n[str(s_idx)])
            else:
                a_x_left.plot(time[0:end_point], conc[0:end_point, s_idx], marker=m_k, markevery=delta_n,
                              color=colors[counter % (len(colors) - 1)], label=s_idx_n[str(s_idx)])
            counter += 1
    leg_left = a_x_left.legend(loc=8, fancybox=True, prop={'size': 10.0})
    leg_left.get_frame().set_alpha(0.7)
    a_x_left.grid()
    a_x_left.set_xlim([0, tau * end_t])
    a_x_left.xaxis.set_major_formatter(FormatStrFormatter('%.1e'))

    a_x_left.set_xlabel("Time/sec")
    a_x_left.set_ylabel("[X]")

    if hasTemp is True:
        leg_right = a_x_right.legend(loc=2, fancybox=True, prop={'size': 10.0})
        leg_right.get_frame().set_alpha(0.7)
        a_x_right.set_ylabel("T/K")

    s_n_str = "_".join(s_idx_n[str(x)] for x in spe_idx_tmp)
    # plt.title(s_n_str)

    fig.savefig(os.path.join(data_dir, "output",
                             "trajectory_" + s_n_str + ".jpg"), dpi=500)
    plt.close()
def get_species_with_top_n_concentration(data_dir,
                                         exclude,
                                         top_n=10,
                                         traj_max_t=100.0,
                                         tau=10.0,
                                         end_t=1.0,
                                         tag="M",
                                         atoms=None):
    """
    get the species with the top-n concentrations at time tau * end_t,
    where tau is the reference time and end_t the fraction of it
    """
    if atoms is None:
        atoms = ["C"]
    if exclude is None:
        exclude = []
    conc = np.loadtxt(os.path.join(
        data_dir, "output", "concentration_dlsode_" + str(tag) + ".csv"),
                      delimiter=",")
    # the time point where reference time tau is
    tau_time_point = float(tau) / traj_max_t * len(conc)
    time_idx = int(end_t * tau_time_point)
    if time_idx >= len(conc):
        time_idx = (len(conc) - 1)

    data = conc[time_idx, :]
    c_idx_map = defaultdict(set)
    for idx, val in enumerate(data):
        c_idx_map[val].add(str(idx))
    c_idx_map = OrderedDict(sorted(c_idx_map.items(), reverse=True))

    spe_idx_name_dict, _ = psri.parse_spe_info(data_dir)
    spe_composition = psri.read_spe_composition(
        os.path.join(data_dir, "input", "spe_composition.json"))

    spe_idx_list = []
    counter = 0

    for val in c_idx_map:
        if counter < top_n:
            spe_idx = next(iter(c_idx_map[val]))
            indicator = False
            for atom in atoms:
                if atom in spe_composition[spe_idx_name_dict[spe_idx]]:
                    indicator = True
                    break
            if spe_idx_name_dict[spe_idx] not in exclude and indicator:
                print(val, spe_idx, spe_idx_name_dict[spe_idx])
                spe_idx_list.append(int(spe_idx))
                counter += 1

    # species that don't contain any atom we are interested in
    exclude_spe_name_list = []
    for s_n_t in spe_composition:
        indicator = False
        for atom in atoms:
            if atom in spe_composition[s_n_t]:
                indicator = True
        if indicator is False:
            exclude_spe_name_list.append(s_n_t)
    spe_name_list = [str(spe_idx_name_dict[str(x)]) for x in spe_idx_list]
    return spe_idx_list, spe_name_list, exclude_spe_name_list