Beispiel #1
0
def parse_species_from_pathway(data_dir,
                               p_idx_l=None,
                               init_spe=60,
                               atom_followed="C",
                               end_t=1.0,
                               species_path=False):
    """
    Collect the distinct species indices that occur in selected pathways.

    Pathway names are read from
    ``<data_dir>/output/[species_]pathway_name_candidate<suffix>.csv``; the
    suffix is produced by ``naming.get_suffix`` and the ``species_`` prefix
    is used only when ``species_path`` is True.

    ``p_idx_l`` holds the row indices of the pathways to inspect.  Every
    run of digits that follows an ``S`` in those pathway names is collected.

    Returns a set of digit strings (not ints), or None when ``p_idx_l`` is
    None.
    """
    # nothing was selected, so there is nothing to parse
    if p_idx_l is None:
        return None

    file_prefix = "species_" if species_path is True else ""
    file_suffix = naming.get_suffix(data_dir,
                                    init_spe=init_spe,
                                    atom_followed=atom_followed,
                                    end_t=end_t)

    name_file = os.path.join(
        data_dir, "output",
        file_prefix + "pathway_name_candidate" + file_suffix + ".csv")
    all_path_names = np.genfromtxt(name_file, dtype=str, delimiter=',')

    found_species = set()
    for path_row in p_idx_l:
        # findall returns only the captured digits, e.g. "S10R5S20" -> ["10", "20"]
        found_species.update(re.findall(r"S(\d+)", all_path_names[path_row]))

    return found_species
Beispiel #2
0
def path_prob_terminating_with_spe(data_dir,
                                   init_spe=62,
                                   atom_followed="C",
                                   tau=10.0,
                                   end_t=1.0,
                                   species_path=True,
                                   end_s_idx=None,
                                   exclude_idx=None,
                                   time_axis=0):
    """
    Load pathway names with their probabilities, optionally keeping only
    pathways that end with given species.

    Reads ``[species_]pathway_name_candidate<suffix>.csv`` and
    ``[species_]pathway_prob<suffix>.csv`` from ``<data_dir>/output``; the
    suffix comes from ``naming.get_suffix``.

    Parameters
    ----------
    end_s_idx : int, list, tuple, set or None
        Species index (or indices); a pathway is kept when its name ends
        with ``"S<idx>"`` for one of them.  None disables the filter.  Any
        other type leaves the table unfiltered.
    time_axis : int
        Column of the probability file to use when that file is
        two-dimensional; ignored for one-dimensional data.
    tau, exclude_idx
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame with columns ``name`` and ``frequency``, sorted by
    ``frequency`` in descending order with a fresh 0..n-1 index.  The table
    is also printed.

    Raises
    ------
    ValueError
        If the probability data has more than two dimensions.
    """
    prefix = "species_" if species_path is True else ""
    suffix = naming.get_suffix(data_dir,
                               init_spe=init_spe,
                               atom_followed=atom_followed,
                               end_t=end_t)
    f_n_pn = os.path.join(data_dir, "output",
                          prefix + "pathway_name_candidate" + suffix + ".csv")
    f_n_pp = os.path.join(data_dir, "output",
                          prefix + "pathway_prob" + suffix + ".csv")

    # a file holding a single value loads as a 0-d array; promote it so the
    # single-pathway case works like any other
    path_names = np.atleast_1d(np.loadtxt(f_n_pn, dtype=str, delimiter=","))
    data_pp = np.atleast_1d(np.loadtxt(f_n_pp, dtype=float, delimiter=","))

    if data_pp.ndim == 1:
        data_y = data_pp
    elif data_pp.ndim == 2:
        data_y = data_pp[:, time_axis]
    else:
        raise ValueError(
            "pathway probability data must be 1-D or 2-D, got %d dimensions"
            % data_pp.ndim)

    # one row per pathway: its name next to its probability
    d_f_n = pd.DataFrame(path_names, columns=['name'], dtype=str)
    d_f_p = pd.DataFrame(data_y, columns=['frequency'], dtype=float)
    d_f = pd.concat([d_f_n, d_f_p], axis=1)

    # filter on the terminal species
    if end_s_idx is not None:
        endings = None
        if isinstance(end_s_idx, (int, np.integer)):
            endings = "S" + str(end_s_idx)
        elif isinstance(end_s_idx, (list, tuple, set)):
            # str.endswith needs a tuple when several endings are allowed
            endings = tuple("S" + str(e_s) for e_s in end_s_idx)
        if endings is not None:
            d_f = d_f.loc[d_f['name'].str.endswith(endings)]

    # sort on a new object rather than in place on a filtered selection
    d_f = d_f.sort_values(by='frequency', ascending=False)
    d_f = d_f.reset_index(drop=True)
    print(d_f)

    return d_f
Beispiel #3
0
def calculate_Merchant_alpha_value(data_dir,
                                   init_spe=10,
                                   atom_followed="C",
                                   end_t=1.0,
                                   species_path=False,
                                   s_idx=10,
                                   r_idx=736):
    """
    Calculate the Merchant alpha value at every time point and save it.

    For each time point ``i >= 1`` the value is

        reaction_rate[i, r_idx] / (concentration[i, s_idx] * drc[i, s_idx])

    and is left at 0 when the denominator is not positive.  The value at
    the first time point is copied from the second one instead of being
    computed.

    Inputs are read from ``<data_dir>/output``:
    ``concentration_dlsode_M.csv``, ``drc_dlsode_M.csv`` and
    ``reaction_rate_dlsode_M.csv`` (rows are time points).
    NOTE(review): ``drc`` is presumably the per-species destruction rate
    constant -- confirm against the code that writes the file.

    The result is written, one value per line, to
    ``<data_dir>/output/[species_]Merchant_alpha_S<s_idx>_R<r_idx><suffix>.csv``
    where the suffix comes from ``naming.get_suffix``.  Returns None.
    """
    suffix = naming.get_suffix(data_dir,
                               init_spe=init_spe,
                               atom_followed=atom_followed,
                               end_t=end_t)
    prefix = "species_" if species_path is True else ""
    out_dir = os.path.join(data_dir, "output")

    # ndmin=2 keeps the (time, column) layout even when a file has only a
    # single row, so the [i, idx] indexing below stays valid
    spe_conc_mat = np.loadtxt(os.path.join(out_dir,
                                           "concentration_dlsode_M.csv"),
                              dtype=float,
                              delimiter=',',
                              ndmin=2)
    spe_k_mat = np.loadtxt(os.path.join(out_dir, "drc_dlsode_M.csv"),
                           dtype=float,
                           delimiter=',',
                           ndmin=2)
    reaction_rate_mat = np.loadtxt(os.path.join(out_dir,
                                                "reaction_rate_dlsode_M.csv"),
                                   dtype=float,
                                   delimiter=',',
                                   ndmin=2)

    n_points = np.shape(spe_conc_mat)[0]
    merchant_alpha_v = np.zeros(n_points)

    for i in range(1, n_points):
        total_sink_rate = spe_conc_mat[i, s_idx] * spe_k_mat[i, s_idx]
        # skip non-positive sink rates to avoid dividing by zero
        if total_sink_rate > 0:
            merchant_alpha_v[i] = reaction_rate_mat[i, r_idx] / total_sink_rate

    # the first time point reuses the value of the second one; with a
    # single time point there is nothing to copy from
    if n_points > 1:
        merchant_alpha_v[0] = merchant_alpha_v[1]

    merchant_alpha_fn = os.path.join(
        out_dir, prefix + "Merchant_alpha_" + "S" + str(s_idx) + "_R" +
        str(r_idx) + suffix + ".csv")
    np.savetxt(merchant_alpha_fn, merchant_alpha_v, fmt='%.15e')

    return
Beispiel #4
0
def parse_pathway_contains_species(data_dir,
                                   s_idx_ds=None,
                                   init_spe=60,
                                   atom_followed="C",
                                   end_t=1.0,
                                   species_path=False):
    """
    Find the pathways that contain only species from a given collection.

    Pathway names are read from
    ``<data_dir>/output/[species_]pathway_name_candidate<suffix>.csv``; the
    suffix comes from ``naming.get_suffix``.

    ``s_idx_ds`` ("ds" = data structure) is a list, set or other iterable
    of species indices.  Entries may be ints or digit strings; they are
    compared as strings because the indices parsed from a pathway name are
    strings.

    Returns the list of row indices of the matching pathways (also
    printed), or None when ``s_idx_ds`` is None.  A pathway name without
    any ``S<digits>`` token matches trivially.
    """
    if s_idx_ds is None:
        return None

    # regex captures are strings; normalise so that integer indices match too
    s_idx_set = {str(s_idx) for s_idx in s_idx_ds}

    suffix = naming.get_suffix(data_dir,
                               init_spe=init_spe,
                               atom_followed=atom_followed,
                               end_t=end_t)
    prefix = "species_" if species_path is True else ""

    f_n_path_name = os.path.join(
        data_dir, "output",
        prefix + "pathway_name_candidate" + suffix + ".csv")
    # a file with a single pathway loads as a 0-d array, which cannot be
    # iterated; promote it to 1-D
    p_name = np.atleast_1d(
        np.genfromtxt(f_n_path_name, dtype=str, delimiter=','))

    path_list = [
        idx for idx, path in enumerate(p_name)
        if all(s_idx in s_idx_set for s_idx in re.findall(r"S(\d+)", path))
    ]

    print(path_list)
    return path_list
Beispiel #5
0
def path_length_statistics(data_dir,
                           init_spe=62,
                           atom_followed="C",
                           end_t=1.0,
                           end_spe=None):
    """
    Write a histogram of path lengths for the 20 most probable pathways.

    The pathways come from ``path_prob_terminating_with_spe`` (sorted by
    descending probability), optionally restricted to those ending with
    species ``end_spe``.  The length of each pathway is whatever
    ``parse_pattern.parse_path_length`` reports for its name.

    Output: ``<data_dir>/output/path_length<suffix>[_S<end_spe>].csv`` with
    one ``length,count`` row per distinct length, sorted by length.  The
    suffix comes from ``naming.get_suffix``.  Returns None.
    """
    # keyword arguments are required here: passed positionally, end_t would
    # land in the ``tau`` slot and end_spe in the ``end_t`` slot
    d_f = path_prob_terminating_with_spe(data_dir,
                                         init_spe=init_spe,
                                         atom_followed=atom_followed,
                                         end_t=end_t,
                                         end_s_idx=end_spe)

    count_map = {}
    # pathway names live in the 'name' column of the returned frame
    for path_name in d_f['name'].iloc[:20]:
        length = int(parse_pattern.parse_path_length(path_name))
        count_map[length] = count_map.get(length, 0) + 1

    mat = [[int(length), int(count)]
           for length, count in sorted(count_map.items())]

    suffix = naming.get_suffix(data_dir,
                               init_spe=init_spe,
                               atom_followed=atom_followed,
                               end_t=end_t)
    if end_spe is not None:
        suffix += "_S" + str(end_spe)

    out_f_n = os.path.join(data_dir, "output", "path_length" + suffix + ".csv")
    np.savetxt(out_f_n,
               mat,
               fmt='%d',
               delimiter=',',
               newline='\n',
               header='',
               footer='',
               comments='# ')
Beispiel #6
0
def pathway_time_2_array_index(data_dir,
                               init_spe=None,
                               atom_followed="C",
                               end_t=1.0,
                               species_path=False,
                               time=1.0):
    """
    Convert a pathway time into an index of the pathway time array.

    The time array is read from
    ``<data_dir>/output/[species_]pathway_time_candidate<suffix>.csv`` (only
    the first row is used when the file is two-dimensional).  ``time`` is
    mapped onto the positions 0, 1, 2, ... of that array through
    ``interpolation.interp1d``; the result is truncated to int and clamped
    to the valid index range.

    Returns the resulting int index.
    """
    file_prefix = "species_" if species_path is True else ""
    file_suffix = get_suffix(data_dir,
                             init_spe=init_spe,
                             atom_followed=atom_followed,
                             end_t=end_t)

    time_file = os.path.join(
        data_dir, "output",
        file_prefix + "pathway_time_candidate" + file_suffix + ".csv")
    path_times = np.genfromtxt(time_file, dtype=float, delimiter=',')

    # a two-dimensional time table contributes only its first row
    if len(np.shape(path_times)) == 2:
        path_times = path_times[0, :]

    n_times = len(path_times)
    positions = [float(k) for k in range(n_times)]
    raw_idx = int(interpolation.interp1d(path_times, positions, time))

    # clamp into [0, n_times - 1]
    return max(0, min(raw_idx, n_times - 1))
Beispiel #7
0
def parse_spe_production_along_path(data_dir,
                                    top_n=10,
                                    spe_idx=10,
                                    init_spe=62,
                                    atom_followed="C",
                                    end_t=1.0,
                                    species_path=False,
                                    axis=0,
                                    path_branching_factor=False,
                                    s_consumption=False,
                                    s_production=True):
    """
    Count net production of species along the leading pathways.

    The species need not appear explicitly in a pathway name; the counts
    are obtained from the reactions on the pathway through
    ``parse_pattern.parse_species_along_path_using_reaction``.

    Parameters
    ----------
    top_n : int
        At most ``top_n + 1`` rows are read from the pathway name file.
    spe_idx : int, iterable of int, or None
        Species to count.  None or an empty collection makes the function
        return immediately without writing anything.
    axis : int
        Column to use when the pathway name file is two-dimensional.
    path_branching_factor : bool
        When True each count is multiplied by
        ``parse_pattern.calculate_path_branching_number`` for that pathway.
    s_consumption, s_production : bool
        Select whether consumption and/or production is counted; the value
        written is ``production - consumption``.

    Output: one integer per pathway in
    ``<data_dir>/output/[species_]pathway_species_production_count<suffix>_<ids>.csv``
    where ``<ids>`` is the species indices joined by ``_`` and the suffix
    comes from ``naming.get_suffix``.  Returns None.
    """
    if spe_idx is None:
        return None
    if isinstance(spe_idx, (int, np.integer)):
        spe_idx = [spe_idx]
    else:
        # materialise once so one-shot iterables survive repeated loops
        spe_idx = list(spe_idx)
        if not spe_idx:
            return None
    id_tmp = "_".join(str(s_i) for s_i in spe_idx)

    suffix = naming.get_suffix(data_dir,
                               init_spe=init_spe,
                               atom_followed=atom_followed,
                               end_t=end_t)
    prefix = "species_" if species_path is True else ""

    f_n_path_name = os.path.join(
        data_dir, "output",
        prefix + "pathway_name_candidate" + suffix + ".csv")
    pathname_data = np.genfromtxt(f_n_path_name, dtype=str, max_rows=top_n + 1)

    # in case of two dimensional pathway name
    if len(np.shape(pathname_data)) == 2:
        pathname_data = pathname_data[:, axis]
    # a single pathway loads as a 0-d array, which cannot be iterated
    pathname_data = np.atleast_1d(pathname_data)

    net_reactant = psri.parse_reaction_net_reactant(data_dir)
    net_product = psri.parse_reaction_net_product(data_dir)
    s_p_r_c = psri.parse_species_pair_reaction(data_dir)

    # only needed for the branching number
    if path_branching_factor is True:
        atom_scheme = asch.get_atom_scheme(data_dir)
        s_idx_name, _ = psri.parse_spe_info(data_dir)

    count_species = parse_pattern.parse_species_along_path_using_reaction

    s_p_c = []
    for p_n in pathname_data:
        spe_consumption_count = 0
        spe_production_count = 0
        for s_i in spe_idx:
            if s_consumption is True:
                spe_consumption_count += count_species(
                    p_n, net_reactant, s_i, s_p_r_c)
            if s_production is True:
                spe_production_count += count_species(
                    p_n, net_product, s_i, s_p_r_c)

        path_branching_number = 1
        if path_branching_factor is True:
            path_branching_number = parse_pattern.calculate_path_branching_number(
                pathname=p_n,
                net_reactant=net_reactant,
                net_product=net_product,
                s_idx_name=s_idx_name,
                atom_scheme=atom_scheme,
                atom_followed=atom_followed)

        s_p_c.append((spe_production_count - spe_consumption_count) *
                     path_branching_number)

    if id_tmp != "":
        suffix += "_" + id_tmp
    f_n_spe_production_count = os.path.join(
        data_dir, "output",
        prefix + "pathway_species_production_count" + suffix + ".csv")

    np.savetxt(f_n_spe_production_count, s_p_c, fmt='%d')
Beispiel #8
0
def init_directed_network(data_dir,
                          path_idx=None,
                          init_spe=None,
                          atom_followed="C",
                          end_t=None,
                          species_path=False,
                          time_axis=0):
    """
    Build a directed species network (no parallel edges) from pathways.

    Pathway names and probabilities are read from
    ``[species_]pathway_name_candidate<suffix>.csv`` and
    ``[species_]pathway_prob<suffix>.csv`` in ``<data_dir>/output``; the
    suffix comes from ``get_suffix``.

    Parameters
    ----------
    path_idx : iterable of int or None
        Row indices of the pathways to include; None includes all of them.
    species_path : bool
        When exactly False the pathway names contain reactions
        (``R<digits>``) between species and each edge records the reactions
        that connect its end points.  Otherwise every edge records the
        placeholder reaction ``'-1'``.
    time_axis : int
        Column to use when the name or probability file is two-dimensional.

    Returns
    -------
    networkx.DiGraph whose nodes are species display names (from
    ``change_spe_name``).  Node ``weight`` is the summed probability of the
    pathways visiting the node, passed through ``rescale_array(.., 1.0,
    5.0)``.  Edge ``weight`` is the summed probability of the pathways
    using the edge, passed through ``rescale_array(.., 3.0, 15.0)``.  Edge
    ``name`` is the comma-joined, numerically sorted reaction labels.

    Progress information (file names, the head of the pathway table and
    each pathway name before and after clean-up) is printed.
    Requires networkx >= 2.0 for ``DiGraph.nodes[...]``.
    """
    spe_idx_name_dict, _ = psri.parse_spe_info(data_dir)

    suffix = get_suffix(data_dir,
                        init_spe=init_spe,
                        atom_followed=atom_followed,
                        end_t=end_t)
    prefix = "species_" if species_path is True else ""

    f_n_path_name = os.path.join(
        data_dir, "output",
        prefix + "pathway_name_candidate" + suffix + ".csv")
    f_n_path_prob = os.path.join(data_dir, "output",
                                 prefix + "pathway_prob" + suffix + ".csv")

    print(f_n_path_name, f_n_path_prob)
    p_n = np.genfromtxt(f_n_path_name, dtype=str, delimiter=',')
    p_p = np.genfromtxt(f_n_path_prob, dtype=float, delimiter=',')

    # in case of two dimensional pathway name / probability
    if len(np.shape(p_n)) == 2:
        p_n = p_n[:, time_axis]
    if len(np.shape(p_p)) == 2:
        p_p = p_p[:, time_axis]
    # a single pathway loads as a 0-d array; make it indexable
    p_n = np.atleast_1d(p_n)
    p_p = np.atleast_1d(p_p)

    # no selection means every pathway; materialise the selection so it can
    # be traversed twice
    if path_idx is None:
        path_idx = range(len(p_n))
    path_idx = list(path_idx)

    # retrieve pathway name and pathway probability before sort
    p_n = [p_n[i] for i in path_idx]
    p_p = [p_p[i] for i in path_idx]

    # set the data type separately
    d_f_n = pd.DataFrame(p_n, columns=['name'], dtype=str)
    d_f_p = pd.DataFrame(p_p, columns=['prob'], dtype=float)
    d_f = pd.concat([d_f_n, d_f_p], axis=1)

    d_f.sort_values(by='prob',
                    ascending=False,
                    inplace=True,
                    na_position='last')
    d_f.reset_index(drop=True, inplace=True)
    print(d_f.head())

    # temporary directed graph holding raw (un-rescaled) weights
    d_g_tmp = nx.DiGraph()

    # grouping used by change_spe_name when relabelling species; taken from
    # the data_dir argument so it matches the files read above
    spe_union_find_group = global_settings.get_union_find_group(
        data_dir, atom_followed)

    def _display_name(spe):
        """Map a species index string to the node label used in the graph."""
        return change_spe_name(spe,
                               spe_idx_name_dict,
                               union_find=spe_union_find_group)

    # record all nodes, taken from the unmodified pathway names
    nodes = set()
    for _, row in d_f.iterrows():
        nodes.update(
            _display_name(spe) for spe in re.findall(r"S(\d+)", row['name']))

    for node in nodes:
        d_g_tmp.add_node(node, weight=0.0, label=str(node))

    for _, row in d_f.iterrows():
        prob = float(row['prob'])

        # get rid of R-1000003S90, don't need it here
        print(row['name'])
        path_name_tmp = re.sub(r"R-\d+S\d+", r'', row['name'])
        print(path_name_tmp)

        spe_names = [
            _display_name(spe)
            for spe in re.findall(r"S(\d+)", path_name_tmp)
        ]
        # reactions are only present when the pathway is not species-only
        matched_reaction = None
        if species_path is False:
            matched_reaction = re.findall(r"R(\d+)", path_name_tmp)

        for idx, dest in enumerate(spe_names):
            d_g_tmp.nodes[dest]['weight'] += 1.0 * prob
            if idx == 0:
                continue

            src = spe_names[idx - 1]
            if matched_reaction is not None:
                # the (idx-1)-th reaction links species idx-1 and idx
                rxn = change_rxn_name(matched_reaction[idx - 1])
            else:
                rxn = '-1'

            if d_g_tmp.has_edge(src, dest):
                d_g_tmp[src][dest]['weight'] += 1.0 * prob
                d_g_tmp[src][dest]['reactions'].add(rxn)
            else:
                d_g_tmp.add_edge(src,
                                 dest,
                                 reactions=set([rxn]),
                                 weight=1.0 * prob)

    # update directed graph:
    # 1. reactions is originally a set, combine to get a string of reactions
    # 2. rescale node weight
    # 3. rescale edge weight
    node_weight = [d_g_tmp.nodes[node]['weight'] for node in d_g_tmp.nodes()]
    edge_weight = [d_g_tmp[src][dest]['weight'] for src, dest in d_g_tmp.edges()]
    node_weight = rescale_array(node_weight, 1.0, 5.0)
    edge_weight = rescale_array(edge_weight, 3.0, 15.0)

    # final directed graph; nodes/edges are traversed in the same order as
    # when the weight lists were built, so positions line up
    di_graph = nx.DiGraph()
    for idx, node in enumerate(d_g_tmp.nodes()):
        di_graph.add_node(node, weight=node_weight[idx])
    for idx, (src, dest) in enumerate(d_g_tmp.edges()):
        rxn_sorted = sorted(d_g_tmp[src][dest]['reactions'], key=int)
        name = ",".join(rxn_sorted)
        di_graph.add_edge(src, dest, name=name, weight=edge_weight[idx])

    return di_graph