def intersect_directly(R, internal_metabolites, network, verbose=True, tol=1e-12, sort_order='min_adj',
                       intermediate_cone_path='', manual_override=''):
    """

    :param R:
    :param internal_metabolites:
    :param network:
    :param verbose:
    :param tol:
    :param sort_order: Different options for determining metabolite intersection order. As a default we will intersect
    the metabolite that adds the minimal number of adjacencies in the model. Other options are 'min_lp',
    'max_lp_per_adj', and 'min_connections'.
    :return:
    """
    if intermediate_cone_path:
        R, internal_metabolites, network = pick_up_intermediate_cone(internal_metabolites, network,
                                                                     intermediate_cone_path)

    # rows are metabolites
    deleted = np.array([])
    it = 1
    internal = list(internal_metabolites)
    internal.sort()
    rows_removed_redund = 0
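    # 'deleted' records the original indices of metabolites that have already been eliminated; since each
    # elimination removes a row from R, the current row of the metabolite originally at index j is
    # j - len(deleted[deleted < j]).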

    while len(internal) > 0:
        sorting = sort_order

        # For each internal metabolite, calculate the number of producing reactions times the number of consuming
        # reactions; R[j - len(deleted[deleted < j])] is the current row for the metabolite that was once at the j-th place
        n_lps = [np.sum(R[j - len(deleted[deleted < j]), :] > 0) * np.sum(R[j - len(deleted[deleted < j]), :] < 0) for j
                 in internal]
        # If there is a metabolite that can be deleted without any lps being done, we will do it immediately
        if np.min(n_lps) == 0:
            sorting = 'min_lp'
        if manual_override and (it == 1):
            i = internal[int(manual_override)]
        elif sorting == 'min_lp':
            i = internal[np.argmin(n_lps)]
        elif sorting == 'min_connections':
            # Alternative way of choosing a metabolite: choose the one that is minimally connected
            connections = []
            adj = get_metabolite_adjacency(R)
            for met in internal:
                curr_ind = met - len(deleted[deleted < met])
                connections.append(int(np.sum(adj[:, curr_ind])))

            min_connect_inds = np.array(internal)[np.where(connections == np.min(connections))[0]]
            # Pick the one with least LPs to be done, if equally connected
            i = min_connect_inds[np.argmin(
                [np.sum(R[j - len(deleted[deleted < j]), :] > 0) * np.sum(R[j - len(deleted[deleted < j]), :] < 0)
                 for j in min_connect_inds])]

        elif sorting == 'min_adj' or sorting == 'max_lp_per_adj':
            # Alternative way of choosing a metabolite: choose the one whose removal increases the adjacencies the least
            adj_added = []  # For each metabolite, the number of metabolite-metabolite adjacencies its removal would add
            adj = get_metabolite_adjacency(R)
            old_n_adjs = np.sum(adj)
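            # Simulate removing each candidate metabolite: its neighbours become mutually adjacent (its row is added
            # to theirs), self-adjacencies are zeroed, the matrix is made binary again, and the candidate's
            # row/column are dropped; the change in the total count gives the number of adjacencies added.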
            for met in internal:
                new_adj = adj.copy()
                curr_ind = met - len(deleted[deleted < met])
                new_adj[np.where(adj[:, curr_ind] != 0), :] += new_adj[curr_ind, :]
                np.fill_diagonal(new_adj, 0)
                new_adj = np.minimum(new_adj, 1)
                new_adj = np.delete(np.delete(new_adj, curr_ind, axis=0), curr_ind, axis=1)
                new_n_adjs = np.sum(new_adj)
                adj_added.append(int(new_n_adjs - old_n_adjs))

            if sorting == 'min_adj':
                min_adj_inds = np.array(internal)[np.where(adj_added == np.min(adj_added))[0]]
                # Pick the one with least LPs to be done, if adding equal adjacencies
                i = min_adj_inds[np.argmin(
                    [np.sum(R[j - len(deleted[deleted < j]), :] > 0) * np.sum(
                        R[j - len(deleted[deleted < j]), :] < 0) for j in min_adj_inds])]
            elif sorting == 'max_lp_per_adj':
                lp_per_adj = np.array(
                    [np.sum(R[j - len(deleted[deleted < j]), :] > 0) * np.sum(R[j - len(deleted[deleted < j]), :] < 0)
                     for j in internal]) / (np.array(adj_added) - np.min(adj_added) + 1)
                i = internal[np.argmax(lp_per_adj)]

        # i - len(deleted[deleted < i]) is the current row for the metabolite that was once at the i-th place
        to_remove = i - len(deleted[deleted < i])
        if verbose:
            mp_print("\n\nIteration %d (internal metabolite = %d: %s) of %d" % (
                it, to_remove, [m.id for m in network.metabolites][to_remove], len(internal_metabolites)))
            mp_print("Possible LP amounts for this step:\n" + ", ".join(np.array(n_lps).astype(str)))
            mp_print("Total: %d" % sum(n_lps))
            if manual_override and (it == 1):
                mp_print("Sorting was manually chosen for this first step.\n")
            elif sorting == 'min_adj':
                mp_print("Possible adjacencies added for this step:\n" + ", ".join(np.array(adj_added).astype(str)))
                mp_print("Minimal adjacency option chosen.\n")
            elif sorting == 'max_lp_per_adj':
                mp_print("Possible lps per adjacency added for this step:\n" + ", ".join(
                    np.round(np.array(lp_per_adj), 2).astype(str)))
                mp_print("Rescaled maximal LPs per added adjacency option chosen.\n")
            elif sorting == 'min_connections':
                mp_print("Possible connectedness of metabolites for this sstep:\n" + ", ".join(
                    np.array(connections).astype(str)))
                mp_print("Minimally connected option chosen.\n")
            elif sorting == 'min_lp':
                mp_print("Minimal LPs chosen.\n")
            it += 1

        # input("waiting")
        if np.sum(R[i - len(deleted[deleted < i]), :] > 0) * np.sum(R[i - len(deleted[deleted < i]), :] < 0) == 0:
            R = iteration_without_lps(R, to_remove, network)
        else:
            R, removed = eliminate_metabolite(R, to_remove, network, calculate_adjacency=True)
            rows_removed_redund += removed
        deleted = np.append(deleted, i)
        internal.remove(i)

        if get_process_rank() == 0:
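            # Only the first process writes the current cone to disk; this intermediate file can presumably be
            # passed back in via intermediate_cone_path to resume an interrupted computation.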
            try:
                metab_ids = [metab.id for metab in network.metabolites]
                np.savetxt('intermediate_conversion_cone.csv', np.transpose(R), delimiter=',',
                           header=','.join(metab_ids), comments='')
            except OverflowError:
                mp_print('Intermediate result cannot be stored due to too large numbers.')

    # remove artificial rays introduced by splitting metabolites
    R, ids = unsplit_metabolites(R, network)

    if verbose:
        mp_print("\n\tRows removed by redund overall: %d\n" % rows_removed_redund)

    return R, ids
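

# A minimal standalone sketch of the LP-counting heuristic used above, assuming rows of R are metabolites and
# columns are rays: the number of candidate LPs for a metabolite is the number of rays producing it times the
# number of rays consuming it. The function name below is illustrative and not part of ecmtool's API.
def _sketch_count_lps_per_metabolite(R):
    import numpy as np
    R = np.asarray(R)
    producing = np.sum(R > 0, axis=1)  # rays producing each metabolite (positive entries per row)
    consuming = np.sum(R < 0, axis=1)  # rays consuming each metabolite (negative entries per row)
    return producing * consuming  # zero means the metabolite can be eliminated without solving any LPs

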
def calc_ECMs(file_path, print_results=False, input_file_path=''):
    """
    Calculates ECMs using ECMtool
    :return ecms: np.array
            This array contains the ECMs as columns and the metabolites as rows
    :param file_path: string
            String with path to the SBML-file.
    :param reactions_to_tag: list with strings
            List with reaction-IDs of reactions that need to be tagged
    :param print_results: Boolean
    :param hide_metabs: indices of metabolites that should be ignored
    """
    # Step 1: build the network
    network = extract_sbml_stoichiometry(file_path, determine_inputs_outputs=True)

    external_inds = [ind for ind, metab in enumerate(network.metabolites) if metab.is_external]

    """The following are just for checking the inputs to this program."""
    metab_info_ext = [(ind, metab.id, metab.name, metab.direction) for ind, metab in
                      enumerate(network.metabolites) if metab.is_external]

    """I extract some information about the external metabolites for checking"""
    metab_info_ext_df = pd.DataFrame(metab_info_ext, columns=['metab_ind', 'metab_id', 'metab_name', 'Direction'])

    """You can choose to save this information, by uncommenting this line"""
    #    metab_info_ext_df.to_csv(path_or_buf='external_info_iJR904.csv', index=False)

    """If an input file is supplied, we set in input, output, and hide metabolites from this"""
    if input_file_path:  # If no input file is supplied, standard detection of ecmtool is used
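        # Expected columns in the CSV (inferred from the usage below): 'Index' (metabolite index in
        # network.metabolites) and the 0/1 flags 'Input', 'Output', and 'Hidden'.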
        info_metabs_df = pd.read_csv(input_file_path)
        info_metabs_input = info_metabs_df[info_metabs_df.Input == 1]
        info_metabs_output = info_metabs_df[info_metabs_df.Output == 1]
        info_metabs_hidden = info_metabs_df[info_metabs_df.Hidden == 1]

        # Get the indices that correspond to the metabolites that are inputs, outputs, or hidden.
        input_inds = list(info_metabs_input.Index.values)
        output_inds = list(info_metabs_output.Index.values) + [ind for ind, metab in enumerate(network.metabolites) if
                                                               metab.id == 'objective']
        hide_inds = list(info_metabs_hidden.Index.values)
        prohibit_inds = [ind for ind, metab in enumerate(network.metabolites) if
                         (metab.is_external) & (not ind in input_inds + output_inds + hide_inds) & (
                             not metab.id == 'objective')]
        both_inds = [ind for ind in range(len(network.metabolites)) if (ind in input_inds) and (ind in output_inds)]

        # Use input information to set input, output, hidden, and prohibited metabolites
        network.set_inputs(input_inds)
        network.set_outputs(output_inds)
        network.set_both(both_inds)
        network.prohibit(prohibit_inds)
        network.hide(hide_inds)

        # Print comma-separated lists of input information. These lists can be used for running the same computation
        # via command line, for example to use other arguments
        print(','.join(map(str, input_inds)))
        print(','.join(map(str, output_inds)))
        print(','.join(map(str, hide_inds)))
        print(','.join(map(str, prohibit_inds)))

    """Keep a copy of the full network before compression. This can be nice for later."""
    full_network = copy.deepcopy(network)
    orig_N = network.N

    """"Split in and out metabolites, to facilitate ECM computation"""
    network.split_in_out(only_rays=False)

    """Stap 2: compress network"""
    network.compress(verbose=True)

    """Stap 3: Ecms enumereren"""
    #  In this script, indirect intersection is used. Use command line options to use direct intersection
    cone = get_conversion_cone(network.N, network.external_metabolite_indices(), network.reversible_reaction_indices(),
                            network.input_metabolite_indices(), network.output_metabolite_indices(), only_rays=False,
                            verbose=True)
    cone_transpose, ids = unsplit_metabolites(np.transpose(cone), network)
    cone = np.transpose(cone_transpose)

    if print_results:
        print_ecms_direct(np.transpose(cone), ids)

    cone = cone.transpose()  # columns will be the different ECMs, rows are metabolites

    return cone, ids, full_network
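

# Usage sketch: 'model.xml' below is a placeholder path to an SBML model, not a file shipped with this code.
if __name__ == '__main__':
    ecms, metab_ids, full_net = calc_ECMs('model.xml', print_results=True)
    print('Found %d ECMs over %d metabolites' % (ecms.shape[1], ecms.shape[0]))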