def independent_rows(A, verbose=False):
    """Return a subset of A's rows spanning the same row space.

    If A already has full row rank it is returned unchanged. Otherwise rows
    are scanned in order and kept only when they raise the numeric rank,
    stopping as soon as the rank of the full matrix is reached.
    """
    n_rows, _ = A.shape
    A_float = np.asarray(A, dtype='float')
    target_rank = np.linalg.matrix_rank(A_float)

    # Full row rank: every row is already independent.
    if target_rank == n_rows:
        return A

    selected = np.asarray([], dtype='int')
    current_rank = 0
    for row in range(n_rows):
        if (row + 1) % 100 == 0:
            mp_print("Finding independent rows is on row " + str(row) + " of " + str(n_rows))
        candidate = np.append(selected, row)
        candidate_rank = np.linalg.matrix_rank(A_float[candidate])
        # Keep the row only when it enlarges the spanned space.
        if candidate_rank > current_rank:
            selected = candidate
            current_rank = candidate_rank
        if current_rank == target_rank:
            break

    if current_rank != target_rank:
        mp_print("\t(rows) Rank deficiency! Got rank %d instead of %d" % (current_rank, target_rank))
    return A[selected]
def cancel_with_cycle(R, met, cycle_ind, network, verbose=True, tol=1e-12, removable_is_ext=False):
    """Cancel metabolite ``met`` out of all rays by combining them with the cycle ray ``cycle_ind``.

    Every ray that uses ``met`` gets a multiple of the cycle ray added so its
    coefficient for ``met`` becomes zero. Rays that become (numerically) zero
    are dropped, as is the cycle ray itself. Unless ``removable_is_ext``, the
    now all-zero metabolite row is deleted and the network is kept in sync.

    :param R: ray matrix (rows are metabolites, columns are rays)
    :param met: row index of the metabolite being cancelled
    :param cycle_ind: column index of the cycle ray used for cancelling
    :param network: Network object updated alongside R
    :param tol: rays whose absolute sum falls below this are treated as zero
    :param removable_is_ext: when True, keep the metabolite row and return the
        cycle's nonzero coefficients per metabolite id instead
    :return: (next_R, external_cycle_dict) - the reduced ray matrix and, when
        ``removable_is_ext``, a dict {metabolite_id: coefficient}; otherwise None
    """
    cancelling_reaction = R[:, cycle_ind]
    reactions_using_met = [i for i in range(R.shape[1]) if R[met, i] != 0 and i != cycle_ind]

    next_R = np.copy(R)
    to_be_dropped = [cycle_ind]

    for reac_ind in reactions_using_met:
        coeff_cycle = R[met, cycle_ind]
        coeff_reac = R[met, reac_ind]
        # NOTE(review): np.gcd requires integer arguments - presumably R holds
        # integer coefficients when this is called; confirm with the callers.
        new_ray = (R[:, reac_ind] - (coeff_reac / coeff_cycle) * R[:, cycle_ind]) * abs(coeff_cycle / np.gcd(coeff_cycle, coeff_reac))
        if sum(abs(new_ray)) > tol:
            next_R[:, reac_ind] = new_ray
        else:
            # Ray was cancelled to (numerically) zero: drop it entirely.
            to_be_dropped.append(reac_ind)

    # Delete cycle ray that is now the only one that produces met, so has to be zero + rays that are full of zeros now
    next_R = np.delete(next_R, to_be_dropped, axis=1)
    network.drop_reactions(to_be_dropped)

    if not removable_is_ext:
        # delete all-zero row
        next_R = np.delete(next_R, met, 0)
        network.drop_metabolites([met])
        mp_print("After removing this metabolite, we have %d metabolites and %d rays" %
                 (next_R.shape[0], next_R.shape[1]))
        external_cycle_dict = None
    else:
        # External metabolite: keep the row, but report the cycle's nonzero
        # coefficients per metabolite id to the caller.
        external_cycle_dict = {}
        for ind, metab in enumerate(network.metabolites):
            if R[ind, cycle_ind] != 0:
                external_cycle_dict.update({metab.id: R[ind, cycle_ind]})

    return next_R, external_cycle_dict
def get_start_basis(A):
    """Greedily pick column indices of A that form a basis of its column space.

    Columns are scanned left to right and kept only when they raise the
    numeric rank; the scan stops once full row rank is reached. A warning is
    printed when A turns out to be row-rank deficient.
    """
    n_rows, n_cols = A.shape
    chosen = np.array([], dtype='int')
    current_rank = 0

    for col in range(n_cols):
        if col in chosen:
            continue
        trial = np.append(chosen, col)
        trial_rank = np.linalg.matrix_rank(A[:, trial])
        # Keep the column only when it enlarges the spanned space.
        if trial_rank > current_rank:
            chosen = trial
            current_rank = trial_rank
        if current_rank == n_rows:
            break

    if current_rank != n_rows:
        mp_print("\tRank deficiency! Got rank %d instead of %d" % (current_rank, n_rows))
    return chosen
# --- Example 4 (scraped separator; original text: "Ejemplo n.º 4" / "0") ---
def pre_redund(matrix_indep_rows):
    """In this function, we try to filter out many redundant rays, without claiming that all redundant rays are
    filtered out. Running a redundancy-removal method is still needed after this.

    :param matrix_indep_rows: ray matrix with linearly independent rows
        (columns are rays)
    :return: indices of the columns that passed the filter (kept rays)
    """
    # Start from some column basis; those rays are kept unconditionally.
    start_basis = get_basis_columns_qr(matrix_indep_rows)
    start_basis_inv = np.linalg.inv(matrix_indep_rows[:, start_basis])

    filtered_inds = start_basis
    filtered_rays = matrix_indep_rows[:, start_basis]
    n_rays = matrix_indep_rows.shape[1]
    local_basis_inds = np.arange(len(start_basis))
    for i in range(n_rays):
        if i % 100 == 0:
            # Redundant count = rays seen so far minus the ones kept so far.
            mp_print(
                "Passed %d of %d (%f %%) through redundancy filter. Found %d redundant rays."
                % (i, n_rays, i / n_rays * 100,
                   i - len(np.where(filtered_inds < i)[0])))
        if i not in filtered_inds:
            # Append the candidate ray as the last column and test whether it
            # is extreme with respect to the rays kept so far.
            new_ray = matrix_indep_rows[:, i][:, np.newaxis]
            filtered_rays_new = np.append(filtered_rays, new_ray, axis=1)
            basis = add_first_ray(filtered_rays_new, start_basis_inv,
                                  local_basis_inds,
                                  filtered_rays_new.shape[1] - 1)
            extreme = check_extreme(filtered_rays_new,
                                    filtered_rays_new.shape[1] - 1, basis)
            if extreme:
                # Non-redundant: keep the ray.
                filtered_rays = filtered_rays_new
                filtered_inds = np.append(filtered_inds, i)

    return filtered_inds
# --- Example 5 (scraped separator; original text: "Ejemplo n.º 5" / "0") ---
def cancel_with_cycle_redund(R, met, cycle_ind, verbose=True, tol=1e-12):
    """Cancel metabolite ``met`` from every ray using the cycle ray ``cycle_ind``.

    Rays are updated in place in R; rays that become (numerically) zero are
    deleted together with the cycle ray itself.

    :param R: ray matrix (rows are metabolites, columns are rays); modified in place
    :param met: row index of the metabolite to cancel
    :param cycle_ind: column index of the cycle ray used for cancelling
    :param tol: rays whose absolute sum drops below this are treated as zero
    :return: the reduced ray matrix
    """
    cycle_ray = R[:, cycle_ind]
    using_met = [col for col in range(R.shape[1])
                 if R[met, col] != 0 and col != cycle_ind]
    drop_cols = [cycle_ind]

    total = len(using_met)
    for count, col in enumerate(using_met):
        if verbose and count % 10000 == 0:
            mp_print(
                "Removed cycle metab from %d of %d reactions (%f %%)" %
                (count, total, count / total * 100))
        factor = R[met, col] / cycle_ray[met]
        candidate = R[:, col] - factor * cycle_ray
        if sum(abs(candidate)) > tol:
            R[:, col] = candidate
        else:
            drop_cols.append(col)

    # The cycle ray is now the only ray producing `met`, so it must go, along
    # with any rays that were cancelled down to all zeros.
    return np.delete(R, drop_cols, axis=1)
def get_remove_metabolite(R, network, reaction, verbose=True, allow_external=False):
    """Pick the first metabolite used by ``reaction`` that may be cancelled.

    External metabolites are skipped unless ``allow_external`` is True.

    :return: the metabolite (row) index, or -1 when the reaction touches only
        external metabolites
    """
    coeffs = R[:, reaction]
    for met_ind, coeff in enumerate(coeffs):
        eligible = allow_external or not network.metabolites[met_ind].is_external
        if eligible and coeff != 0:
            return met_ind
    mp_print("\tWarning: reaction to augment has only external metabolites")
    return -1
def compress_after_cycle_removing(network, verbose=True):
    """Run the singly-used-metabolite compression pass and report the shrinkage.

    :param network: Network object; compressed in place via cancel_singly
    :return: the same network, after compression
    """
    mets_before, reacs_before = network.N.shape
    network.cancel_singly(verbose=verbose)
    # A dead-end cancellation pass (cancel_dead_ends) used to run here as well.

    if verbose:
        reacs_removed = reacs_before - network.N.shape[1]
        mets_removed = mets_before - network.N.shape[0]
        mp_print('Removed %d reactions and %d metabolites in total' %
                 (reacs_removed, mets_removed))

    return network
def get_nonsingular_pair(A, basis, entering, leaving, basis_hashes):
    """Find an entering/leaving index pair that keeps A[:, basis] nonsingular.

    Tries every (enter, leave) combination in order; combinations whose
    resulting basis was already seen (via ``basis_hashes``) are skipped.
    Falls back to the first entering/leaving pair when nothing suitable
    is found. Note that ``basis`` is modified in place and returned.

    :param A: constraint matrix
    :param basis: current basis column indices (numpy int array, mutated)
    :param entering: candidate column indices to enter the basis
    :param leaving: candidate positions in ``basis`` to be replaced
    :param basis_hashes: set of hashes of previously visited (sorted) bases
    :return: the (possibly updated) basis
    """
    for enter in entering:
        for leave in leaving:
            original = basis[leave]
            basis[leave] = enter
            if np.linalg.matrix_rank(A[:, basis]) >= min(A[:, basis].shape):
                # .tobytes() replaces the deprecated ndarray.tostring()
                # (removed in NumPy 2.0); both return the same bytes, so
                # previously recorded hashes still match.
                if hash(np.sort(basis).tobytes()) in basis_hashes:
                    basis[leave] = original
                    continue
                return basis
            basis[leave] = original
    mp_print("Did not find non-singular entering+leaving index...")
    basis[leaving[0]] = entering[0]
    return basis
# --- Example 9 (scraped separator; original text: "Ejemplo n.º 9" / "0") ---
def set_inoutputs(inputs, outputs, network):
    """Parse comma-separated metabolite index strings and mark them on the network.

    When inputs are given but outputs are not, every external metabolite that
    is not an input is treated as an output. Metabolites appearing in both
    lists are marked as bidirectional via set_both.
    """
    input_inds = [int(token) for token in inputs.split(',') if len(token)]
    output_inds = [int(token) for token in outputs.split(',') if len(token)]

    if len(output_inds) < 1 and len(input_inds) >= 1:
        # No outputs specified: default every non-input external metabolite
        # to being an output.
        mp_print(
            'No output metabolites given or determined from model. All non-input metabolites will be defined as outputs.'
        )
        output_inds = np.setdiff1d(network.external_metabolite_indices(), input_inds)

    network.set_inputs(input_inds)
    network.set_outputs(output_inds)

    overlap = np.intersect1d(input_inds, output_inds)
    if len(overlap):
        for ind in overlap:
            mp_print(
                'Metabolite %s was marked as both only input and only output. It is set to both.'
                % (network.metabolites[ind].id))
        network.set_both(overlap)
    return
def iteration_without_lps(R, met, network):
    """Remove metabolite ``met`` when no LPs are needed.

    Keeps only the rays that do not use the metabolite (coefficient
    numerically zero), then drops the now all-zero metabolite row and updates
    the network accordingly.

    :return: the reduced ray matrix (possibly empty)
    """
    kept_columns = [R[:, col] for col in range(R.shape[1]) if abs(R[met, col]) < 1e-12]
    next_matrix = np.transpose(np.asarray(kept_columns))

    # Drop the all-zero metabolite row, unless no rays survived at all.
    if next_matrix.shape[0]:
        next_matrix = np.delete(next_matrix, met, 0)

    network.drop_metabolites([met])
    if next_matrix.shape[0]:
        mp_print("After removing this metabolite, we have %d metabolites and %d rays" %
                 (next_matrix.shape[0], next_matrix.shape[1]))
    else:
        mp_print("After removing this metabolite, we have no rays left")

    return next_matrix
def determine_adjacency(R, i, j, basis, tol=1e-10):
    """LP-based adjacency test for rays i and j of R.

    :param R: ray matrix (columns are generating rays)
    :param i: index of the 'plus' ray
    :param j: index of the 'minus' ray
    :param basis: starting basis containing both rays
    :param tol: initial perturbation tolerance (grown on retries)
    :return: 1 when the rays are adjacent, or when the test is inconclusive
        (adjacency is then assumed); 0 otherwise
    """
    # A_ub, b_ub, A_eq, b_eq, c, x0 = setup_LP(R, i, j) # We don't need A_ub, and its causing memory issues
    A_eq, b_eq, c, x0 = setup_LP(R, i, j)
    # Use the caller-supplied tolerance for the first perturbation as well;
    # it was previously hard-coded to 1e-10 (same as the default), which is
    # inconsistent with check_extreme and silently ignored a custom tol.
    b_eq, x0 = perturb_LP(b_eq, x0, A_eq, basis, tol)
    KKT, status = kkt_check(c, A_eq, x0, basis, i, j)

    counter_seeds = 1
    # status 2 means the check appears to be cycling: retry with ever larger
    # perturbations and fresh seeds, giving up after 20 attempts.
    while status == 2:
        tol = tol + 1e-10
        b_eq, x0 = perturb_LP(b_eq, x0, A_eq, basis, tol, seed=42 + counter_seeds)
        KKT, status = kkt_check(c, A_eq, x0, basis, i, j)
        counter_seeds = counter_seeds + 1
        if counter_seeds % 20 == 0:
            mp_print(
                'Warning: Adjacency check keeps cycling, even with different perturbations. Reporting rays as adjacent.',
                PRINT_IF_RANK_NONZERO=True)
            status = 0
            KKT = True

    if status == 0:
        return 1 if KKT else 0
    else:
        # status 1: numerical (LinAlg) trouble -- err on the side of adjacency.
        mp_print('LinAlgError in an adjacency test. Check if this happens more often.', PRINT_IF_RANK_NONZERO=True)
        mp_print('Now assuming that rays are adjacent.', PRINT_IF_RANK_NONZERO=True)
        return 1
# --- Example 12 (scraped separator; original text: "Ejemplo n.º 12" / "0") ---
def check_extreme(R, i, basis, tol=1e-10):
    """LP-based test of whether ray i of R is extreme.

    :param R: ray matrix (columns are rays)
    :param i: index of the ray being tested
    :param basis: starting basis for the LP
    :param tol: initial perturbation tolerance (grown on retries)
    :return: True when the KKT check reports the ray as extreme, and also when
        the LP runs into numerical trouble (extremeness is then assumed)
    """
    A_eq, b_eq, c, x0 = setup_LP_redund(R, i)
    b_eq, x0 = perturb_LP(b_eq, x0, A_eq, basis, tol)
    KKT, status = kkt_check_redund(c, A_eq, x0, basis, i)

    attempt = 1
    # status 2 means the check appears to be cycling: retry with ever larger
    # perturbations and fresh seeds, giving up after 20 attempts.
    while status == 2:
        tol += 1e-10
        b_eq, x0 = perturb_LP(b_eq, x0, A_eq, basis, tol, seed=42 + attempt)
        KKT, status = kkt_check_redund(c, A_eq, x0, basis, i)
        attempt += 1
        if attempt % 20 == 0:
            mp_print(
                'Warning: Adjacency check keeps cycling, even with different perturbations. Reporting rays as adjacent.',
                PRINT_IF_RANK_NONZERO=True)
            status = 0
            KKT = True

    if status == 0:
        return KKT

    # Numerical (LinAlg) trouble: err on the side of extremeness.
    mp_print(
        'LinAlgError in an adjacency test. Check if this happens more often.',
        PRINT_IF_RANK_NONZERO=True)
    mp_print('Now assuming that rays are adjacent.',
             PRINT_IF_RANK_NONZERO=True)
    return True
def generate_BFS(R, i, j, eps):
    """Generate a basic feasible solution for the adjacency LP of rays i and j.

    Builds a point that places weight on rays i and j plus slack such that
    the number of nonzeros matches the expected basis size of the LP.

    :param R: ray matrix (columns are rays)
    :param i: index of the first ('plus') ray
    :param j: index of the second ('minus') ray
    :param eps: perturbation vector -- assumed to have length 2 * R.shape[0],
        matching the doubled rays below (TODO confirm with callers)
    :return: concatenated vector of ray weights and slacks
    """
    # Each ray is doubled as (ray, -ray) so it lines up with eps.
    ray1 = np.array(np.concatenate((R[:, i], -R[:, i])), dtype='float')
    ray2 = np.array(np.concatenate((R[:, j], -R[:, j])), dtype='float')
    with np.errstate(divide='ignore', invalid='ignore'):
        # alpha, k: presumably the smallest positive entry of eps / ray1 and
        # its index (per smallest_positive) -- confirm against its definition.
        alpha, k = smallest_positive(eps / ray1)
        beta = ray2[k] / ray1[k]
        arr = (eps - alpha * ray1) / (ray2 - beta * ray1)
        arr[k] = -1  # ignore place k, because it should always be divide by 0
        delta2, _ = smallest_positive(arr)
        delta1 = -beta * delta2
    # Remaining slack after stepping along both rays.
    sbar = eps - (alpha + delta1) * ray1 - delta2 * ray2
    l = np.zeros(R.shape[1])
    l[i] = 0.5 + alpha + delta1
    l[j] = 0.5 + delta2

    res = np.concatenate((l, sbar))
    # round to 0 when a rounding error made it non-zero
    res = np.where(abs(res) < 1e-20, 0, res)

    # A valid BFS is expected to have exactly 2 * R.shape[0] nonzeros; warn
    # when the construction (or the rounding above) produced something else.
    if len(res[res != 0]) != R.shape[0] * 2:
        mp_print("problem in generate_BFS")

    return res
# --- Example 14 (scraped separator; original text: "Ejemplo n.º 14" / "0") ---
def print_ECMs(cone, debug_tags, network, orig_N, add_objective_metabolite,
               check_feasibility):
    """Print every ECM (row of ``cone``) with its nonzero metabolite coefficients.

    :param cone: matrix whose rows are ECMs (conversions)
    :param debug_tags: debug-tag columns appended at the end of each row
    :param network: Network object providing metabolite ids
    :param orig_N: original stoichiometry matrix, used for the feasibility check
    :param add_objective_metabolite: when True, normalise each ECM by its
        (positive) objective coefficient before printing
    :param check_feasibility: when True, verify each ECM against orig_N
    """
    for index, ecm in enumerate(cone):
        # Normalise by objective metabolite, if applicable
        # NOTE(review): the objective coefficient is assumed to sit just
        # before the debug-tag columns at the end of the row -- confirm
        # against how `cone` is assembled by the caller.
        objective_index = -1 - len(debug_tags)
        objective = ecm[objective_index]
        if add_objective_metabolite and objective > 0:
            ecm /= objective  # in-place: this also rescales the row of `cone`

        metabolite_ids = [
            met.id for met in network.metabolites
        ] if not network.compressed else network.uncompressed_metabolite_ids

        mp_print('\nECM #%d:' % (index + 1))
        for metabolite_index, stoichiometry_val in enumerate(ecm):
            if stoichiometry_val != 0.0:
                mp_print('%s\t\t->\t%.4f' %
                         (metabolite_ids[metabolite_index], stoichiometry_val))

        if check_feasibility:
            satisfied = ecm_satisfies_stoichiometry(orig_N, cone[index, :])
            mp_print('ECM satisfies stoichiometry'
                     if satisfied else 'ECM does not satisfy stoichiometry')
def independent_rows_qr(A, tol=1e-10, verbose=False):
    """Return the linearly independent rows of A, selected via a QR-based
    column pick (licols) on A's transpose."""
    if verbose:
        mp_print("Finding set of independent rows.")
    independent = licols(np.transpose(A), tol=tol)
    return A[independent, :]
# --- Example 16 (scraped separator; original text: "Ejemplo n.º 16" / "0") ---
        default='',
        help=
        'Filename where intermediate cone result can be found. If an empty string is given (default), then no intermediate result is picked up and the calculation is done in full'
    )
    parser.add_argument(
        '--manual_override',
        type=str,
        default='',
        help=
        'Index indicating which metabolite should be intersected in first step. Advanced option, can be used in combination with --intermediate_cone_path, to pick a specific intersection at a specific time.'
    )

    args = parser.parse_args()

    if args.model_path == '':
        mp_print('No model given, please specify --model_path')
        exit()

    if len(args.inputs) or len(args.outputs):
        # Disable automatic determination of external metabolite direction if lists are given manually
        args.auto_direction = False

    if args.iterative:
        # Only compress when flag is enabled, and when not performing iterative enumeration.
        # Iterative enumeration performs compression after the iteration steps.
        args.compress = False

    if sys.platform.startswith('win32'):
        mp_print(
            '!!\n'
            'Running ecmtool on Linux might be faster than on Windows, because of external dependencies.\n'
# --- Example 17 (scraped separator; original text: "Ejemplo n.º 17" / "0") ---
def _cycle_fallback_lp(R):
    """Fallback cycle detection: solve the full linprog problem when the fast
    equality-only check fails; returns (cycle_present, cycle_indices)."""
    A_ub, b_ub, A_eq, b_eq, c, x0 = setup_cycle_LP(
        independent_rows_qr(normalize_columns(np.array(R, dtype='float'))))
    res = linprog(c,
                  A_ub,
                  b_ub,
                  A_eq,
                  b_eq,
                  method='revised simplex',
                  options={'tol': 1e-12},
                  x0=x0)
    if res.status == 4:
        print(
            "Numerical difficulties with revised simplex, trying interior point method instead"
        )
        res = linprog(c,
                      A_ub,
                      b_ub,
                      A_eq,
                      b_eq,
                      method='interior-point',
                      options={'tol': 1e-12})

    if np.any(np.isnan(res.x)):
        raise Exception(
            'Remove cycles did not work, because LP-solver had issues. Try to solve this.'
        )
    # Variables pushed against their (large) bound indicate cycle membership.
    cycle_present = True if np.max(res.x) > 90 else False
    cycle_indices = np.where(res.x > 90)[0] if cycle_present else np.array([], dtype='int')
    return cycle_present, cycle_indices


def remove_cycles_redund(R, tol=1e-12, verbose=True):
    """Detect whether there are cycles, by doing an LP. If LP is unbounded find a minimal cycle. Cancel one metabolite
    with the cycle.

    :param R: ray matrix (rows are metabolites, columns are rays)
    :return: (R, cycle_rays) - the matrix with cycles cancelled out, and the
        removed cycle rays as columns
    """
    # Drop all-zero rays in a single call. (Previously they were deleted one
    # original index at a time inside a loop; each deletion shifted the
    # remaining columns, so subsequent deletions could remove the wrong -
    # possibly non-zero - rays.)
    zero_rays = np.where(np.count_nonzero(R, axis=0) == 0)[0]
    R = np.delete(R, zero_rays, axis=1)

    cycle_rays = np.zeros((R.shape[0], 0))
    A_eq, b_eq, c, x0 = setup_cycle_LP(independent_rows_qr(
        normalize_columns(np.array(R, dtype='float'))),
                                       only_eq=True)

    if verbose:
        mp_print('Constructing basis for LP')
    basis = get_basis_columns_qr(np.asarray(A_eq, dtype='float'))
    b_eq, x0 = perturb_LP(b_eq, x0, A_eq, basis, 1e-10)
    if verbose:
        mp_print('Starting linearity check using LP.')
    cycle_present, status, cycle_indices = cycle_check_with_output(
        c, np.asarray(A_eq, dtype='float'), x0, basis)

    if status != 0:
        print("Cycle check failed, trying normal LP")
        cycle_present, cycle_indices = _cycle_fallback_lp(R)

    # if the objective is unbounded, there is a cycle that sums to zero
    while cycle_present:
        # Find a metabolite that can be cancelled using one of the cycle rays.
        met = -1
        counter = 0
        while met < 0:
            cycle_ind = cycle_indices[counter]
            met = get_remove_metabolite_redund(R, cycle_ind)
            counter = counter + 1

        # Remember the cycle ray before it is cancelled out of R.
        cycle_rays = np.append(cycle_rays,
                               R[:, cycle_ind][:, np.newaxis],
                               axis=1)
        R = cancel_with_cycle_redund(R, met, cycle_ind)

        # Do new LP to check if there is still a cycle present.
        A_eq, b_eq, c, x0 = setup_cycle_LP(independent_rows_qr(
            normalize_columns(np.array(R, dtype='float'))),
                                           only_eq=True)

        basis = get_basis_columns_qr(np.asarray(A_eq, dtype='float'))
        b_eq, x0 = perturb_LP(b_eq, x0, A_eq, basis, 1e-10)
        if verbose:
            mp_print('Starting linearity check in H_ineq using LP.')
        cycle_present, status, cycle_indices = cycle_check_with_output(
            c, np.asarray(A_eq, dtype='float'), x0, basis)

        if status != 0:
            print("Cycle check failed, trying normal LP")
            cycle_present, cycle_indices = _cycle_fallback_lp(R)

    return R, cycle_rays
def geometric_ray_adjacency(ray_matrix, plus=[-1], minus=[-1], tol=1e-3, verbose=True, remove_cycles=True):
    """
    Determine which pairs of rays are adjacent, using an LP adjacency test.
    The work is distributed round-robin over the available MPI processes and
    gathered at the end.

    :param ray_matrix: ray matrix (columns are generating rays)
        plus: indices of 'plus' columns
        minus: indices of 'minus' columns
        if plus/minus are not provided ([-1] sentinel), find all adjacencies;
        otherwise only between each + and - pair
    :return: sorted list of adjacent (plus, minus) index pairs
    """
    start = time()

    matrix_normalized = normalize_columns(ray_matrix)
    matrix_indep_rows = independent_rows_qr(matrix_normalized)
    # matrix_indep_rows = independent_rows(matrix_normalized)

    # set default plus and minus: [-1] means "test all columns"
    if len(plus) > 0 and plus[0] == -1:
        plus = [x for x in range(matrix_indep_rows.shape[1])]
    if len(minus) > 0 and minus[0] == -1:
        minus = [x for x in range(matrix_indep_rows.shape[1])]


    mpi_size = get_process_size()
    mpi_rank = get_process_rank()

    adjacency = []
    nr_tests = len(plus) * len(minus)

    # first find any column basis of R_indep
    start_basis = get_basis_columns_qr(matrix_indep_rows)
    start_basis_inv = np.linalg.inv(matrix_indep_rows[:, start_basis])
    for i, p in enumerate(plus):
        # add the plus ray into the basis
        plus_basis = add_first_ray(matrix_indep_rows, start_basis_inv, start_basis, p)
        plus_basis_inv = np.linalg.inv(matrix_indep_rows[:, plus_basis])

        for j, m in enumerate(minus):
            # Round-robin: each process handles every mpi_size-th test.
            it = j + len(minus) * i
            if it % mpi_size == mpi_rank:
                # add the minus ray into the basis
                plus_minus_basis = add_second_ray(matrix_indep_rows, plus_basis_inv, plus_basis, p, m)

                res = determine_adjacency(matrix_indep_rows, p, m, plus_minus_basis)
                if res == 1:
                    adjacency.append((p, m))

                # Progress report roughly every 100 tests handled locally.
                if it % (100 * mpi_size) == mpi_rank:
                    mp_print("Process %d is on adjacency test %d of %d (%f %%)" %
                             (mpi_rank, it, nr_tests, it / nr_tests * 100), PRINT_IF_RANK_NONZERO=True)

    # bases = get_bases(matrix_indep_rows, plus, minus)
    # for i in range(mpi_rank, nr_tests, mpi_size):
    #     plus_index = i // len(minus)
    #     minus_index = i % len(minus)
    #     basis = bases[plus_index, minus_index]
    #     res = determine_adjacency(matrix_indep_rows, plus[plus_index], minus[minus_index], basis)
    #     if res == 1:
    #         adjacency.append((plus[plus_index], minus[minus_index]))
    #     if i % 100 == 0:
    #         mp_print("Process %d is now on adjacency test %d" % (mpi_rank, i))

    # MPI communication step: merge the pairs found by every other process.
    adj_sets = world_allgather(adjacency)
    for i in range(mpi_size):
        if i != mpi_rank:
            adjacency.extend(adj_sets[i])
    adjacency.sort()

    end = time()
    mp_print("Did LPs in %f seconds" % (end - start))
    return adjacency
def kkt_check(c, A, x, basis, p, m, tol=1e-8, threshold=1e-3, max_iter=100000, verbose=True):
    """
    Determine whether KKT conditions hold for x0.
    Take size 0 steps if available.

    Runs revised-simplex pivots starting from `basis` until either the KKT
    (optimality) conditions hold or one of the initial active rays p/m leaves
    the basis (which counts as a real improvement, i.e. non-adjacency).

    :return: (KKT, status) where status is
        0 - check finished (KKT True: optimal, False: improvement found),
        1 - numerical trouble (LinAlgError / unbounded); caller assumes adjacency,
        2 - suspected cycling; caller should retry with a new perturbation.
    """
    improvement = False
    init_actives = [p, m]
    ab = np.arange(A.shape[0])
    a = np.arange(A.shape[1])

    maxupdate = 10
    B = BGLU(A, basis, maxupdate, False)
    # Initialize u up front: the LinAlgError handler below prints it, and the
    # very first B.solve can fail before u is ever assigned (previously a
    # NameError inside the handler would mask the real diagnostic).
    u = None
    iteration = 0
    while True:
        bl = np.zeros(len(a), dtype=bool)
        bl[basis] = 1
        xb = x[basis]

        try:
            l = B.solve(c[basis], transposed=True)  # similar to v = linalg.solve(B.T, c[basis])
        except LinAlgError:
            # Dump diagnostics in full, then restore normal print options.
            np.set_printoptions(threshold=np.inf)
            mp_print('This matrix seems to be singular:', PRINT_IF_RANK_NONZERO=True)
            mp_print(B.B, PRINT_IF_RANK_NONZERO=True)
            mp_print('Iteration:' + str(iteration), PRINT_IF_RANK_NONZERO=True)
            mp_print('u:', PRINT_IF_RANK_NONZERO=True)
            mp_print(u, PRINT_IF_RANK_NONZERO=True)
            mp_print('plus, minus:', PRINT_IF_RANK_NONZERO=True)
            mp_print(p, PRINT_IF_RANK_NONZERO=True)
            mp_print(m, PRINT_IF_RANK_NONZERO=True)
            print("LinAlgError in B.solve")
            np.set_printoptions(threshold=1000)
            return True, 1

        sn = c - l.dot(A)  # reduced cost
        sn = sn[~bl]

        if np.all(sn >= -tol):  # in this case x is an optimal solution
            return True, 0

        entering = a[~bl][np.argmin(sn)]
        u = B.solve(A[:, entering])

        i = u > tol  # if none of the u are positive, unbounded
        if not np.any(i):
            mp_print("Warning: unbounded problem in KKT_check")
            return True, 1

        # Ratio test: pick the leaving row with the smallest step.
        th = xb[i] / u[i]
        l = np.argmin(th)  # implicitly selects smallest subscript
        if basis[i][l] in init_actives:  # if either plus or minus leaves basis, LP has made significant improvement
            improvement = True

        step_size = th[l]  # step size

        # Do pivot
        x[basis] = x[basis] - step_size * u
        x[entering] = step_size
        x[abs(x) < 10e-20] = 0
        B.update(ab[i][l], entering)  # modify basis
        basis = B.b

        # Improvement found (p or m left the basis): the rays are not adjacent.
        if improvement:
            if not np.dot(c, x) < -threshold:
                mp_print('Original way of finding non-adjacents does not say these are non-adjacent', True)
            return False, 0

        iteration += 1
        if iteration % 10000 == 0:
            print("Warning: reached %d iterations" % iteration)
        if iteration % max_iter == 0:
            mp_print("Cycling? Starting again with new perturbation.")
            return True, 2
def intersect_directly(R, internal_metabolites, network, verbose=True, tol=1e-12, sort_order='min_adj',
                       intermediate_cone_path='', manual_override = ''):
    """
    Eliminate all internal metabolites from R one at a time, picking the
    intersection order according to ``sort_order``.

    :param R: ray matrix (rows are metabolites, columns are rays)
    :param internal_metabolites: indices of the metabolites to eliminate
    :param network: Network object kept in sync with R
    :param verbose:
    :param tol:
    :param sort_order: Different options for determining metabolite intersection order. As a default we will intersect
    the metabolite that adds the minimal number of adjacencies in the model. Other options are 'min_lp',
    'max_lp_per_adj', and 'min_connections'.
    :param intermediate_cone_path: when nonempty, resume from the intermediate
        cone stored at this path
    :param manual_override: when nonempty, index (into the sorted internal
        list) of the metabolite to intersect in the very first step
    :return: (R, ids) - the ray matrix after all eliminations and unsplitting,
        plus the corresponding metabolite ids
    """
    if intermediate_cone_path:
        R, internal_metabolites, network = pick_up_intermediate_cone(internal_metabolites, network,
                                                                     intermediate_cone_path)

    # rows are metabolites
    deleted = np.array([])
    it = 1
    internal = list(internal_metabolites)
    internal.sort()
    rows_removed_redund = 0

    while len(internal) > 0:
        sorting = sort_order

        # For each internal metabolite, calculate the number of producing reactions times the number of consuming
        # R[j-len(deleted[deleted<j]) is the current row for the metabolite that was once at the j-th place
        n_lps = [np.sum(R[j - len(deleted[deleted < j]), :] > 0) * np.sum(R[j - len(deleted[deleted < j]), :] < 0) for j
                 in internal]
        # If there is a metabolite that can be deleted without any lps being done, we will do it immediately
        if np.min(n_lps) == 0:
            sorting = 'min_lp'
        if manual_override and (it == 1):
            # NOTE(review): `it` is only incremented inside the verbose branch
            # below, so with verbose=False this override applies on every
            # iteration, not just the first - confirm intent.
            i = internal[int(manual_override)]
        elif sorting == 'min_lp':
            i = internal[np.argmin(n_lps)]
        elif sorting == 'min_connections':
            # Alternative way of choosing metabolite, choose the one that is minimally connected
            connections = []
            adj = get_metabolite_adjacency(R)
            for met in internal:
                curr_ind = met - len(deleted[deleted < met])
                connections.append(int(np.sum(adj[:, curr_ind])))

            min_connect_inds = np.array(internal)[np.where(connections == np.min(connections))[0]]
            # Pick the one with least LPs to be done, if equally connected
            i = min_connect_inds[np.argmin(
                [np.sum(R[j - len(deleted[deleted < j]), :] > 0) * np.sum(R[j - len(deleted[deleted < j]), :] < 0)
                 for j in min_connect_inds])]

        elif sorting == 'min_adj' or sorting == 'max_lp_per_adj':
            # Alternative way of choosing metabolite, choose the one that increases adjacencies the least
            adj_added = []  # This will contain for each metabolite the number connections between metabs removal adds
            adj = get_metabolite_adjacency(R)
            old_n_adjs = np.sum(adj)
            for met in internal:
                new_adj = adj.copy()
                curr_ind = met - len(deleted[deleted < met])
                # Removing a metabolite connects all of its neighbours to each other.
                new_adj[np.where(adj[:, curr_ind] != 0), :] += new_adj[curr_ind, :]
                np.fill_diagonal(new_adj, 0)
                new_adj = np.minimum(new_adj, 1)
                new_adj = np.delete(np.delete(new_adj, curr_ind, axis=0), curr_ind, axis=1)
                new_n_adjs = np.sum(new_adj)
                adj_added.append(int(new_n_adjs - old_n_adjs))

            if sorting == 'min_adj':
                min_adj_inds = np.array(internal)[np.where(adj_added == np.min(adj_added))[0]]
                # Pick the one with least LPs to be done, if adding equal adjacencies
                i = min_adj_inds[np.argmin(
                    [np.sum(R[j - len(deleted[deleted < j]), :] > 0) * np.sum(
                        R[j - len(deleted[deleted < j]), :] < 0) for j in min_adj_inds])]
            elif sorting == 'max_lp_per_adj':
                lp_per_adj = np.array(
                    [np.sum(R[j - len(deleted[deleted < j]), :] > 0) * np.sum(R[j - len(deleted[deleted < j]), :] < 0)
                     for j in internal]) / (np.array(adj_added) - np.min(adj_added) + 1)
                i = internal[np.argmax(lp_per_adj)]

        # i - len(deleted[deleted<i] is the current row for the metabolite that was once at the ith place
        to_remove = i - len(deleted[deleted < i])
        if verbose:
            mp_print("\n\nIteration %d (internal metabolite = %d: %s) of %d" % (
                it, to_remove, [m.id for m in network.metabolites][to_remove], len(internal_metabolites)))
            mp_print("Possible LP amounts for this step:\n" + ", ".join(np.array(n_lps).astype(str)))
            mp_print("Total: %d" % sum(n_lps))
            if manual_override and (it == 1):
                mp_print("Sorting was manually chosen for this first step.\n")
            elif sorting == 'min_adj':
                mp_print("Possible adjacencies added for this step:\n" + ", ".join(np.array(adj_added).astype(str)))
                mp_print("Minimal adjacency option chosen.\n")
            elif sorting == 'max_lp_per_adj':
                mp_print("Possible lps per adjacency added for this step:\n" + ", ".join(
                    np.round(np.array(lp_per_adj), 2).astype(str)))
                mp_print("Rescaled maximal LPs per added adjacency option chosen.\n")
            elif sorting == 'min_connections':
                mp_print("Possible connectedness of metabolites for this sstep:\n" + ", ".join(
                    np.array(connections).astype(str)))
                mp_print("Minimally connected option chosen.\n")
            elif sorting == 'min_lp':
                mp_print("Minimal LPs chosen.\n")
            it += 1

        # input("waiting")
        # No producer/consumer pair exists: the metabolite can be removed without LPs.
        if np.sum(R[i - len(deleted[deleted < i]), :] > 0) * np.sum(R[i - len(deleted[deleted < i]), :] < 0) == 0:
            R = iteration_without_lps(R, to_remove, network)
        else:
            R, removed = eliminate_metabolite(R, to_remove, network, calculate_adjacency=True)
            rows_removed_redund += removed
        deleted = np.append(deleted, i)
        internal.remove(i)

        # Checkpoint the intermediate cone (rank 0 only) so long runs can resume.
        if get_process_rank() == 0:
            try:
                metab_ids = [metab.id for metab in network.metabolites]
                np.savetxt('intermediate_conversion_cone.csv', np.transpose(R), delimiter=',',
                           header=','.join(metab_ids), comments='')
            except OverflowError:
                mp_print('Intermediate result cannot be stored due to too large numbers.')

    # remove artificial rays introduced by splitting metabolites
    R, ids = unsplit_metabolites(R, network)

    if verbose:
        mp_print("\n\tRows removed by redund overall: %d\n" % rows_removed_redund)
        if rows_removed_redund != 0:
            pass
            # input("Waiting...")

    return R, ids
# --- Example 21 (scraped separator; original text: "Ejemplo n.º 21" / "0") ---
def drop_redundant_rays(ray_matrix,
                        verbose=True,
                        use_pre_filter=False,
                        rays_are_unique=True,
                        linearities=False,
                        normalised=True):
    """Identify the extreme (non-redundant) rays among the columns of ray_matrix.

    Each MPI process tests a stripe of the candidate rays for extremality; the
    per-process results are merged with an allgather at the end.

    :param ray_matrix: matrix whose columns are candidate rays
    :param verbose: print progress information
    :param use_pre_filter: Sometimes, use_pre_filter=True can speed up the calculations, but mostly it doesn't
    :param rays_are_unique: Boolean that states whether rays given as input are already unique
    :param linearities: Boolean indicating if linearities are still present
    :param normalised: Boolean indicating if ray_matrix columns are already normalised
    :return: (sorted list of extreme column indices, matrix of removed cycle rays)
    """
    keep_inds = []
    drop_inds = []

    if not rays_are_unique:
        # Deduplicate columns before doing any redundancy testing.
        ray_matrix = np.transpose(
            unique(np.transpose(normalize_columns_fraction(ray_matrix))))

    # Find 'cycles': combinations of columns of matrix_indep_rows that add up to zero, and remove them
    if linearities:
        if verbose:
            mp_print('Detecting linearities in H_ineq.')
        ray_matrix, cycle_rays = remove_cycles_redund(ray_matrix)
    else:
        cycle_rays = np.zeros((ray_matrix.shape[0], 0))

    if normalised:
        matrix_normalized = np.array(ray_matrix, dtype='float')
    else:
        if verbose:
            mp_print('Normalizing columns.')
        matrix_normalized = normalize_columns(ray_matrix)

    if verbose:
        mp_print('Selecting independent rows.')
    matrix_indep_rows = independent_rows_qr(matrix_normalized)

    # Optional cheap pre-filter; otherwise every column is a candidate.
    filtered_inds = (pre_redund(matrix_indep_rows) if use_pre_filter
                     else np.arange(matrix_indep_rows.shape[1]))

    mpi_size = mpi_wrapper.get_process_size()
    mpi_rank = mpi_wrapper.get_process_rank()

    matrix_indep_rows = matrix_indep_rows[:, filtered_inds]

    # Any column basis of the independent-row matrix works as a starting basis.
    start_basis = get_basis_columns_qr(matrix_indep_rows)
    start_basis_inv = np.linalg.inv(matrix_indep_rows[:, start_basis])

    number_rays = matrix_indep_rows.shape[1]
    for i in range(number_rays):
        if i % mpi_size != mpi_rank:
            continue  # this ray belongs to another MPI process
        if i % (10 * mpi_size) == mpi_rank:
            mp_print(
                "Process %d is on redundancy test %d of %d (%f %%). Found %d redundant rays."
                % (mpi_rank, i, number_rays, i / number_rays * 100,
                   len(drop_inds)),
                PRINT_IF_RANK_NONZERO=True)
        basis = add_first_ray(matrix_indep_rows, start_basis_inv,
                              start_basis, i)
        if check_extreme(matrix_indep_rows, i, basis):
            keep_inds.append(filtered_inds[i])
        else:
            drop_inds.append(filtered_inds[i])

    # MPI communication step: merge the extreme sets found by every process.
    gathered = mpi_wrapper.world_allgather(keep_inds)
    for proc in range(mpi_size):
        if proc != mpi_rank:
            keep_inds.extend(gathered[proc])
    keep_inds.sort()

    return keep_inds, cycle_rays
def check_if_intermediate_cone_exists(intermediate_cone_path):
    """Report whether an intermediate cone file exists at the given path.

    BUGFIX: despite its name, the function previously only printed and
    returned None; it now also returns the result of the existence check
    (backward-compatible — callers that ignored the return still work).

    :param intermediate_cone_path: path to the intermediate cone file
    :return: True if the file exists, False otherwise
    """
    exists = os.path.exists(intermediate_cone_path)
    if exists:
        mp_print('Will try to pick up intermediate cone, file found.')
    else:
        mp_print('Tried to pick up intermediate cone, but file not found.')
    return exists
def get_basis_columns_qr(A, tol=1e-10, verbose=False):
    """Return indices of a set of linearly independent columns of A.

    Thin wrapper around licols (QR-based column selection).

    :param A: 2-D array
    :param tol: numerical tolerance passed on to licols
    :param verbose: print a progress message
    :return: array/list of column indices forming a basis
    """
    if verbose:
        mp_print("Finding set of linearly independent columns.")
    return licols(A, tol=tol)
def remove_cycles(R, network, tol=1e-12, verbose=True):
    """Detect whether there are cycles, by doing an LP. If LP is unbounded find a minimal cycle. Cancel one metabolite
    with the cycle.

    :param R: ray matrix (metabolites x rays)
    :param network: Network object; metabolites are looked up to choose which one to cancel
    :param tol: numerical tolerance (kept for interface compatibility)
    :param verbose: print progress information
    :return: (R, network, external_cycles) — external_cycles is None when no external cycle rays were recorded
    """

    def _fallback_cycle_check(matrix):
        # Robust (but slower) cycle detection via scipy linprog; used when the
        # fast perturbed-LP check reports a nonzero status.
        A_ub, b_ub, A_eq_f, b_eq_f, c_f, x0_f = setup_cycle_LP(independent_rows(normalize_columns(matrix)))
        res = linprog(c_f, A_ub, b_ub, A_eq_f, b_eq_f, method='revised simplex', options={'tol': 1e-12},
                      x0=x0_f)
        if res.status == 4:
            mp_print("Numerical difficulties with revised simplex, trying interior point method instead")
            res = linprog(c_f, A_ub, b_ub, A_eq_f, b_eq_f, method='interior-point', options={'tol': 1e-12})

        present = True if np.max(res.x) > 90 else False
        indices = np.where(res.x > 90)[0] if present else []
        if np.any(np.isnan(res.x)):
            raise Exception('Remove cycles did not work, because LP-solver had issues. Try to solve this.')
        return present, indices

    def _cycle_check(matrix):
        # Fast cycle check: perturbed LP solved via the custom simplex routine,
        # with a linprog fallback on numerical trouble.
        A_eq_c, b_eq_c, c_c, x0_c = setup_cycle_LP(independent_rows_qr(normalize_columns(matrix)), only_eq=True)
        basis_c = get_basis_columns_qr(np.asarray(A_eq_c, dtype='float'))
        b_eq_c, x0_c = perturb_LP(b_eq_c, x0_c, A_eq_c, basis_c, 1e-10)
        present, status, indices = cycle_check_with_output(c_c, np.asarray(A_eq_c, dtype='float'), x0_c, basis_c)
        if status != 0:
            present, indices = _fallback_cycle_check(matrix)
        return present, indices

    def _redund_rays(matrix):
        # Drop redundant rays (columns) by running redund on the transpose,
        # with optional size/timing reporting.
        matrix = np.transpose(matrix)
        rows_before = matrix.shape[0]
        if verbose:
            mp_print("\tDimensions before redund: %d %d" % (matrix.shape[0], matrix.shape[1]))
        start = time()
        matrix = redund(matrix)
        end = time()
        if verbose:
            mp_print("\tDimensions after redund: %d %d" % (matrix.shape[0], matrix.shape[1]))
            mp_print("\t\tRows removed by redund: %d" % (rows_before - matrix.shape[0]))
            mp_print("\tRedund took %f seconds" % (end - start))
        return np.transpose(matrix)

    external_cycles = []
    count_since_last_redund = 0
    cycle_present, cycle_indices = _cycle_check(R)

    # if the objective is unbounded, there is a cycle that sums to zero
    while cycle_present:
        # Find a metabolite (preferably internal) that can be cancelled with a cycle ray.
        met = -1
        counter = 0
        removable_is_ext = False
        while met < 0 and (not removable_is_ext):
            if counter >= len(cycle_indices):
                # BUGFIX: the original tested `counter > len(cycle_indices)` AFTER
                # indexing cycle_indices[counter], so this warning was unreachable
                # and the loop crashed with an IndexError instead.
                mp_print('No internal metabolite was found that was part of the cycle. This might cause problems.')
                break
            cycle_ind = cycle_indices[counter]
            met = get_remove_metabolite(R, network, cycle_ind)
            if met == -1:  # Only external metabolites used in this ray, delete external and remember the ray
                removable_is_ext = True
                met = get_remove_metabolite(R, network, cycle_ind, allow_external=True)
            counter = counter + 1

        if verbose:
            mp_print("Found a cycle, cancelling metabolite %d (%s) with reaction %d" % (
                met, network.metabolites[met].id, cycle_ind))

        R, external_cycle = cancel_with_cycle(R, met, cycle_ind, network, removable_is_ext=removable_is_ext)
        if external_cycle:
            external_cycles.append(external_cycle)
        count_since_last_redund = count_since_last_redund + 1

        # Periodically compact the matrix so it does not grow unboundedly.
        if count_since_last_redund > 10:
            count_since_last_redund = 0
            R = _redund_rays(R)

        # Do new LP to check if there is still a cycle present.
        cycle_present, cycle_indices = _cycle_check(R)

    # Do redund one more time
    R = _redund_rays(R)

    network.N = R
    if not len(external_cycles):
        external_cycles = None
    return R, network, external_cycles
def cycle_check_with_output(c, A, x, basis, tol=1e-8, threshold=10, max_iter=100000, verbose=True,find_all_entering=False):
    """
    Determine whether KKT conditions hold for x0.
    Take size 0 steps if available.

    Runs revised-simplex iterations on min c'x over Ax = b, starting from the
    given basis and solution x (x is modified in place by the pivots).
    Returns a tuple (cycle_present, status, indices):
      - (False, 0, [-1]) when an optimum is reached (no cycle),
      - (True, 0, entering) when the LP is unbounded, i.e. a cycle is present;
        `entering` holds the entering index, or several candidate indices when
        find_all_entering is True,
      - (True, 1, [argmax(x)]) when the objective drops below -threshold,
        indicating a likely numerical issue,
      - (False, 1, [-1]) on a LinAlgError in the basis solve or when max_iter
        is exhausted.

    NOTE(review): BGLU is scipy's private basis-factorization helper for the
    revised simplex method — confirm it exists in the pinned scipy version.
    """
    ab = np.arange(A.shape[0])  # row indices, used to pick the leaving basis position
    a = np.arange(A.shape[1])   # column (variable) indices

    maxupdate = 10  # refactorize the basis after this many rank-one updates
    B = BGLU(A, basis, maxupdate, False)
    for iteration in range(max_iter):
        bl = np.zeros(len(a), dtype=bool)
        bl[basis] = 1  # boolean mask of basic variables
        xb = x[basis]

        try:
            l = B.solve(c[basis], transposed=True)  # similar to v = linalg.solve(B.T, c[basis])
        except LinAlgError:
            return False, 1, [-1]
        sn = c - l.dot(A)  # reduced cost
        sn = sn[~bl]  # only non-basic reduced costs matter for optimality

        if np.all(sn >= -tol):  # in this case x is an optimal solution
            # if verbose:
            #     mp_print("Did %d steps in kkt_check, found True - smallest sn: %.8f" % (iteration - 1, min(sn)))
            return False, 0, [-1]

        # Dantzig rule: enter the non-basic variable with the most negative reduced cost.
        entering = a[~bl][np.argmin(sn)]
        u = B.solve(A[:, entering])

        i = u > tol  # if none of the u are positive, unbounded
        if not np.any(i):
            if verbose:
                mp_print("Unbounded problem in cycle detection, so cycle is present")
            # if verbose:
            #     mp_print("Did %d steps in kkt_check2" % iteration - 1)
            if find_all_entering:
                # x[basis] = x[basis] - 1 * u
                # x[entering] = 1
                # all_entering = np.where(x>1e-6)[0]

                # Also report basic variables that move strongly along the unbounded ray.
                all_entering = basis[np.where(u < -1e-5)[0]]
                all_entering = np.append(all_entering, entering)
            else:
                all_entering = [entering]
            return True, 0, all_entering

        # Ratio test: largest step that keeps all basic variables non-negative.
        th = xb[i] / u[i]
        l = np.argmin(th)  # implicitly selects smallest subscript
        step_size = th[l]  # step size

        # Do pivot
        x[basis] = x[basis] - step_size * u
        x[entering] = step_size
        x[abs(x) < 10e-20] = 0  # clamp numerical noise to exact zero
        B.update(ab[i][l], entering)  # modify basis
        basis = B.b

        if np.dot(c, x) < -threshold:  # found a better solution, so not adjacent
            if verbose:
                mp_print("Cycle check has invalid exit status. Numerical issue likely occurred.")
            return True, 1, [np.argmax(x)]

    mp_print("Cycling?")
    return False, 1, [-1]
def eliminate_metabolite(R, met, network, calculate_adjacency=True, tol=1e-12, verbose=True):
    """Eliminate metabolite `met` from the ray matrix R by combining rays.

    Rays are partitioned by the sign of their coefficient for `met`; zero rays
    survive unchanged, and (+, -) pairs are combined so the metabolite cancels.
    With calculate_adjacency=True only geometrically adjacent pairs are
    combined; otherwise all pairs are combined and redund prunes the result.

    :param R: ray matrix (metabolites x rays)
    :param met: row index of the metabolite to eliminate
    :param network: Network object; the metabolite is dropped from it as well
    :param calculate_adjacency: combine only adjacent (+, -) pairs when True
    :param tol: coefficients with |value| <= tol count as zero
    :param verbose: print progress information
    :return: (matrix with row `met` removed, number of rows removed by redund)
    """
    # Partition reaction (column) indices by the sign of R[met, :].
    plus, zero, minus = [], [], []
    for reac in range(R.shape[1]):
        coeff = R[met, reac]
        if abs(coeff) <= tol:
            zero.append(reac)
        elif coeff > tol:
            plus.append(reac)
        elif coeff < -tol:
            minus.append(reac)
        else:
            # Only reachable for NaN coefficients; treated as zero (kept from original).
            zero.append(reac)
    if verbose:
        mp_print("\tNumber of +: %d" % len(plus))
        mp_print("\tNumber of -: %d" % len(minus))
        mp_print("\tNumber of LP to do: %d" % (len(plus) * len(minus)))

    # Rays that do not use this metabolite carry over unchanged.
    next_matrix = [R[:, z] for z in zero]

    nr_adjacent = 0
    if calculate_adjacency:
        # Combine only adjacent (+, -) pairs — these yield extreme rays directly.
        for (p, m) in geometric_ray_adjacency(R, plus=plus, minus=minus, verbose=verbose,
                                              remove_cycles=True):
            nr_adjacent += 1
            rp = R[met, p]
            rm = R[met, m]
            combined = (rp * R[:, m] - rm * R[:, p]) / np.gcd(rp, rm)
            if sum(abs(combined)) > tol:
                next_matrix.append(combined)
    else:  # calculate_adjacency is off
        # Combine every (+, -) pair; redundant results are pruned by redund below.
        for p in plus:
            for m in minus:
                nr_adjacent += 1
                rp = R[met, p]
                rm = R[met, m]
                combined = rp * R[:, m] - rm * R[:, p]
                if sum(abs(combined)) > tol:
                    next_matrix.append(combined)

    if verbose:
        candidates = len(plus) * len(minus)
        if candidates > 0:
            mp_print("Of %d candidates, %d were adjacent (%.2f percent)" % (
                candidates, nr_adjacent, 100 * nr_adjacent / candidates))
        else:
            mp_print("Of %d candidates, %d were adjacent (0 percent)" % (candidates, nr_adjacent))

    next_matrix = np.asarray(next_matrix)

    rows_removed_redund = 0
    # redund in case we have too many rows
    if not calculate_adjacency:
        rows_before = next_matrix.shape[0]

        if verbose:
            mp_print("\tDimensions before redund: %d %d" % (next_matrix.shape[0], next_matrix.shape[1]))
        start = time()
        next_matrix = redund(next_matrix)
        end = time()
        rows_removed_redund = rows_before - next_matrix.shape[0]
        if verbose:
            mp_print("\tDimensions after redund: %d %d" % (next_matrix.shape[0], next_matrix.shape[1]))
            mp_print("\t\tRows removed by redund: %d" % (rows_before - next_matrix.shape[0]))
            mp_print("\tRedund took %f seconds" % (end - start))

    next_matrix = np.transpose(next_matrix)

    # Row `met` is now all-zero; drop it from the matrix and from the network.
    next_matrix = np.delete(next_matrix, met, 0)
    network.drop_metabolites([met])
    mp_print("After removing this metabolite, we have %d metabolites and %d rays" %
             (next_matrix.shape[0], next_matrix.shape[1]))

    return next_matrix, rows_removed_redund