def find_temporal_adjacency_matrix(min_abundance=0, phylo_column='family',
                                   full_svd=False):
    """
    Find the adjacency matrix among clusters of bacteria from week to week,
    assuming the interaction between clusters is changing.

    For every entry returned by prepare_DMD_matrices() and for each of the
    first 10 pairs of consecutive snapshot columns (x, y), both columns are
    passed through normalize(..., axis=0) and the mapping
    A = y . pinv(x) is computed, where pinv(x) is assembled from the
    singular value decomposition of x.

    :param min_abundance: ignore the bacteria if their abundance is always
    below the min_abundance.  None is treated as 0.
    :param phylo_column: the data is clustered based on the phylo_column.
    None is treated as 'family'.
    :param full_svd: the method of singular value decomposition (passed to
    np.linalg.svd as full_matrices).  Any truthy value selects the full
    SVD; None is treated as False.
    :return: (linear_mappings, nodes_list).  Both are dictionaries keyed by
    the snapshot key extended with 'Week <n>' (n = 1..10).
    linear_mappings holds the A matrix of the transition starting at that
    week; nodes_list holds the row labels (df.index) of the snapshot.
    """
    # Default values
    if min_abundance is None:
        min_abundance = 0
    if phylo_column is None:
        phylo_column = 'family'
    # Coerce to a plain bool (None -> False) so the flag means the same
    # thing here as it does inside np.linalg.svd.
    full_svd = bool(full_svd)
    # snapshots of samples over 11 weeks
    # TODO: rename prepare_DMD_matrices; capitals are conventionally
    # reserved for class names.
    snapshots = prepare_DMD_matrices(min_abundance, phylo_column,
                                     oxygen='all', debug=False)
    linear_mappings = {}
    nodes_list = {}
    for descriptive_tuple, df in snapshots.items():
        data = df.values
        for week in range(10):
            X = data[:, week:week + 1]
            Y = data[:, week + 1:week + 2]
            # Preprocess the abundance data
            X = normalize(X, axis=0)
            Y = normalize(Y, axis=0)
            # np.linalg.svd returns (U, s, Vh) with X = U . S . Vh
            U, s, Vh = np.linalg.svd(X, full_matrices=full_svd)
            # S must be conformable with U and Vh: (M, N) for the full
            # SVD, (K, K) for the reduced one.
            S = np.zeros((U.shape[1], Vh.shape[0]), dtype=float)
            S[:len(s), :len(s)] = np.diag(s)
            # U and Vh have orthonormal columns/rows, so their (pseudo-)
            # inverse is the conjugate transpose.  pinv(S) instead of
            # inv(S) keeps an all-zero snapshot from raising LinAlgError;
            # it then simply yields a zero mapping.
            pseu_inv_x = np.dot(Vh.conj().T,
                                np.dot(np.linalg.pinv(S), U.conj().T))
            # Adjacency matrix between clusters
            A = np.dot(Y, pseu_inv_x)
            key = descriptive_tuple + ('Week ' + str(week + 1),)
            linear_mappings[key] = A
            nodes_list[key] = list(df.index)
    return linear_mappings, nodes_list
def find_fixed_adjacency_matrix(min_abundance=0.0, phylo_column='order',
                                full_svd=True):
    """
    Find the adjacency matrix among clusters of bacteria over the 11 weeks
    of sampling, assuming the interaction between clusters is fixed.

    prepare_DMD_matrices() supplies a dictionary whose keys are descriptive
    tuples such as ("High", 2) (presumably oxygen condition and replicate
    -- confirm against prepare_DMD_matrices) and whose values are
    dataframes with weeks as columns and taxa ("bacteria") as rows.

    Unlike find_temporal_adjacency_matrix(), we get only one predictive
    matrix per dataframe; it represents the 10 transitions between
    sampling points: with X = columns 0..9 and Y = columns 1..10 (both
    passed through normalize(..., axis=0)), A = Y . pinv(X), where pinv(X)
    is assembled from the singular value decomposition of X.

    One interaction ("A") matrix is created per dictionary entry (8 are
    expected: High/Low oxygen with 4 replicates each).

    :param min_abundance: minimum abundance to look for in original data.
    None is treated as 0.
    :param phylo_column: most detailed phylogenetic column to consider.
    None is treated as 'order'.
    :param full_svd: if truthy, runs the full svd algorithm.  If falsy,
    runs a faster (reduced) version.  Note that an explicit None selects
    the reduced version even though the signature default is True.
    :return: (linear_mappings, nodes_list).  Both are dictionaries with the
    same tuple keys as the snapshots: linear_mappings holds the A
    matrices, nodes_list the names of the nodes (df.index).
    """
    # Default values
    if min_abundance is None:
        min_abundance = 0
    if phylo_column is None:
        phylo_column = 'order'
    # Coerce to a plain bool (None -> False) so the flag means the same
    # thing here as it does inside np.linalg.svd.
    full_svd = bool(full_svd)
    # snapshots of samples over 11 weeks
    snapshots = prepare_DMD_matrices(min_abundance, phylo_column,
                                     oxygen='all', debug=False)
    linear_mappings = {}
    nodes_list = {}
    for descriptive_tuple, df in snapshots.items():
        data = df.values
        X = data[:, 0:10]
        Y = data[:, 1:11]
        # Preprocess the abundance data
        X = normalize(X, axis=0)
        Y = normalize(Y, axis=0)
        # np.linalg.svd returns (U, s, Vh) with X = U . S . Vh
        U, s, Vh = np.linalg.svd(X, full_matrices=full_svd)
        # S must be conformable with U and Vh: (M, N) for the full SVD,
        # (K, K) for the reduced one.  Sizing it from U and Vh (rather
        # than from len(s)) also works when there are fewer taxa than
        # snapshot columns.
        S = np.zeros((U.shape[1], Vh.shape[0]), dtype=float)
        S[:len(s), :len(s)] = np.diag(s)
        # U and Vh have orthonormal columns/rows, so their (pseudo-)
        # inverse is the conjugate transpose; this is valid even when the
        # reduced Vh is not square.  pinv(S) instead of inv(S) keeps a
        # rank-deficient X from raising LinAlgError.
        pseu_inv_x = np.dot(Vh.conj().T,
                            np.dot(np.linalg.pinv(S), U.conj().T))
        # Adjacency matrix between clusters
        A = np.dot(Y, pseu_inv_x)
        linear_mappings[descriptive_tuple] = A
        nodes_list[descriptive_tuple] = list(df.index)
    return linear_mappings, nodes_list