def join_encoded_layers_and_pam_stats(encoded_layers, pam_stats):
    """Concatenate encoded layers and PAM statistics.

    Note:
        The PAM statistics very likely represent a subset of the cells in
        the shapegrid and therefore must be "inflated" to match the same
        sites.
    """
    row_headers = encoded_layers.get_row_headers()
    new_stats_mtx = Matrix(
        np.zeros((len(row_headers), len(pam_stats.get_column_headers()))),
        headers={
            '0': row_headers,
            '1': pam_stats.get_column_headers()
        })

    # Set values in the new stats matrix
    # Note: This is somewhat fragile.  It requires that the encoded_layers
    #    row site ids be a superset of the pam_stats row site ids.  Consider
    #    either forcing the data to match or something more robust for a
    #    more official version.
    all_site_ids = [
        int(site_id) for site_id, _, _ in encoded_layers.get_row_headers()]
    ps_site_ids = [
        int(site_id) for site_id, _, _ in pam_stats.get_row_headers()]
    # Map each site id to its row position in the inflated matrix.  Using
    #    np.take on the raw ids would index by value rather than by
    #    position and misplace rows.
    site_id_to_row = {site_id: i for i, site_id in enumerate(all_site_ids)}
    for i, site_id in enumerate(ps_site_ids):
        new_stats_mtx[site_id_to_row[site_id]] = pam_stats[i]

    # Concatenate and return
    joined_mtx = Matrix.concatenate([encoded_layers, new_stats_mtx], axis=1)
    return joined_mtx
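# Illustrative sketch (not part of the library): why the site ids above are
#    mapped to row positions instead of being passed to np.take.  Site ids
#    are arbitrary integers, so indexing by value would misplace rows (or
#    raise an out-of-bounds error).  The names below are made up.
all_site_ids = [10, 20, 30, 40]   # ids in the full shapegrid
ps_site_ids = [20, 40]            # ids present in the PAM stats
site_id_to_row = {sid: i for i, sid in enumerate(all_site_ids)}
rows = [site_id_to_row[sid] for sid in ps_site_ids]
assert rows == [1, 3]             # row positions, not the id values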
def test_with_signed_value_comparison(self):
    """Tests that getting p-values does what is expected."""
    obs_matrix = Matrix(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))
    rand_1 = Matrix(np.array([[3, 2, 1], [6, 3, -12], [8, 3, -10]]))
    rand_2 = Matrix(np.array([[9, 23, 1], [4, 2, 9], [-32, -3, 9]]))
    p_vals = perm_testing.get_p_values(
        obs_matrix, [rand_1, rand_2],
        compare_func=perm_testing.compare_signed_values)
    assert np.all(
        p_vals[:, :, 0] == np.array(
            [[1, 0.5, 0], [0.5, 0, 0.5], [0.5, 0, 0]]))
def test_valid(self):
    """Tests that correcting p-values does what is expected."""
    uncorrected = Matrix(
        np.array([[0.05, 0.1, 0.02],
                  [0.01, 0.05, 0.06],
                  [0.1, 0.01, 0.20]]))
    corrected = perm_testing.correct_p_values(uncorrected)
    # Basic sanity checks: the corrected matrix keeps the input shape and
    #    contains only 0 / 1 significance flags.
    assert corrected.shape == uncorrected.shape
    assert np.all(np.isin(corrected, [0, 1]))
def get_p_values(observed_matrix, test_matrices,
                 compare_func=compare_absolute_values):
    """Gets p-values by comparing the observed and random data.

    Args:
        observed_matrix (:obj:`Matrix`): A Matrix object with observed
            values.
        test_matrices (:obj:`list`): A list of Matrix objects with values
            obtained through permutations.
        compare_func (:obj:`function`): A function that, when given two
            values, returns True if the second meets the condition.

    Returns:
        numpy.ndarray: An array of p-values.

    Todo:
        * Take optional clip values.
        * Take optional number of permutations.
    """
    p_val_headers = deepcopy(observed_matrix.headers)
    ndim = observed_matrix.ndim
    p_val_headers[str(ndim)] = ['P-Values']

    # Create the P-values matrix.  The shape should be the same as the
    #    observed data, with one extra dimension if the last dimension has
    #    size > 1
    if observed_matrix.shape[-1] == 1:  # pragma: nocover
        p_vals_shape = observed_matrix.shape
    else:  # pragma: nocover
        p_vals_shape = list(observed_matrix.shape) + [1]
    p_values = Matrix(np.zeros(p_vals_shape), headers=p_val_headers)

    num_permutations = 0
    for rand in test_matrices:
        # If the random matrices are a stack with more dimensions or more
        #    layers, compare each layer to observed
        if rand.ndim > ndim or (
                rand.shape[-1] > observed_matrix.shape[-1]
                ):  # pragma: nocover
            # Determine shape of test matrix
            if rand.ndim > ndim:
                test_shape = list(rand.shape)[:-1]
            else:
                test_shape = observed_matrix.shape
            # Loop through each layer
            for i in range(rand.shape[-1]):
                p_values += compare_func(
                    observed_matrix,
                    # Slice off one test layer
                    rand[..., i].reshape(test_shape))
                num_permutations += 1
        elif rand.ndim < len(p_vals_shape):  # pragma: nocover
            p_values += compare_func(
                observed_matrix, rand).reshape(p_vals_shape)
            num_permutations += 1
        else:  # pragma: nocover
            p_values += compare_func(observed_matrix, rand)
            num_permutations += 1

    # Divide by the number of permutations and clip just in case
    p_values = np.clip(
        np.nan_to_num(p_values / num_permutations), 0.0, 1.0)
    return p_values
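# Illustrative sketch (not part of the library): the p-value for a cell is
#    the fraction of permutations whose value meets the comparison
#    condition against the observed value.  A pure-numpy stand-in for the
#    absolute-value comparison is used here; the real compare functions
#    live in perm_testing, so their exact semantics are an assumption.
import numpy as np

observed = np.array([2.0, -1.0])
permuted = [np.array([3.0, 0.5]), np.array([1.0, -2.0])]
hits = sum((np.abs(p) >= np.abs(observed)).astype(float) for p in permuted)
p_values = hits / len(permuted)
assert np.allclose(p_values, [0.5, 0.5])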
def get_matrix(csv_fn):
    """Reads a CSV file of PAM data into a Matrix.

    The first row is a header: three coordinate columns followed by one
    squid (species identifier) per column.  Each remaining row holds the
    three coordinate values followed by presence / absence integers.
    """
    squids = []
    row_headers = []
    data = []
    with open(csv_fn) as in_file:
        header = True
        for line in in_file:
            if header:
                header = False
                squids = line.strip().split(',')[3:]
            else:
                parts = line.strip().split(',')
                row_headers.append(tuple([float(i) for i in parts[0:3]]))
                data.append([int(i) for i in parts[3:]])
    mtx = Matrix(
        np.array(data, dtype=int),
        headers={
            '0': row_headers,
            '1': squids
        })
    return mtx
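# Illustrative sketch (not part of the library): the CSV layout get_matrix
#    expects -- three leading coordinate columns, then one column per
#    squid.  The file contents below are made up for the example.
import os
import tempfile

_EXAMPLE_CSV = (
    'site,x,y,squid_a,squid_b\n'
    '1,-95.5,38.5,1,0\n'
    '2,-95.0,38.5,0,1\n')

with tempfile.NamedTemporaryFile(
        'w', suffix='.csv', delete=False) as tmp_f:
    tmp_f.write(_EXAMPLE_CSV)
example_mtx = get_matrix(tmp_f.name)
assert example_mtx.shape == (2, 2)
os.remove(tmp_f.name)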
def get_character_matrix_from_sequences_list(sequences, var_headers=None):
    """Converts a list of sequences into a character matrix.

    Args:
        sequences (:obj:`list` of :obj:`Sequence`): A list of Sequence
            objects to be converted.
        var_headers (:obj:`list` of headers, optional): If provided, uses
            these as variable headers for the columns in the matrix.

    Returns:
        Matrix: A matrix of sequence data.
    """
    if var_headers is not None:
        col_headers = var_headers
    else:
        col_headers = [
            'Column {}'.format(i)
            for i in range(len(sequences[0].cont_values))]
    data = np.zeros((len(sequences), len(col_headers)), dtype=float)
    row_headers = []
    for i, seq in enumerate(sequences):
        row_headers.append(seq.name)
        data[i] = np.array(seq.cont_values)
    return Matrix(data, headers={'0': row_headers, '1': col_headers})
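# Illustrative sketch (not part of the library): the function only needs
#    objects exposing `name` and `cont_values`, so a namedtuple stands in
#    for the real Sequence class here.
from collections import namedtuple

_FakeSequence = namedtuple('_FakeSequence', ['name', 'cont_values'])

seqs = [_FakeSequence('tip_a', [0.1, 0.9]),
        _FakeSequence('tip_b', [0.4, 0.6])]
char_mtx = get_character_matrix_from_sequences_list(seqs)
assert char_mtx.get_row_headers() == ['tip_a', 'tip_b']
assert char_mtx.get_column_headers() == ['Column 0', 'Column 1']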
def pdnew(pam, tree):
    """Creates a lookup matrix for the PD of sites in a matrix.

    Args:
        pam (:obj:`Matrix`): A Lifemapper Matrix object with presence
            absence values.
        tree (:obj:`TreeWrapper`): A TreeWrapper object for a wrapped
            Dendropy phylogenetic tree.

    Returns:
        Matrix: PD values and species richness for each community in the
            sample.  Col 1 = PD; Col 2 = SR; rows = individual samples
            from the pam.
    """
    # Get the number of samples in the community matrix data.
    nsamp = len(pam.get_row_headers())
    # Array to hold each sample's PD and richness.  Col 1 = PD; col 2 = SR.
    pd_array = np.zeros((nsamp, 2), dtype=float)

    # Calculate the PD value for each sample in 'pam'.
    for sample in range(nsamp):
        # Pull out the data for the current sample.
        my_samp = pam[sample]
        # Pull out which species are present in the sample; yields a list
        #    of species-name strings.
        sp_pres = list(it.compress(pam.get_column_headers(), my_samp))
        # Dendropy does not retain underscores in labels
        sp_pres = [i.replace('_', ' ') for i in sp_pres]
        # Get a tree of only the species present in the sample.
        tree_pres = tree.extract_tree_with_taxa_labels(sp_pres)
        # Get the sum of edge lengths for the sub tree.
        pd_pres = tree_pres.length()
        # Get the species richness of the sample.
        rch_samp = len(sp_pres)
        # Update pd_array.
        pd_array[sample] = [pd_pres, rch_samp]

    # Convert pd_array to a Matrix object and match headers to the pam
    #    data.
    return Matrix(
        pd_array,
        headers={'0': pam.get_row_headers(), '1': ['PD', 'SR']})
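# Illustrative sketch (not part of the library): what one iteration of the
#    pdnew loop computes.  Dendropy is assumed to be available, as it is
#    elsewhere in this repo; the tree and labels are made up.
import dendropy

demo_tree = dendropy.Tree.get(data='((A:1,B:1):1,C:2);', schema='newick')
present = ['A', 'C']
sub_tree = demo_tree.extract_tree_with_taxa_labels(present)
# PD is the sum of the branch lengths of the retained subtree.
print(sub_tree.length())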
def test_valid(self):
    """Test the function with valid inputs."""
    # Create a tree
    tree = TreeWrapper.get(data='(A,(B,((C,D),(E,F))));', schema='newick')
    mtx = Matrix(
        np.random.random((6, 2, 1)),
        headers={
            '0': ['A', 'B', 'C', 'D', 'E', 'F'],
            '1': ['label', 'other_val']
        })
    # This should not fail
    annotators.annotate_tree_with_label(tree, mtx, label_column=0)
def load_pams(pam_dir):
    """Loads PAMs from CSV files in the specified directory.

    Args:
        pam_dir (str): A directory containing PAM CSV files.

    Returns:
        list of Matrix: One matrix per CSV file found.
    """
    pams = []
    for fn in glob.glob(os.path.join(pam_dir, '*.csv')):
        with open(fn) as in_f:
            pams.append(
                Matrix.load_csv(in_f, num_header_rows=1, num_header_cols=3))
    return pams
def test_valid(self, valid_phylo_beta_diversity_package):
    """Test the method with valid data.

    Args:
        valid_phylo_beta_diversity_package (tuple): A tuple of information
            that together forms a valid phylogenetic beta diversity
            package.

    Note:
        * Test values were determined from the example at
            https://rdrr.io/rforge/betapart/man/phylo.beta.pair.html
    """
    (pam_fn, tree_fn, _, _, _, test_beta_sim_fn, test_beta_sne_fn,
     test_beta_sor_fn, _, _, _, test_phylo_beta_sim_fn,
     test_phylo_beta_sne_fn, test_phylo_beta_sor_fn
     ) = valid_phylo_beta_diversity_package
    with open(pam_fn) as in_f:
        pam = Matrix.load_csv(in_f, num_header_rows=1, num_header_cols=1)
    tree = TreeWrapper.from_filename(tree_fn)
    with open(test_beta_sim_fn) as in_f:
        test_beta_sim = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)
    with open(test_beta_sne_fn) as in_f:
        test_beta_sne = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)
    with open(test_beta_sor_fn) as in_f:
        test_beta_sor = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)
    with open(test_phylo_beta_sim_fn) as in_f:
        test_phylo_beta_sim = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)
    with open(test_phylo_beta_sne_fn) as in_f:
        test_phylo_beta_sne = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)
    with open(test_phylo_beta_sor_fn) as in_f:
        test_phylo_beta_sor = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)

    (beta_sim, phylo_beta_sim, beta_sne, phylo_beta_sne, beta_sor,
     phylo_beta_sor) = pbd.calculate_phylo_beta_diversity_sorensen(
         pam, tree)
    # Check matrix outputs to see if they are within tolerance
    assert np.allclose(beta_sim, test_beta_sim)
    assert np.allclose(phylo_beta_sim, test_phylo_beta_sim)
    assert np.allclose(beta_sne, test_beta_sne)
    assert np.allclose(phylo_beta_sne, test_phylo_beta_sne)
    assert np.allclose(beta_sor, test_beta_sor)
    assert np.allclose(phylo_beta_sor, test_phylo_beta_sor)
def main():
    pam_fn = 'C:/Users/cj/Desktop/ryan_v3/pam.lmm'
    tree_fn = 'C:/Users/cj/Desktop/ryan_v3/squid_tree.nex'
    out_fn = 'C:/Users/cj/Desktop/ryan_v3/tree_mtx.lmm'

    with open(pam_fn, 'rb') as in_file:
        pam = Matrix.load_flo(in_file)
    tree = TreeWrapper.get(path=tree_fn, schema='nexus')
    tree_mtx = calculate_tree_site_statistics(pam, tree)
    with open(out_fn, 'wb') as out_file:
        tree_mtx.save(out_file)
    print(tree_mtx.max(axis=1))
    print(tree_mtx.max(axis=0))
def test_valid(self, tmpdir):
    """Test the function with valid inputs.

    Args:
        tmpdir (:obj:`py.path.local`): A temporary directory test fixture
            generated by pytest.
    """
    # Create a tree
    tree = TreeWrapper.get(data='(A,(B,((C,D),(E,F))));', schema='newick')
    mtx = Matrix(
        np.random.random((6, 3, 2)),
        headers={
            '0': ['A', 'B', 'C', 'D', 'E', 'F'],
            '1': ['label', 'other_val', 'one_more_val']
        })
    # This should not fail
    output_directory = os.path.join(tmpdir.dirname, 'plots')
    create_distribution_plots(tree, mtx, output_directory)
def test_valid(self, valid_phylo_beta_diversity_package):
    """Test the method with valid data.

    Args:
        valid_phylo_beta_diversity_package (tuple): A tuple of information
            that together forms a valid phylogenetic beta diversity
            package.

    Note:
        * Test values were determined from the example at
            https://rdrr.io/rforge/betapart/man/phylo.beta.pair.html
    """
    (pam_fn, tree_fn, test_beta_jac_fn, test_beta_jne_fn, test_beta_jtu_fn,
     _, _, _, test_phylo_beta_jac_fn, test_phylo_beta_jne_fn,
     test_phylo_beta_jtu_fn, _, _, _) = valid_phylo_beta_diversity_package
    with open(pam_fn) as in_f:
        pam = Matrix.load_csv(in_f, num_header_rows=1, num_header_cols=1)
    tree = TreeWrapper.from_filename(tree_fn)
    with open(test_beta_jac_fn) as in_f:
        test_beta_jac = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)
    with open(test_beta_jne_fn) as in_f:
        test_beta_jne = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)
    with open(test_beta_jtu_fn) as in_f:
        test_beta_jtu = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)
    with open(test_phylo_beta_jac_fn) as in_f:
        test_phylo_beta_jac = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)
    with open(test_phylo_beta_jne_fn) as in_f:
        test_phylo_beta_jne = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)
    with open(test_phylo_beta_jtu_fn) as in_f:
        test_phylo_beta_jtu = Matrix.load_csv(
            in_f, num_header_rows=1, num_header_cols=1)

    (beta_jtu, phylo_beta_jtu, beta_jne, phylo_beta_jne, beta_jac,
     phylo_beta_jac) = pbd.calculate_phylo_beta_diversity_jaccard(
         pam, tree)
    # Check matrix outputs to see if they are within tolerance
    assert np.allclose(beta_jtu, test_beta_jtu)
    assert np.allclose(phylo_beta_jtu, test_phylo_beta_jtu)
    assert np.allclose(beta_jne, test_beta_jne)
    assert np.allclose(phylo_beta_jne, test_phylo_beta_jne)
    assert np.allclose(beta_jac, test_beta_jac)
    assert np.allclose(phylo_beta_jac, test_phylo_beta_jac)
def get_report_data(accepted_taxa_filename, base_dir):
    """Summarizes which data sources have content for each accepted species.

    For every species in the accepted taxa CSV, checks for a POWO JSON
    file, an iDigBio CSV, and a GBIF CSV under the genus directory.  Each
    source scores -1 (file missing), 0 (file empty), or 1 (file has
    content); species with a perfect score are omitted from the report.
    """
    num_accepted_species = 0
    species_report = {}
    # Generate report
    with open(accepted_taxa_filename) as taxa_file:
        for line in taxa_file:
            num_accepted_species += 1
            parts = line.split(', ')
            species_name = parts[1].strip().strip('"')
            sp_key = int(parts[2])
            genus_name = species_name.split(' ')[0]
            genus_dir = os.path.join(base_dir, genus_name)

            kew_filename = os.path.join(
                genus_dir, '{}_powo.json'.format(species_name))
            k_val = -1
            if os.path.exists(kew_filename):
                k_val += 1
                if os.stat(kew_filename).st_size > 5:
                    k_val += 1

            idigbio_filename = os.path.join(
                genus_dir, '{}_idigbio.csv'.format(species_name))
            i_val = -1
            if os.path.exists(idigbio_filename):
                i_val += 1
                if os.stat(idigbio_filename).st_size > 5:
                    i_val += 1

            gbif_filename = os.path.join(
                genus_dir, '{}_gbif.csv'.format(species_name))
            g_val = -1
            if os.path.exists(gbif_filename):
                g_val += 1
                if os.stat(gbif_filename).st_size > 5:
                    g_val += 1

            if sum([k_val, i_val, g_val]) < 3:
                species_report[species_name] = [k_val, i_val, g_val]

    # Create a matrix for output
    species_names = []
    report_data = []
    for k in sorted(species_report.keys()):
        species_names.append(k)
        report_data.append(species_report[k])
    species_report_matrix = Matrix(
        np.array(report_data),
        headers={
            '0': species_names,
            '1': ['POWO', 'iDigBio', 'GBIF']})
    return num_accepted_species, species_report_matrix
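# Illustrative sketch (not part of the library): the accepted-taxa line
#    format that get_report_data assumes.  The values are made up.
line = '123, "Acer rubrum", 3189866\n'
parts = line.split(', ')
assert parts[1].strip().strip('"') == 'Acer rubrum'
assert int(parts[2]) == 3189866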
def correct_p_values(p_values_matrix, false_discovery_rate=0.05):
    """Performs p-value correction (Benjamini-Hochberg).

    Args:
        p_values_matrix (:obj:`Matrix`): A Matrix of p-values to correct.
        false_discovery_rate (:obj:`float`): An acceptable false discovery
            rate (alpha) value to declare a cell significant.

    Returns:
        Matrix: A matrix object of significant values.

    Todo:
        * Enable other correction types.
        * Consider how metadata may be added.
        * Consider producing a matrix of the maximum FDR value that would
            mark each cell as significant.
    """
    # Reshape data into a one-dimensional array
    p_flat = p_values_matrix.flatten()
    num_vals = p_flat.size

    # 1. Order p-values
    # 2. Assign rank
    # 3. Create critical values
    # 4. Find the largest p-value such that P(i) < critical value
    # 5. All P(j) such that j <= i are significant
    rank = 1
    comp_p = 0.0
    for p in sorted(p_flat.tolist()):
        crit_val = false_discovery_rate * (float(rank) / num_vals)
        # Check if the p-value is less than the critical value
        if p < crit_val:
            # If this p is smaller, all p-values smaller than this one are
            #    "significant", even those that were greater than their
            #    respective critical values
            comp_p = p
        rank += 1

    headers = deepcopy(p_values_matrix.headers)
    headers[str(p_values_matrix.ndim)] = ['BH Corrected']
    sig_values = (p_values_matrix <= comp_p).astype(int)
    return Matrix(sig_values, headers=headers)
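# Illustrative sketch (not part of the library): the Benjamini-Hochberg
#    threshold found by the loop above, worked on a small made-up set of
#    p-values with alpha = 0.05.
p_sorted = [0.01, 0.02, 0.04, 0.30]
alpha = 0.05
comp_p = 0.0
for rank, p in enumerate(p_sorted, start=1):
    # Critical value for this rank: alpha * rank / N.
    if p < alpha * rank / len(p_sorted):
        comp_p = p
print(comp_p)  # 0.02: the critical values are 0.0125, 0.025, 0.0375, 0.05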
def main():
    """Main method for script."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'shapegrid_filename', type=str,
        help='File location of the shapegrid shapefile')
    parser.add_argument(
        'pam_filename', type=str,
        help='File location of the PAM matrix for statistics')
    parser.add_argument(
        'tree_filename', type=str,
        help='File location of the tree to use for statistics')
    parser.add_argument(
        'tree_schema', choices=['newick', 'nexus'],
        help='The tree schema')
    parser.add_argument(
        'out_geojson_filename', type=str,
        help='File location to write the output GeoJSON')
    parser.add_argument(
        '--layer', nargs=2, action='append',
        help='File location of a layer followed by a label')
    args = parser.parse_args()

    # Load data
    pam = Matrix.load(args.pam_filename)
    tree = TreeWrapper.get(path=args.tree_filename, schema=args.tree_schema)

    # Encode layers
    encoded_layers = encode_environment_layers(
        args.shapegrid_filename, args.layer)

    # Calculate PAM statistics
    stats_mtx = calculate_tree_site_statistics(pam, tree)

    # Join encoded layers and PAM statistics
    mtx = join_encoded_layers_and_pam_stats(encoded_layers, stats_mtx)

    # Generate GeoJSON
    geojson_data = create_geojson(args.shapegrid_filename, mtx)

    # Write GeoJSON
    with open(args.out_geojson_filename, 'w') as out_file:
        json.dump(geojson_data, out_file)
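# Illustrative sketch (not part of the library): a hypothetical invocation
#    of this script, matching the argparse definition above.  The script
#    name and all paths are made up.
#
#    python create_geojson.py \
#        --layer elevation.tif Elevation \
#        shapegrid.shp pam.lmm tree.nex newick output.geojson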
def calc_sig_phylo_sor(pam, tree):  # pragma: no cover
    """Calculates phylogenetic beta diversity for the sorensen index family.

    Args:
        pam (:obj:`Matrix`): A Lifemapper Matrix object with presence
            absence values.
        tree (:obj:`TreeWrapper`): A TreeWrapper object for a wrapped
            Dendropy phylogenetic tree.

    Returns:
        Phylogenetic beta diversity matrices (site by site)
            * phylo_beta_sim: ADD DESCRIPTION
            * phylo_beta_sne: ADD DESCRIPTION
            * phylo_beta_sor: ADD DESCRIPTION

    Todo:
        * Fill in method documentation
        * Fill in method
        * Fill in tests / documentation
    """
    # Get a lookup dictionary for the matrix index of each species in the
    #    PAM in case they are not in the same order as the taxa in the tree
    species_lookup = get_species_index_lookup(pam)

    # Build a header dictionary; all of the returned matrices will have the
    #    same headers, site rows by site columns.
    # Note: This will differ from the R method because each site will be
    #    present in both the rows and the columns.
    mtx_headers = {
        '0': pam.get_row_headers(),  # Row headers
        '1': pam.get_row_headers()  # Column headers
    }
    num_sites = pam.data.shape[0]  # Get the number of sites in the PAM

    # Note: For ease of development, use these numpy arrays for the
    #    computations.  They will be wrapped into Matrix objects when they
    #    are returned from the function.
    phylo_beta_sim_data = np.zeros((num_sites, num_sites), dtype=float)
    phylo_beta_sne_data = np.zeros((num_sites, num_sites), dtype=float)
    phylo_beta_sor_data = np.zeros((num_sites, num_sites), dtype=float)

    # TODO: Compute phylo beta diversity for sorensen index family
    core_calc = core_PD_calc(pam, tree)

    # This loop will populate the arrays with beta diversity metrics.
    for my_row in range(core_calc.data.shape[0]):
        my_dat = core_calc.data[my_row, 0:4]
        my_dim = core_calc.get_row_headers()[my_row]
        phylo_beta_sim_data[my_dim[0], my_dim[1]] = (
            my_dat[0] / (my_dat[0] + my_dat[3]))
        phylo_beta_sim_data[my_dim[1], my_dim[0]] = phylo_beta_sim_data[
            my_dim[0], my_dim[1]]
        phylo_beta_sor_data[my_dim[0], my_dim[1]] = (
            my_dat[2] / ((2 * my_dat[3]) + my_dat[2]))
        phylo_beta_sor_data[my_dim[1], my_dim[0]] = phylo_beta_sor_data[
            my_dim[0], my_dim[1]]
        phylo_beta_sne_data[my_dim[0], my_dim[1]] = (
            (my_dat[1] - my_dat[0]) / ((2 * my_dat[3]) + my_dat[2])) * (
                my_dat[3] / (my_dat[0] + my_dat[3]))
        phylo_beta_sne_data[my_dim[1], my_dim[0]] = phylo_beta_sne_data[
            my_dim[0], my_dim[1]]

    # Set diagonals to 1 just to match formatting across scripts.
    for i in range(num_sites):
        phylo_beta_sim_data[i, i] = 1.
        phylo_beta_sne_data[i, i] = 1.
        phylo_beta_sor_data[i, i] = 1.

    return (Matrix(phylo_beta_sim_data, headers=mtx_headers),
            Matrix(phylo_beta_sne_data, headers=mtx_headers),
            Matrix(phylo_beta_sor_data, headers=mtx_headers))
def calc_phylo_jac_distr(pam, tree, nrand=5):  # pragma: no cover
    """Calculates the distribution of Jaccard metrics under randomization
    of phylogenetic relationships.

    Args:
        pam (:obj:`Matrix`): A Lifemapper Matrix object with presence
            absence values (site rows by species columns).
        tree (:obj:`TreeWrapper`): A TreeWrapper object for a wrapped
            Dendropy phylogenetic tree.
        nrand (:obj:`int`): The number of randomizations to perform.

    Returns:
        Mean and SD of the distribution of Jaccard-based metrics from the
            randomizations.

    Note:
        * It looks like the scipy.spatial.distance.jaccard method may be
            useful here.

    Todo:
        * Fill in method documentation
        * Fill in method (the randomization loop is still a stub; a single
            pass over the observed tree is computed below)
        * Fill in tests and method documentation in sphinx
    """
    # Get a lookup dictionary for the matrix index of each species in the
    #    PAM in case they are not in the same order as the taxa in the tree
    species_lookup = get_species_index_lookup(pam)

    # Build a header dictionary; all of the returned matrices will have the
    #    same headers, site rows by site columns.
    # Note: This will differ from the R method because each site will be
    #    present in both the rows and the columns.
    mtx_headers = {
        '0': pam.get_row_headers(),  # Row headers
        '1': pam.get_row_headers()  # Column headers
    }
    num_sites = pam.data.shape[0]  # Get the number of sites in the PAM

    # These arrays hold the metrics for the current pass; they would serve
    #    as running-average placeholders once the randomization loop below
    #    is implemented.
    phylo_beta_jtu_data = np.zeros((num_sites, num_sites), dtype=float)
    phylo_beta_jne_data = np.zeros((num_sites, num_sites), dtype=float)
    phylo_beta_jac_data = np.zeros((num_sites, num_sites), dtype=float)

    # TODO: Randomize tree, calculate metrics, save a running average.
    # for trial in range(nrand):
    #     Randomize the tip labels of the tree.

    # Get core metrics related to phylogeny.
    core_calc = core_PD_calc(pam, tree)  # Matrix object.

    # This loop will populate the arrays with all beta diversity metrics.
    for my_row in range(core_calc.data.shape[0]):
        # Pull out the phylogenetic core numeric values.
        my_dat = core_calc.data[my_row, 0:4]
        # Get index values for placing into the output arrays.
        my_dim = core_calc.get_row_headers()[my_row]
        # Populate the arrays.
        phylo_beta_jtu_data[my_dim[0], my_dim[1]] = (2 * my_dat[0]) / (
            (2 * my_dat[0]) + my_dat[3])
        phylo_beta_jtu_data[my_dim[1], my_dim[0]] = phylo_beta_jtu_data[
            my_dim[0], my_dim[1]]
        phylo_beta_jac_data[my_dim[0], my_dim[1]] = (
            my_dat[2] / (my_dat[3] + my_dat[2]))
        phylo_beta_jac_data[my_dim[1], my_dim[0]] = phylo_beta_jac_data[
            my_dim[0], my_dim[1]]
        phylo_beta_jne_data[my_dim[0], my_dim[1]] = (
            (my_dat[1] - my_dat[0]) / (my_dat[3] + my_dat[2])) * (
                my_dat[3] / ((2 * my_dat[0]) + my_dat[3]))
        phylo_beta_jne_data[my_dim[1], my_dim[0]] = phylo_beta_jne_data[
            my_dim[0], my_dim[1]]

    # Ensure diagonals are 1 just to match Biotaphy test file expectations.
    for i in range(num_sites):
        phylo_beta_jtu_data[i, i] = 1.
        phylo_beta_jac_data[i, i] = 1.
        phylo_beta_jne_data[i, i] = 1.

    return (Matrix(phylo_beta_jtu_data, headers=mtx_headers),
            Matrix(phylo_beta_jne_data, headers=mtx_headers),
            Matrix(phylo_beta_jac_data, headers=mtx_headers))
def calculate_phylo_beta_diversity_jaccard(pam, tree):
    """Calculates phylogenetic beta diversity for the jaccard index family.

    Args:
        pam (:obj:`Matrix`): A Lifemapper Matrix object with presence
            absence values (site rows by species columns).
        tree (:obj:`TreeWrapper`): A TreeWrapper object for a wrapped
            Dendropy phylogenetic tree.

    Returns:
        Phylogenetic beta diversity matrices (site by site)
            * beta_jtu: ADD DESCRIPTION
            * phylo_beta_jtu: ADD DESCRIPTION
            * beta_jne: ADD DESCRIPTION
            * phylo_beta_jne: ADD DESCRIPTION
            * beta_jac: ADD DESCRIPTION
            * phylo_beta_jac: ADD DESCRIPTION

    Note:
        * It looks like the scipy.spatial.distance.jaccard method may be
            useful here.

    Todo:
        * Fill in method documentation
        * Fill in method
    """
    # Get a lookup dictionary for the matrix index of each species in the
    #    PAM in case they are not in the same order as the taxa in the tree
    species_lookup = get_species_index_lookup(pam)

    # Build a header dictionary; all of the returned matrices will have the
    #    same headers, site rows by site columns.
    # Note: This will differ from the R method because each site will be
    #    present in both the rows and the columns.
    mtx_headers = {
        '0': pam.get_row_headers(),  # Row headers
        '1': pam.get_row_headers()  # Column headers
    }
    num_sites = pam.shape[0]  # Get the number of sites in the PAM

    # Note: For ease of development, use these numpy arrays for the
    #    computations.  They will be wrapped into Matrix objects when they
    #    are returned from the function.
    beta_jtu_data = np.zeros((num_sites, num_sites), dtype=float)
    phylo_beta_jtu_data = np.zeros((num_sites, num_sites), dtype=float)
    beta_jne_data = np.zeros((num_sites, num_sites), dtype=float)
    phylo_beta_jne_data = np.zeros((num_sites, num_sites), dtype=float)
    beta_jac_data = np.zeros((num_sites, num_sites), dtype=float)
    phylo_beta_jac_data = np.zeros((num_sites, num_sites), dtype=float)

    # TODO: Compute phylo beta diversity for jaccard index family
    # Get core metrics related to phylogeny.
    core_calc = core_PD_calc(pam, tree)  # Matrix object.

    # This loop will populate the arrays with all beta diversity metrics.
    for my_row in range(core_calc.shape[0]):
        # Pull out the phylogenetic core numeric values.
        my_dat = core_calc[my_row, 0:4]
        # Get index values for placing into the output arrays.
        my_dim = core_calc.get_row_headers()[my_row]
        # Populate the arrays.
        phylo_beta_jtu_data[my_dim[0], my_dim[1]] = (2 * my_dat[0]) / (
            (2 * my_dat[0]) + my_dat[3])
        phylo_beta_jtu_data[my_dim[1], my_dim[0]] = phylo_beta_jtu_data[
            my_dim[0], my_dim[1]]
        phylo_beta_jac_data[my_dim[0], my_dim[1]] = (
            my_dat[2] / (my_dat[3] + my_dat[2]))
        phylo_beta_jac_data[my_dim[1], my_dim[0]] = phylo_beta_jac_data[
            my_dim[0], my_dim[1]]
        phylo_beta_jne_data[my_dim[0], my_dim[1]] = (
            (my_dat[1] - my_dat[0]) / (my_dat[3] + my_dat[2])) * (
                my_dat[3] / ((2 * my_dat[0]) + my_dat[3]))
        phylo_beta_jne_data[my_dim[1], my_dim[0]] = phylo_beta_jne_data[
            my_dim[0], my_dim[1]]

    # Get core metrics for simple beta diversity (no phylo component).
    # Array columns: 0 == shared; 1 == not shared; 2 == sum not shared;
    #    3 == max not shared; 4 == min not shared.
    core_beta = core_Beta_calc(pam, tree)

    # Populate the arrays.
    beta_jtu_data = (2 * core_beta[4]) / ((2 * core_beta[4]) + core_beta[0])
    beta_jne_data = ((core_beta[3] - core_beta[4]) / (
        core_beta[0] + core_beta[2])) * (core_beta[0] / (
            (2 * core_beta[4]) + core_beta[0]))
    beta_jac_data = core_beta[2] / (core_beta[0] + core_beta[2])

    # Ensure diagonals are 1 just to match Biotaphy test file expectations.
    for i in range(num_sites):
        phylo_beta_jtu_data[i, i] = 1.
        phylo_beta_jac_data[i, i] = 1.
        phylo_beta_jne_data[i, i] = 1.
        beta_jtu_data[i, i] = 1.
        beta_jac_data[i, i] = 1.
        beta_jne_data[i, i] = 1.

    return (Matrix(beta_jtu_data, headers=mtx_headers),
            Matrix(phylo_beta_jtu_data, headers=mtx_headers),
            Matrix(beta_jne_data, headers=mtx_headers),
            Matrix(phylo_beta_jne_data, headers=mtx_headers),
            Matrix(beta_jac_data, headers=mtx_headers),
            Matrix(phylo_beta_jac_data, headers=mtx_headers))
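# Illustrative sketch (not part of the library): the Jaccard-family
#    formulas above, written out for a single pair of sites in terms of
#    the shared (a) and not-shared (b, c) components produced by the core
#    calculations.  The values are made up.
a = 4.0                     # shared component
b, c = 1.0, 3.0             # component unique to site 1 / site 2
min_ns, max_ns, sum_ns = min(b, c), max(b, c), b + c
beta_jac = sum_ns / (a + sum_ns)                      # total dissimilarity
beta_jtu = (2 * min_ns) / ((2 * min_ns) + a)          # turnover component
beta_jne = ((max_ns - min_ns) / (a + sum_ns)) * (
    a / ((2 * min_ns) + a))                           # nestedness component
assert abs(beta_jac - (beta_jtu + beta_jne)) < 1e-12  # they decompose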
def core_PD_calc(pam, tree):
    """Creates an array of core metrics to assess components of beta
    diversity.

    Args:
        pam (:obj:`Matrix`): A Lifemapper Matrix object with presence
            absence values.
        tree (:obj:`TreeWrapper`): A TreeWrapper object for a wrapped
            Dendropy phylogenetic tree.

    Returns:
        Matrix: Cols = core metrics; rows = pairwise comparisons.

    Details:
        In general, the metrics returned represent different contributions
        to PD arising from how communities are combined together.
        Metrics:
            min_not_shared: smallest distance from the individual samples
                to their combination.
            max_not_shared: largest distance from the individual samples
                to their combination.
            sum_not_shared: total addition to PD from both communities.
            shared: combined contribution to PD that the communities make
                jointly.
    """
    # PD for each community of the community matrix.
    pd_mtx = pdnew(pam, tree)

    # List all possible pairwise community combinations.
    combin = list(it.combinations(range(len(pam.get_row_headers())), 2))

    # Array to store presence values of pairwise site combinations.
    #    Rows = all pairwise community comparisons.  Cols = species.
    com_tot_pair = np.zeros(
        (len(combin), len(pam.get_column_headers())), dtype=float)

    # Populate the pairwise array: 1 == species present in at least one
    #    sample; 0 == species absent from both samples.
    for pair in range(len(combin)):
        # Assign each site's data to a new variable for convenience.
        site0 = pam[combin[pair][0]]
        site1 = pam[combin[pair][1]]
        # Is each species present in at least one sample?
        for idx in range(len(site0)):
            com_tot_pair[pair, idx] = 1 if site0[idx] or site1[idx] else 0

    # Convert the pairwise array into a Matrix object for pdnew().
    com_tot_pair = Matrix(
        com_tot_pair,
        headers={
            '0': combin,
            '1': pam.get_column_headers()
        })
    # Matrix holding the PD of each pairwise community combination.
    pd_tot_pair = pdnew(com_tot_pair, tree)

    # Calculate the sum of each pair of samples' PD values, i.e. treating
    #    each sample separately.
    sum_pd_pair = []
    for pair in range(len(combin)):
        tmp = pd_mtx[combin[pair][0], 0] + pd_mtx[combin[pair][1], 0]
        sum_pd_pair.append(tmp)

    # PD of all communities combined.
    com_tot_multi = np.sum(pam, axis=0)
    # Convert to presence / absence (i.e. 1, 0).
    com_tot_multi = [1 if i > 0 else 0 for i in com_tot_multi]
    # Calculate the PD.
    sp_pres = list(it.compress(pam.get_column_headers(), com_tot_multi))
    # Dendropy labels don't retain '_'
    sp_pres = [i.replace('_', ' ') for i in sp_pres]
    tree_pres = tree.extract_tree_with_taxa_labels(sp_pres)
    pd_tot_multi = tree_pres.length()

    # Contribution of PD that is not shared between two sites:
    # Pull out just the PD values.
    pd_sites = pd_mtx[0:len(pd_mtx.get_row_headers()), 0]
    # Create a list of all pairwise combinations.
    pd_combos = list(it.combinations(pd_sites, 2))

    # Array to hold the metrics assessing PD contributions to beta
    #    diversity.
    not_shared = np.zeros((len(pd_combos), 4), dtype=float)

    # Populate the array; see Details in the docstring.
    for pair in range(len(pd_combos)):
        # Pull out each site's individual PD.
        site1 = pd_combos[pair][0]  # PD site 1
        site2 = pd_combos[pair][1]  # PD site 2
        # Pull out the PD of the two sites combined.
        pdpair = pd_tot_pair[pair][0]
        # Pull out the sum of the separate PD values.
        sum_pair = sum_pd_pair[pair]
        # Metrics of interest:
        min_not_shared = min(pdpair - site1, pdpair - site2)  # min(b, c)
        max_not_shared = max(pdpair - site1, pdpair - site2)  # max(b, c)
        sum_not_shared = (2 * pdpair) - sum_pair  # b + c
        shared_val = pdpair - sum_not_shared  # a
        # Add the metrics to the appropriate row of the array.
        not_shared[pair] = [
            min_not_shared, max_not_shared, sum_not_shared, shared_val
        ]

    # Convert the not_shared array to a Matrix object.
    core_calc = Matrix(
        not_shared,
        headers={
            '0': combin,
            '1': ['min_not_shared', 'max_not_shared', 'sum_not_shared',
                  'shared']
        })
    return core_calc
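# Illustrative sketch (not part of the library): how the four core metrics
#    above relate for one pair of sites, using made-up PD values.
pd_site1, pd_site2 = 6.0, 7.0   # PD of each site alone
pd_pair = 9.0                   # PD of the two sites combined
b = pd_pair - pd_site1          # branch length unique to site 2 (3.0)
c = pd_pair - pd_site2          # branch length unique to site 1 (2.0)
sum_not_shared = (2 * pd_pair) - (pd_site1 + pd_site2)  # b + c == 5.0
shared = pd_pair - sum_not_shared                       # a == 4.0
assert sum_not_shared == b + c and shared == 4.0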
def calculate_continuous_ancestral_states(tree, char_mtx, sum_to_one=False,
                                          calc_std_err=False):
    """Calculates the continuous ancestral states for the nodes in a tree.

    Args:
        tree (Tree): A dendropy tree or TreeWrapper object.
        char_mtx (Matrix): A Matrix object with character information.
            Each row should represent a tip in the tree and each column
            should be a variable to calculate the ancestral state for.
        sum_to_one (:obj:`bool`, optional): If True, standardize the
            character matrix so that the values in a row sum to one.
            Defaults to False.
        calc_std_err (:obj:`bool`, optional): If True, calculate the
            standard error for each variable.  Defaults to False.

    Returns:
        The annotated tree and a matrix of character data with the
        following dimensions:
            * rows: nodes / tips in the tree
            * columns: character variables
            * depth: first layer is the calculated value, second layer is
                the standard error if desired

    Todo:
        * Add function for consistent label handling.
    """
    # Wrap tree if it is a plain dendropy tree
    if not isinstance(tree, TreeWrapper):
        tree = TreeWrapper.from_base_tree(tree)

    # Assign labels to nodes that don't have them
    tree.add_node_labels()

    # Synchronize tree and character data
    # Prune tree
    prune_taxa = []
    keep_taxon_labels = []
    init_row_headers = char_mtx.get_row_headers()
    for taxon in tree.taxon_namespace:
        label = taxon.label.replace(' ', '_')
        if label not in init_row_headers:
            prune_taxa.append(taxon)
            print(
                'Could not find {} in character matrix, pruning'.format(
                    label))
        else:
            keep_taxon_labels.append(label)

    if len(keep_taxon_labels) == 0:
        raise Exception(
            'None of the tree tips were found in the character data')

    tree.prune_taxa(prune_taxa)
    tree.purge_taxon_namespace()

    # Prune character data
    keep_rows = []
    for i, label in enumerate(init_row_headers):
        if label in keep_taxon_labels:
            keep_rows.append(i)
        else:
            print('Could not find {} in tree tips, pruning'.format(label))
    char_mtx = char_mtx.slice(keep_rows)

    # Standardize character matrix if requested
    tip_count, num_vars = char_mtx.shape
    if sum_to_one:
        for i in range(tip_count):
            sc = 1.0 / np.sum(char_mtx[i])
            for j in range(num_vars):
                char_mtx[i, j] *= sc

    # Initialize data matrix
    num_nodes = len(tree.nodes())
    data_shape = (num_nodes, num_vars, 2 if calc_std_err else 1)
    data = np.zeros(data_shape, dtype=float)

    # Initialize headers
    row_headers = []
    tip_col_headers = char_mtx.get_column_headers()
    tip_row_headers = char_mtx.get_row_headers()
    tip_lookup = dict([
        (tip_row_headers[i].replace('_', ' '), i)
        for i in range(tip_count)])

    # Get the number of internal nodes in the tree
    internal_node_count = num_nodes - tip_count

    # Loop through the tree and set the matrix index for each node.  Also
    #    set data values.
    node_headers = []
    node_i = tip_count
    tip_i = 0
    node_index_lookup = {}
    for node in tree.nodes():
        label = _get_node_label(node)
        if len(node.child_nodes()) == 0:
            # Tip
            node_index_lookup[label] = tip_i
            row_headers.append(label)
            data[tip_i, :, 0] = char_mtx[tip_lookup[label]]
            tip_i += 1
        else:
            # Internal node
            node_index_lookup[label] = node_i
            node_headers.append(label)
            data[node_i, :, 0] = np.zeros((1, num_vars), dtype=float)
            node_i += 1

    # Row headers should be extended with node headers
    row_headers.extend(node_headers)

    # For each variable
    for x in range(num_vars):
        # Compute the ML estimate of the root
        full_mcp = np.zeros(
            (internal_node_count, internal_node_count), dtype=float)
        full_vcp = np.zeros(internal_node_count, dtype=float)

        for k in tree.postorder_edge_iter():
            i = k.head_node
            if len(i.child_nodes()) != 0:
                node_num_i = node_index_lookup[
                    _get_node_label(i)] - tip_count
                for j in i.child_nodes():
                    tbl = 2. / j.edge_length
                    full_mcp[node_num_i][node_num_i] += tbl
                    node_num_j = node_index_lookup[_get_node_label(j)]
                    if len(j.child_nodes()) == 0:
                        full_vcp[node_num_i] += (
                            data[node_num_j, x, 0] * tbl)
                    else:
                        node_num_j -= tip_count
                        full_mcp[node_num_i][node_num_j] -= tbl
                        full_mcp[node_num_j][node_num_i] -= tbl
                        full_mcp[node_num_j][node_num_j] += tbl

        b = la.cho_factor(full_mcp)

        # These are the ML estimates for the ancestral states
        ml_est = la.cho_solve(b, full_vcp)
        sos = 0
        for k in tree.postorder_edge_iter():
            i = k.head_node
            node_num_i = node_index_lookup[_get_node_label(i)]
            if len(i.child_nodes()) != 0:
                data[node_num_i, x, 0] = ml_est[node_num_i - tip_count]

                if calc_std_err:
                    for j in i.child_nodes():
                        node_num_j = node_index_lookup[_get_node_label(j)]
                        temp = data[node_num_i, x, 0] - data[
                            node_num_j, x, 0]
                        sos += temp * temp / j.edge_length

                    # nni is node_num_i adjusted for only internal nodes
                    nni = node_num_i - tip_count
                    qpq = full_mcp[nni][nni]
                    tm1 = np.delete(full_mcp, (nni), axis=0)
                    tm = np.delete(tm1, (nni), axis=1)
                    b = la.cho_factor(tm)
                    sol = la.cho_solve(b, tm1[:, nni])
                    temp_std_err = qpq - np.inner(tm1[:, nni], sol)
                    data[node_num_i, x, 1] = math.sqrt(
                        2.0 * sos / (
                            (internal_node_count - 1) * temp_std_err))

    depth_headers = ['maximum_likelihood']
    if calc_std_err:
        depth_headers.append('standard_error')
    mtx_headers = {
        '0': row_headers, '1': tip_col_headers, '2': depth_headers}
    return tree, Matrix(data, headers=mtx_headers)
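# Illustrative sketch (not part of the library): a minimal call, assuming
#    the Matrix / TreeWrapper classes and numpy import used elsewhere in
#    this repo.  Tip values are made up; branch lengths must be present on
#    every edge because of the 1 / branch-length weighting above.
tree = TreeWrapper.get(data='((A:1,B:1):1,(C:1,D:1):1);', schema='newick')
char_mtx = Matrix(
    np.array([[0.2], [0.4], [0.6], [0.8]]),
    headers={'0': ['A', 'B', 'C', 'D'], '1': ['trait']})
tree, anc_mtx = calculate_continuous_ancestral_states(
    tree, char_mtx, calc_std_err=True)
# anc_mtx rows: 4 tips followed by internal nodes; depth 0 = ML estimate,
#    depth 1 = standard error.
print(anc_mtx.shape)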
def calculate_phylo_beta_diversity_sorensen(pam, tree):
    """Calculates phylogenetic beta diversity for the sorensen index family.

    Args:
        pam (:obj:`Matrix`): A Lifemapper Matrix object with presence
            absence values.
        tree (:obj:`TreeWrapper`): A TreeWrapper object for a wrapped
            Dendropy phylogenetic tree.

    Returns:
        Phylogenetic beta diversity matrices (site by site)
            * beta_sim: ADD DESCRIPTION
            * phylo_beta_sim: ADD DESCRIPTION
            * beta_sne: ADD DESCRIPTION
            * phylo_beta_sne: ADD DESCRIPTION
            * beta_sor: ADD DESCRIPTION
            * phylo_beta_sor: ADD DESCRIPTION

    Todo:
        * Fill in method documentation
        * Fill in method
    """
    # Build a header dictionary; all of the returned matrices will have the
    #    same headers, site rows by site columns.
    # Note: This will differ from the R method because each site will be
    #    present in both the rows and the columns.
    mtx_headers = {
        '0': pam.get_row_headers(),  # Row headers
        '1': pam.get_row_headers()  # Column headers
    }
    num_sites = pam.shape[0]  # Get the number of sites in the PAM

    # Note: For ease of development, use these numpy arrays for the
    #    computations.  They will be wrapped into Matrix objects when they
    #    are returned from the function.
    beta_sim_data = np.zeros((num_sites, num_sites), dtype=float)
    phylo_beta_sim_data = np.zeros((num_sites, num_sites), dtype=float)
    beta_sne_data = np.zeros((num_sites, num_sites), dtype=float)
    phylo_beta_sne_data = np.zeros((num_sites, num_sites), dtype=float)
    beta_sor_data = np.zeros((num_sites, num_sites), dtype=float)
    phylo_beta_sor_data = np.zeros((num_sites, num_sites), dtype=float)

    # TODO: Compute phylo beta diversity for sorensen index family
    core_calc = core_PD_calc(pam, tree)

    # This loop will populate the arrays with beta diversity metrics.
    for my_row in range(core_calc.shape[0]):
        my_dat = core_calc[my_row, 0:4]
        my_dim = core_calc.get_row_headers()[my_row]
        phylo_beta_sim_data[my_dim[0], my_dim[1]] = (
            my_dat[0] / (my_dat[0] + my_dat[3]))
        phylo_beta_sim_data[my_dim[1], my_dim[0]] = phylo_beta_sim_data[
            my_dim[0], my_dim[1]]
        phylo_beta_sor_data[my_dim[0], my_dim[1]] = (
            my_dat[2] / ((2 * my_dat[3]) + my_dat[2]))
        phylo_beta_sor_data[my_dim[1], my_dim[0]] = phylo_beta_sor_data[
            my_dim[0], my_dim[1]]
        phylo_beta_sne_data[my_dim[0], my_dim[1]] = (
            (my_dat[1] - my_dat[0]) / ((2 * my_dat[3]) + my_dat[2])) * (
                my_dat[3] / (my_dat[0] + my_dat[3]))
        phylo_beta_sne_data[my_dim[1], my_dim[0]] = phylo_beta_sne_data[
            my_dim[0], my_dim[1]]

    # Get core metrics for simple beta diversity (no phylo component).
    # Array columns: 0 == shared; 1 == not shared; 2 == sum not shared;
    #    3 == max not shared; 4 == min not shared.
    core_beta = core_Beta_calc(pam, tree)

    # Populate arrays.
    beta_sim_data = core_beta[4] / (core_beta[4] + core_beta[0])
    beta_sor_data = core_beta[2] / ((2 * core_beta[0]) + core_beta[2])
    beta_sne_data = (
        (core_beta[3] - core_beta[4]) / ((2 * core_beta[0]) + core_beta[2])
    ) * (core_beta[0] / (core_beta[4] + core_beta[0]))

    # Set diagonals to 1 just to match formatting across scripts.
    for i in range(num_sites):
        phylo_beta_sim_data[i, i] = 1.
        phylo_beta_sne_data[i, i] = 1.
        phylo_beta_sor_data[i, i] = 1.
        beta_sim_data[i, i] = 1.
        beta_sne_data[i, i] = 1.
        beta_sor_data[i, i] = 1.

    return (
        Matrix(beta_sim_data, headers=mtx_headers),
        Matrix(phylo_beta_sim_data, headers=mtx_headers),
        Matrix(beta_sne_data, headers=mtx_headers),
        Matrix(phylo_beta_sne_data, headers=mtx_headers),
        Matrix(beta_sor_data, headers=mtx_headers),
        Matrix(phylo_beta_sor_data, headers=mtx_headers))
def main():
    """Main method for script."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--out_stats_matrix_filename', type=str,
        help='Location to write statistics matrix.')
    parser.add_argument(
        'shapegrid_filename', type=str,
        help='File location of the shapegrid shapefile')
    parser.add_argument(
        'pam_filename', type=str,
        help='File location of the PAM matrix for statistics')
    parser.add_argument(
        'tree_filename', type=str,
        help='File location of the tree to use for statistics')
    parser.add_argument(
        'tree_schema', choices=['newick', 'nexus'],
        help='The tree schema')
    parser.add_argument(
        'out_geojson_filename', type=str,
        help='File location to write the output GeoJSON')
    parser.add_argument(
        'out_csv_filename', type=str,
        help='File location to write the output CSV')
    parser.add_argument(
        'out_matrix_filename', type=str,
        help='File location to write the output matrix')
    parser.add_argument(
        '--layer', nargs=2, action='append',
        help='File location of a layer followed by a label')
    args = parser.parse_args()

    # Load data
    pam = Matrix.load(args.pam_filename)
    tree = TreeWrapper.get(path=args.tree_filename, schema=args.tree_schema)

    # Encode layers
    encoded_layers = encode_environment_layers(
        args.shapegrid_filename, args.layer)

    # Calculate PAM statistics
    stats_mtx = calculate_tree_site_statistics(pam, tree)
    if args.out_stats_matrix_filename:
        stats_mtx.write(args.out_stats_matrix_filename)

    # Join encoded layers and PAM statistics
    mtx = join_encoded_layers_and_pam_stats(encoded_layers, stats_mtx)

    # Generate GeoJSON
    geojson_data = create_geojson(args.shapegrid_filename, mtx)

    # Write GeoJSON
    with open(args.out_geojson_filename, 'w') as out_file:
        json.dump(geojson_data, out_file, indent=4)

    # Write matrix data.  Replace each row header with a WKT polygon for
    #    the site's cell, built from the site center and a fixed half-cell
    #    resolution.
    new_rh = []
    res = 0.5
    for _, x, y in mtx.get_row_headers():
        min_x = x - res
        max_x = x + res
        min_y = y - res
        max_y = y + res
        new_rh.append(
            '"POLYGON (({} {},{} {},{} {},{} {},{} {}))"'.format(
                min_x, max_y, max_x, max_y, max_x, min_y, min_x, min_y,
                min_x, max_y))
    mtx.write(args.out_matrix_filename)
    mtx.set_row_headers(new_rh)
    with open(args.out_csv_filename, 'w') as out_file:
        mtx.write_csv(out_file)