def main():
    """Main method for script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('in_tree_filename',
                        type=str,
                        help='Path to initial tree')
    parser.add_argument('in_tree_schema',
                        type=str,
                        choices=['nexus', 'newick'],
                        help='The input tree schema')
    parser.add_argument('out_tree_filename', type=str, help='Output tree path')
    parser.add_argument('out_tree_schema',
                        type=str,
                        choices=['nexus', 'newick'],
                        help='The output tree schema')
    parser.add_argument('accepted_taxa_filename',
                        type=str,
                        help='File path to write out accepted taxon names')
    args = parser.parse_args()
    tree = TreeWrapper.get(path=args.in_tree_filename,
                           schema=args.in_tree_schema)
    out_tree, accepted_taxa = get_and_replace_names(tree)
    # Write tree
    out_tree.write(path=args.out_tree_filename, schema=args.out_tree_schema)
    # Write accepted taxa
    with open(args.accepted_taxa_filename, 'w') as taxa_out_file:
        for taxon_name in accepted_taxa:
            # Write each accepted taxon name on its own line
            taxa_out_file.write('{}\n'.format(taxon_name))
Example #2
    def test_valid(self):
        """Test the function with valid inputs."""
        # Create a tree
        tree = TreeWrapper.get(data='(A,(B,((C,D),(E,F))));', schema='newick')
        mtx = Matrix(np.random.random((6, 2, 1)),
                     headers={
                         '0': ['A', 'B', 'C', 'D', 'E', 'F'],
                         '1': ['label', 'other_val']
                     })
        # This should not fail
        annotators.annotate_tree_with_label(tree, mtx, label_column=0)
Example #3
def main():
    """Main method for script."""
    # Hard-coded input and output locations
    pam_fn = 'C:/Users/cj/Desktop/ryan_v3/pam.lmm'
    tree_fn = 'C:/Users/cj/Desktop/ryan_v3/squid_tree.nex'
    out_fn = 'C:/Users/cj/Desktop/ryan_v3/tree_mtx.lmm'

    with open(pam_fn, 'rb') as in_file:
        pam = Matrix.load_flo(in_file)
    tree = TreeWrapper.get(path=tree_fn, schema='nexus')
    tree_mtx = calculate_tree_site_statistics(pam, tree)
    with open(out_fn, 'wb') as out_file:
        tree_mtx.save(out_file)
    print(tree_mtx.max(axis=1))
    print(tree_mtx.max(axis=0))
Example #4
def purge_tree(tree_filename, tree_schema, occurrence_filename, species_col):
    """Get a tree and purge taxa not in occurrence data."""
    tree = TreeWrapper.get(path=tree_filename, schema=tree_schema)
    species = set()
    with open(occurrence_filename, 'r') as in_file:
        for line in in_file:
            # Split on commas; strip() below removes any surrounding whitespace
            parts = line.split(',')
            sp_name = parts[species_col].strip()
            species.add(sp_name)
    purge_taxa = []
    for taxon in tree.taxon_namespace:
        if taxon.label not in species:
            purge_taxa.append(taxon)
    tree.prune_taxa(purge_taxa)
    tree.purge_taxon_namespace()
    return tree
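
A minimal usage sketch (file names and column index hypothetical): prune a newick tree down to the species named in the second column of an occurrence CSV, then write the result with the dendropy-style write call used elsewhere in these examples.

pruned = purge_tree('my_tree.nwk', 'newick', 'occurrences.csv', 1)
pruned.write(path='my_pruned_tree.nwk', schema='newick')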
Example #5
    def test_valid(self, tmpdir):
        """Test the function with valid inputs.

        Args:
            tmpdir (:obj:`py.path.local`): A temporary directory test fixture
                generated by pytest.
        """
        # Create a tree
        tree = TreeWrapper.get(data='(A,(B,((C,D),(E,F))));', schema='newick')
        mtx = Matrix(
            np.random.random((6, 3, 2)),
            headers={'0': ['A', 'B', 'C', 'D', 'E', 'F'],
                     '1': ['label', 'other_val', 'one_more_val']})
        # This should not fail
        # Write plots inside the pytest-managed temporary directory
        output_directory = os.path.join(str(tmpdir), 'plots')
        create_distribution_plots(tree, mtx, output_directory)
Example #6
def get_squidded_tree(tree_fn, tree_schema, squid_json):
    """Annotate the tips of a tree with squids from a squid JSON file."""
    tree = TreeWrapper.get(path=tree_fn, schema=tree_schema)
    # The first line of the file is not part of the JSON payload; skip it
    # and parse the remainder
    with open(squid_json) as in_file:
        in_file.readline()
        squid_list_json = json.loads(in_file.read())
    squid_dict = {
        i['scientific_name'].replace('_', ' '): i['header']
        for i in squid_list_json
    }
    tree.annotate_tree_tips('squid', squid_dict)
    return tree
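
A minimal usage sketch (file names hypothetical): annotate a nexus tree with squids and write it back out, using the same dendropy-style write call as the other examples.

squid_tree = get_squidded_tree('my_tree.nex', 'nexus', 'squids.json')
squid_tree.write(path='my_squid_tree.nex', schema='nexus')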
Example #7
def main():
    """Main method for script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('shapegrid_filename',
                        type=str,
                        help='File location of the shapegrid shapefile')
    parser.add_argument('pam_filename',
                        type=str,
                        help='File location of the PAM matrix for statistics')
    parser.add_argument('tree_filename',
                        type=str,
                        help='File location of the tree to use for statistics')
    parser.add_argument('tree_schema',
                        choices=['newick', 'nexus'],
                        help='The tree schema')
    parser.add_argument('out_geojson_filename',
                        type=str,
                        help='File location to write the output GeoJSON')
    parser.add_argument('--layer',
                        nargs=2,
                        action='append',
                        help='File location of a layer followed by a label')
    args = parser.parse_args()
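    # Note: with action='append' and nargs=2, each repeated "--layer" option
    # adds a two-item list, so a hypothetical invocation like
    #     --layer elev.shp Elevation --layer precip.shp Precipitation
    # yields args.layer == [['elev.shp', 'Elevation'],
    #                       ['precip.shp', 'Precipitation']]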

    # Load data
    pam = Matrix.load(args.pam_filename)
    tree = TreeWrapper.get(path=args.tree_filename, schema=args.tree_schema)

    # Encode layers
    encoded_layers = encode_environment_layers(args.shapegrid_filename,
                                               args.layer)
    # Calculate PAM statistics
    stats_mtx = calculate_tree_site_statistics(pam, tree)
    # Join encoded layers and PAM statistics
    mtx = join_encoded_layers_and_pam_stats(encoded_layers, stats_mtx)
    # Generate GeoJSON
    geojson_data = create_geojson(args.shapegrid_filename, mtx)
    # Write GeoJSON
    with open(args.out_geojson_filename, 'w') as out_file:
        json.dump(geojson_data, out_file)
Example #8
def main():
    """Main method for script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('in_tree_filename',
                        type=str,
                        help='The file location of the input tree')
    parser.add_argument('in_tree_schema',
                        type=str,
                        choices=['newick', 'nexus'],
                        help='The schema of the input tree')
    parser.add_argument('out_tree_filename',
                        type=str,
                        help='The file location of the output tree')
    parser.add_argument('out_tree_schema',
                        type=str,
                        choices=['newick', 'nexus'],
                        help='The schema of the output tree')

    args = parser.parse_args()
    tree = TreeWrapper.get(path=args.in_tree_filename,
                           schema=args.in_tree_schema)
    out_tree = get_and_replace_names(tree)
    out_tree.write(path=args.out_tree_filename, schema=args.out_tree_schema)
Example #9
        '-c', '--out_csv_filename', type=str,
        help='If provided, write the output character matrix CSV '
             'to this file location')

    args = parser.parse_args()

    # Check that input files exist
    if not os.path.exists(args.in_tree_filename):
        raise IOError(
            'Input tree {} does not exist'.format(args.in_tree_filename))
    if not os.path.exists(args.data_filename):
        raise IOError(
            'Input data file {} does not exist'.format(args.data_filename))

    # Read the tree
    tree = TreeWrapper.get(
        path=args.in_tree_filename, schema=args.in_tree_schema)

    # Read data
    if args.data_format == 'csv':
        with open(args.data_filename) as in_file:
            sequences, headers = data_readers.read_csv_alignment_flo(
                in_file)
    elif args.data_format == 'json':
        with open(args.data_filename) as in_file:
            sequences, headers = data_readers.read_json_alignment_flo(
                in_file)
    elif args.data_format == 'phylip':
        with open(args.data_filename) as in_file:
            sequences = data_readers.read_phylip_alignment_flo(in_file)
        headers = None
    elif args.data_format == 'table':
        with open(args.data_filename) as in_file:
            sequences = data_readers.read_table_alignment_flo(in_file)
        headers = None
Example #10
def main():
    """Main method for script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--out_stats_matrix_filename',
                        type=str,
                        help='Location to write statistics matrix.')
    parser.add_argument('shapegrid_filename',
                        type=str,
                        help='File location of the shapegrid shapefile')
    parser.add_argument('pam_filename',
                        type=str,
                        help='File location of the PAM matrix for statistics')
    parser.add_argument('tree_filename',
                        type=str,
                        help='File location of the tree to use for statistics')
    parser.add_argument('tree_schema',
                        choices=['newick', 'nexus'],
                        help='The tree schema')
    parser.add_argument('out_geojson_filename',
                        type=str,
                        help='File location to write the output GeoJSON')
    parser.add_argument('out_csv_filename',
                        type=str,
                        help='File location to write the output CSV')
    parser.add_argument('out_matrix_filename',
                        type=str,
                        help='File location to write the output matrix')
    parser.add_argument('--layer',
                        nargs=2,
                        action='append',
                        help='File location of a layer followed by a label')
    args = parser.parse_args()

    # Load data
    pam = Matrix.load(args.pam_filename)
    tree = TreeWrapper.get(path=args.tree_filename, schema=args.tree_schema)

    # Encode layers
    encoded_layers = encode_environment_layers(args.shapegrid_filename,
                                               args.layer)
    # Calculate PAM statistics
    stats_mtx = calculate_tree_site_statistics(pam, tree)
    if args.out_stats_matrix_filename:
        stats_mtx.write(args.out_stats_matrix_filename)
    # Join encoded layers and PAM statistics
    mtx = join_encoded_layers_and_pam_stats(encoded_layers, stats_mtx)
    # Generate GeoJSON
    geojson_data = create_geojson(args.shapegrid_filename, mtx)
    # Write GeoJSON
    with open(args.out_geojson_filename, 'w') as out_file:
        json.dump(geojson_data, out_file, indent=4)

    # Write matrix data
    # Build a WKT polygon for each site from its center point; res is half
    # the (hard-coded) cell size
    new_rh = []
    res = 0.5
    for _, x, y in mtx.get_row_headers():
        min_x = x - res
        max_x = x + res
        min_y = y - res
        max_y = y + res
        new_rh.append('"POLYGON (({} {},{} {},{} {},{} {},{} {}))"'.format(
            min_x, max_y, max_x, max_y, max_x, min_y, min_x, min_y, min_x,
            max_y))
    # Save the matrix with its original row headers, then swap in the WKT
    # headers for the CSV output
    mtx.write(args.out_matrix_filename)
    mtx.set_row_headers(new_rh)
    with open(args.out_csv_filename, 'w') as out_file:
        mtx.write_csv(out_file)
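
To make the corner ordering in those WKT headers concrete, here is a quick standalone check (the center point is hypothetical): a cell centered at (10.0, 20.0) with res = 0.5 becomes a closed ring that starts and ends at the upper-left corner.

x, y, res = 10.0, 20.0, 0.5
min_x, max_x = x - res, x + res
min_y, max_y = y - res, y + res
wkt = '"POLYGON (({} {},{} {},{} {},{} {},{} {}))"'.format(
    min_x, max_y, max_x, max_y, max_x, min_y, min_x, min_y, min_x, max_y)
print(wkt)  # "POLYGON ((9.5 20.5,10.5 20.5,10.5 19.5,9.5 19.5,9.5 20.5))"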
Example #11
    def test_package_valid(self, valid_ancestral_state_package):
        """Tests the calculate_continusous_ancestral_states method.

        Args:
            valid_ancestral_state_package (pytest.fixture): A parameterized
                pytest fixture defined in conftest.py that provides a valid
                test package.

        Note:
            * This test will need to evolve as the output format changes.  It
                will probably be better to return a data structure with various
                values for each node rather than assigning the value to the
                node label.

        Raises:
            IOError: When the tree or alignment cannot be loaded for the
                specified file extension.
            Exception: When a specified successful result value cannot be
                found.
        """
        # Get the data files
        (tree_filename, alignment_filename,
         results_filename) = valid_ancestral_state_package

        # Process the tree file
        _, tree_ext = os.path.splitext(tree_filename)
        if tree_ext == '.nex':
            tree_schema = 'nexus'
        elif tree_ext == '.xml':
            tree_schema = 'nexml'
        elif tree_ext == '.tre':
            tree_schema = 'newick'
        else:
            raise IOError(
                'Cannot handle tree with extension: {}'.format(tree_ext))
        # tree = dendropy.Tree.get(path=tree_filename, schema=tree_schema)
        tree = TreeWrapper.get(path=tree_filename, schema=tree_schema)

        # Process the alignment file
        _, align_ext = os.path.splitext(alignment_filename)
        if align_ext == '.csv':
            with open(alignment_filename) as align_file:
                sequences, headers = data_readers.read_csv_alignment_flo(
                    align_file)
        elif align_ext == '.json':
            with open(alignment_filename) as align_file:
                sequences, headers = data_readers.read_json_alignment_flo(
                    align_file)
        elif align_ext == '.phylip':
            with open(alignment_filename) as align_file:
                sequences = data_readers.read_phylip_alignment_flo(align_file)
        elif align_ext == '.tbl':
            with open(alignment_filename) as align_file:
                sequences = data_readers.read_table_alignment_flo(align_file)
        else:
            raise IOError('Cannot handle alignments with extension: {}'.format(
                align_ext))

        char_mtx = data_readers.get_character_matrix_from_sequences_list(
            sequences)
        # Run analysis
        _, anc_mtx = anc_dp.calculate_continuous_ancestral_states(
            tree, char_mtx, calc_std_err=True, sum_to_one=False)

        # New testing method
        # (For now) assume that results file is csv with row headers for
        #    node labels and column headers for variables
        results = []
        h = None
        with open(results_filename) as results_file:
            for line in results_file:
                if h is None:
                    # Get headers
                    h = line.strip().split(',')[1:]
                else:
                    # Add result (without label) to list
                    node_result = [
                        float(i) for i in line.strip().split(',')[1:]
                    ]
                    results.append(np.array(node_result, dtype=float))

        # Look for all results (only maximum likelihood)
        for row in anc_mtx[:, :, 0]:
            found = False
            for i in range(len(results)):
                # Allow for some wiggle room with decimal precision
                if np.all(np.isclose(row, results[i])):
                    found = True
                    results.pop(i)
                    break
            if not found:
                raise Exception(
                    'Could not find expected result: {} in results'.format(
                        row))
Example #12
    def test_package_valid(self, valid_ancestral_distribution_package):
        """Tests the calculate_ancestral_distributions method.

        Args:
            valid_ancestral_distribution_package (pytest.fixture): A pytest
                fixture that is parametrized to provide valid ancestral
                distribution packages, one at a time, so that a separate test
                is run for each package.

        Raises:
            IOError: When the tree or alignment cannot be loaded for the
                specified file extension.
            Exception: When a specified successful result value cannot be
                found.
        """
        # Get the data files
        (tree_filename, alignment_filename,
         results_filename) = valid_ancestral_distribution_package
        # Process the tree file
        _, tree_ext = os.path.splitext(tree_filename)
        if tree_ext == '.nex':
            tree_schema = 'nexus'
        elif tree_ext == '.xml':
            tree_schema = 'nexml'
        elif tree_ext == '.tre':
            tree_schema = 'newick'
        else:
            raise IOError(
                'Cannot handle tree with extension: {}'.format(tree_ext))
        # tree = dendropy.Tree.get(path=tree_filename, schema=tree_schema)
        tree = TreeWrapper.get(path=tree_filename, schema=tree_schema)

        # Process the alignment file
        _, align_ext = os.path.splitext(alignment_filename)
        if align_ext == '.csv':
            with open(alignment_filename) as align_file:
                sequences, headers = data_readers.read_csv_alignment_flo(
                    align_file)
        elif align_ext == '.json':
            with open(alignment_filename) as align_file:
                sequences, headers = data_readers.read_json_alignment_flo(
                    align_file)
        elif align_ext == '.phylip':
            with open(alignment_filename) as align_file:
                sequences = data_readers.read_phylip_alignment_flo(align_file)
        elif align_ext == '.tbl':
            with open(alignment_filename) as align_file:
                sequences = data_readers.read_table_alignment_flo(align_file)
        else:
            raise IOError('Cannot handle alignments with extension: {}'.format(
                align_ext))

        char_mtx = data_readers.get_character_matrix_from_sequences_list(
            sequences)
        # Run analysis
        _, anc_mtx = anc_dp.calculate_ancestral_distributions(tree, char_mtx)

        # Testing method
        # Assume that the results file is a csv with row headers for node
        #    labels and output layer (maximum_likelihood / standard_error)
        #    and column headers for variables
        ml_results = []
        std_err_results = []
        h = None
        with open(results_filename) as results_file:
            for line in results_file:
                if h is None:
                    # Get headers
                    h = line.strip().split(',')[1:]
                else:
                    # Add result (without label) to appropriate list
                    parts = line.strip().split(',')
                    layer = parts[1].lower()
                    values = np.array([float(i) for i in parts[2:]],
                                      dtype=float)
                    if layer == 'maximum_likelihood':
                        ml_results.append(values)
                    else:
                        std_err_results.append(values)
        # Every maximum likelihood row should pair with a standard error row
        assert len(ml_results) == len(std_err_results)

        # Look for all results (ml and std err results should match rows)
        for row_idx in range(anc_mtx.shape[0]):
            found = False
            # Get rows from data
            ml_row = anc_mtx[row_idx, :, 0]
            std_err_row = anc_mtx[row_idx, :, 1]

            for i in range(len(ml_results)):
                if np.all(np.isclose(ml_row, ml_results[i])) and \
                        np.all(np.isclose(
                            std_err_row, std_err_results[i])):
                    found = True
                    ml_results.pop(i)
                    std_err_results.pop(i)
                    break
            if not found:
                raise Exception('Could not find {}, {} in results'.format(
                    ml_row, std_err_row))