Esempio n. 1
0
 def _create_distance_matrix(self):
     """Write a fixed 3x3 distance matrix to a new file and return its path.

     The file is created by ``mkstemp`` with a ``.txt`` suffix inside
     ``self.out_dir``.
     """
     rows = [
         [0.0, 0.850, 0.250],
         [0.850, 0.0, 0.500],
         [0.250, 0.500, 0.0],
     ]
     matrix = DistanceMatrix(rows)

     descriptor, path = mkstemp(suffix='.txt', dir=self.out_dir)
     # Only the path is used below; release the descriptor mkstemp opened.
     close(descriptor)

     matrix.write(path)
     return path
Esempio n. 2
0
def adonis(output_dir: str,
           distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata,
           formula: str,
           permutations: int = 999,
           n_jobs: int = 1) -> None:
    """Run the ``run_adonis.R`` command and render its results table as HTML.

    Parameters
    ----------
    output_dir : str
        Directory that receives ``adonis.tsv`` and the rendered template.
    distance_matrix : skbio.DistanceMatrix
        Matrix whose ids select and order the metadata rows.
    metadata : qiime2.Metadata
        Sample metadata; its ids are checked against the matrix ids via
        ``_validate_metadata_is_superset``.
    formula : str
        Model formula, parsed with ``ModelDesc.from_formula``; every factor
        on its right-hand side is looked up as a metadata column.
    permutations : int, optional
        Forwarded (as a string) on the command line.
    n_jobs : int, optional
        Forwarded (as a string) on the command line.

    Raises
    ------
    ValueError
        If a metadata column referenced by the formula has missing values.
    """
    # Check ids first, then keep only the matrix's samples, in matrix order.
    md_id_set = set(metadata.ids)
    sample_ids = distance_matrix.ids
    _validate_metadata_is_superset(md_id_set, set(sample_ids))

    md_frame = metadata.to_dataframe().reindex(sample_ids)
    md_frame.index.name = 'sample-id'
    metadata = qiime2.Metadata(md_frame)

    # Each factor named in the formula must be a fully populated column.
    model = ModelDesc.from_formula(formula)
    for term in model.rhs_termlist:
        for factor in term.factors:
            column = metadata.get_column(factor.name())
            if not column.has_missing_values():
                continue
            raise ValueError(
                'adonis requires metadata columns with no '
                'NaN values (missing values in column `%s`.)' %
                (column.name, ))

    # Inputs live in a scratch directory; the results file is written to
    # output_dir so it is still there once the scratch directory is removed.
    results_fp = os.path.join(output_dir, 'adonis.tsv')
    with tempfile.TemporaryDirectory() as scratch_dir:
        dm_fp = os.path.join(scratch_dir, 'dm.tsv')
        distance_matrix.write(dm_fp)

        md_fp = os.path.join(scratch_dir, 'md.tsv')
        metadata.save(md_fp)

        _run_command([
            'run_adonis.R',
            dm_fp,
            md_fp,
            formula,
            str(permutations),
            str(n_jobs),
            results_fp,
        ])

    # Turn the tab-separated results into an HTML table for the template.
    results_html = q2templates.df_to_html(pd.read_csv(results_fp, sep='\t'))
    index = os.path.join(TEMPLATES, 'adonis_assets', 'index.html')
    q2templates.render(index, output_dir, context={'results': results_html})
Esempio n. 3
0
def adonis(output_dir: str,
           distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata,
           formula: str,
           permutations: int = 999,
           n_jobs: int = 1) -> None:
    """Run the ``run_adonis.R`` command and render its results table as HTML.

    Parameters
    ----------
    output_dir : str
        Directory that receives ``adonis.tsv`` and the rendered template.
    distance_matrix : skbio.DistanceMatrix
        Matrix whose ids select and order the metadata rows.
    metadata : qiime2.Metadata
        Sample metadata; its ids are checked against the matrix ids via
        ``_validate_metadata_is_superset``.
    formula : str
        Model formula, parsed with ``ModelDesc.from_formula``; every factor
        on its right-hand side is looked up as a metadata column.
    permutations : int, optional
        Forwarded (as a string) on the command line.
    n_jobs : int, optional
        Forwarded (as a string) on the command line.

    Raises
    ------
    ValueError
        If a metadata column referenced by the formula has missing values.
    """
    # Validate sample metadata is superset et cetera
    metadata_ids = set(metadata.ids)
    dm_ids = distance_matrix.ids
    _validate_metadata_is_superset(metadata_ids, set(dm_ids))
    # filter ids. ids must be in same order as dm
    filtered_md = metadata.to_dataframe().reindex(dm_ids)
    filtered_md.index.name = 'sample-id'
    metadata = qiime2.Metadata(filtered_md)

    # Validate formula: every factor must name an existing metadata column,
    # and that column must be fully populated. Failing here gives a clear
    # Python error instead of handing NaN-containing data to the R script.
    terms = ModelDesc.from_formula(formula)
    for t in terms.rhs_termlist:
        for i in t.factors:
            column = metadata.get_column(i.name())
            if column.has_missing_values():
                raise ValueError(
                    'adonis requires metadata columns with no '
                    'NaN values (missing values in column `%s`.)' %
                    (column.name, ))

    # Run adonis. Inputs are written to a temporary directory; the results
    # file goes to output_dir so it outlives the temporary directory.
    results_fp = os.path.join(output_dir, 'adonis.tsv')
    with tempfile.TemporaryDirectory() as temp_dir_name:
        dm_fp = os.path.join(temp_dir_name, 'dm.tsv')
        distance_matrix.write(dm_fp)
        md_fp = os.path.join(temp_dir_name, 'md.tsv')
        metadata.save(md_fp)
        cmd = ['run_adonis.R', dm_fp, md_fp, formula, str(permutations),
               str(n_jobs), results_fp]
        _run_command(cmd)

    # Visualize results
    results = pd.read_csv(results_fp, sep='\t')
    results = q2templates.df_to_html(results)
    index = os.path.join(TEMPLATES, 'adonis_assets', 'index.html')
    q2templates.render(index, output_dir, context={'results': results})
Esempio n. 4
0
def main():
    """Compare every pair of trees in a directory and write two matrices.

    Files in ``argv[1]`` whose names end with ``ext`` are read with
    ``TreeNode.read``. Each unordered pair is passed to
    ``compare_rfd_intersect``; its two return values (overlapping-taxa count
    and Robinson-Foulds distance) fill two symmetric matrices, written to
    ``<argv[2]>.o.dm`` and ``<argv[2]>.rf.dm``.
    """
    tree_dir = argv[1]

    names, trees = [], []
    for fname in sorted(listdir(tree_dir)):
        if not fname.endswith(ext):
            continue
        # The tree's label is the file name with the extension stripped.
        names.append(fname[:-len(ext)])
        trees.append(TreeNode.read(join(tree_dir, fname)))
    print('%d trees read.' % len(names))

    count = len(trees)
    overlap = np.zeros(shape=[count, count])
    rf_dist = np.zeros(shape=[count, count])
    for row in range(count):
        for col in range(row + 1, count):
            shared, distance = compare_rfd_intersect(trees[row], trees[col])
            print('%s - %s: %d, %.3f' % (names[row], names[col],
                                         shared, distance))
            # Mirror each value so both matrices stay symmetric.
            overlap[row, col] = overlap[col, row] = shared
            rf_dist[row, col] = rf_dist[col, row] = distance

    # number of overlapping taxa matrix
    overlap_dm = DistanceMatrix(overlap, names)
    overlap_dm.write('%s.o.dm' % argv[2])

    # Robinson-Foulds distance matrix
    rf_dm = DistanceMatrix(rf_dist, names)
    rf_dm.write('%s.rf.dm' % argv[2])
Esempio n. 5
0
def _1(data: skbio.DistanceMatrix) -> LSMatFormat:
    """Write ``data`` in 'lsmat' format into a new ``LSMatFormat`` object."""
    lsmat_file = LSMatFormat()
    with lsmat_file.open() as handle:
        data.write(handle, format='lsmat')
    return lsmat_file
Esempio n. 6
0
class DistanceMatrixTests(DissimilarityMatrixTestData):
    """Tests for ``DistanceMatrix``: I/O, validation, condensed form,
    permutation and equality.

    The ``dm_*_data`` attributes are not defined here; presumably the parent
    class's ``setUp`` provides them.
    """

    def setUp(self):
        super(DistanceMatrixTests, self).setUp()

        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]
        # Expected condensed_form() output, in the same order as self.dms.
        self.dm_condensed_forms = [
            np.array([]),
            np.array([0.123]),
            np.array([0.01, 4.2, 12.0]),
        ]

    def test_io(self):
        # Smoke test of the public read/write API: serialize to an in-memory
        # buffer, read it back, and compare with the source matrix.
        buf = StringIO()
        self.dm_3x3.write(buf)
        buf.seek(0)
        roundtripped = DistanceMatrix.read(buf)
        self.assertEqual(roundtripped, self.dm_3x3)
        self.assertIs(type(roundtripped), DistanceMatrix)

    def test_deprecated_io(self):
        # to_file/from_file must still round-trip, each emitting UserWarning.
        buf = StringIO()
        npt.assert_warns(UserWarning, self.dm_3x3.to_file, buf)
        buf.seek(0)
        roundtripped = npt.assert_warns(UserWarning,
                                        DistanceMatrix.from_file, buf)
        self.assertEqual(roundtripped, self.dm_3x3)
        self.assertIs(type(roundtripped), DistanceMatrix)

    def test_init_invalid_input(self):
        # Asymmetric.
        asymmetric = [[0.0, 2.0], [1.0, 0.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(asymmetric, ['a', 'b'])

        # Ensure that the superclass validation is still being performed.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix([[1, 2, 3]], ['a'])

    def test_condensed_form(self):
        for dm, expected in zip(self.dms, self.dm_condensed_forms):
            self.assertTrue(np.array_equal(dm.condensed_form(), expected))

    def test_permute_condensed(self):
        # Can't really permute a 1x1 or 2x2: every call yields the same
        # condensed vector.
        fixed_cases = (
            (self.dm_1x1, np.array([])),
            (self.dm_2x2, np.array([0.123])),
        )
        for dm, expected in fixed_cases:
            for _ in range(2):
                npt.assert_equal(dm.permute(condensed=True), expected)

        snapshot = self.dm_3x3.copy()

        # Seed so the two successive permutations below are reproducible.
        np.random.seed(0)

        for expected in (np.array([12.0, 4.2, 0.01]),
                         np.array([4.2, 12.0, 0.01])):
            obs = self.dm_3x3.permute(condensed=True)
            npt.assert_equal(obs, expected)

        # Ensure dm hasn't changed after calling permute() on it a couple of
        # times.
        self.assertEqual(self.dm_3x3, snapshot)

    def test_permute_not_condensed(self):
        # 1x1 and 2x2 permutations equal the original but are new objects.
        for dm in (self.dm_1x1, self.dm_2x2):
            obs = dm.permute()
            self.assertEqual(obs, dm)
            self.assertIsNot(obs, dm)

        np.random.seed(0)

        # Expected results of two successive permute() calls after seeding.
        expected_data = (
            [[0, 12, 4.2],
             [12, 0, 0.01],
             [4.2, 0.01, 0]],
            [[0, 4.2, 12],
             [4.2, 0, 0.01],
             [12, 0.01, 0]],
        )
        for data in expected_data:
            exp = DistanceMatrix(data, self.dm_3x3.ids)
            obs = self.dm_3x3.permute()
            self.assertEqual(obs, exp)

    def test_eq(self):
        # Compare DistanceMatrix to DissimilarityMatrix, where both have the
        # same data and IDs. Equality is checked with each as left operand.
        dissim = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        self.assertTrue(self.dm_3x3 == dissim)
        self.assertTrue(dissim == self.dm_3x3)
Esempio n. 7
0
def _1(data: skbio.DistanceMatrix) -> LSMatFormat:
    """Write ``data`` in 'lsmat' format into a new ``LSMatFormat`` object."""
    target = LSMatFormat()
    with target.open() as out_fh:
        data.write(out_fh, format='lsmat')
    return target