def _create_distance_matrix(self):
    """Write a fixed 3x3 distance matrix to a temp file and return its path.

    The file is created with a ``.txt`` suffix inside ``self.out_dir``.
    """
    distances = [
        [0.0, 0.850, 0.250],
        [0.850, 0.0, 0.500],
        [0.250, 0.500, 0.0],
    ]
    matrix = DistanceMatrix(distances)

    # mkstemp hands back an open descriptor; only the path is needed here,
    # so the descriptor is closed before the matrix is written by path.
    handle, path = mkstemp(suffix='.txt', dir=self.out_dir)
    close(handle)

    matrix.write(path)
    return path
def adonis(output_dir: str,
           distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata,
           formula: str,
           permutations: int = 999,
           n_jobs: int = 1) -> None:
    """Run the ``run_adonis.R`` script and render its results as HTML.

    Parameters
    ----------
    output_dir : str
        Directory that receives ``adonis.tsv`` and the rendered template.
    distance_matrix : skbio.DistanceMatrix
        Matrix whose ids select and order the metadata rows.
    metadata : qiime2.Metadata
        Sample metadata; its ids are checked against the matrix ids with
        ``_validate_metadata_is_superset``.
    formula : str
        Model formula. It is parsed with ``ModelDesc.from_formula`` for
        validation and passed unchanged to the R script.
    permutations : int
        Passed to the R script as a string argument.
    n_jobs : int
        Passed to the R script as a string argument.

    Raises
    ------
    ValueError
        If a metadata column named on the right-hand side of ``formula``
        has missing values.
    """
    dm_ids = distance_matrix.ids
    _validate_metadata_is_superset(set(metadata.ids), set(dm_ids))

    # Keep only the samples present in the matrix, in the matrix's order.
    ordered_md = metadata.to_dataframe().reindex(dm_ids)
    ordered_md.index.name = 'sample-id'
    metadata = qiime2.Metadata(ordered_md)

    # Every factor on the right-hand side of the formula must resolve to a
    # metadata column without missing values.
    model = ModelDesc.from_formula(formula)
    rhs_factors = (factor
                   for term in model.rhs_termlist
                   for factor in term.factors)
    for factor in rhs_factors:
        column = metadata.get_column(factor.name())
        if not column.has_missing_values():
            continue
        raise ValueError(
            'adonis requires metadata columns with no '
            'NaN values (missing values in column `%s`.)' %
            (column.name, ))

    # The inputs only need to exist while the R script runs, so they live
    # in a temporary directory; the results go straight to output_dir.
    results_fp = os.path.join(output_dir, 'adonis.tsv')
    with tempfile.TemporaryDirectory() as workdir:
        dm_fp = os.path.join(workdir, 'dm.tsv')
        md_fp = os.path.join(workdir, 'md.tsv')
        distance_matrix.write(dm_fp)
        metadata.save(md_fp)
        _run_command(['run_adonis.R', dm_fp, md_fp, formula,
                      str(permutations), str(n_jobs), results_fp])

    # Turn the tab-separated results into an HTML table for the template.
    table_html = q2templates.df_to_html(pd.read_csv(results_fp, sep='\t'))
    index = os.path.join(TEMPLATES, 'adonis_assets', 'index.html')
    q2templates.render(index, output_dir, context={'results': table_html})
def adonis(output_dir: str,
           distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata,
           formula: str,
           permutations: int = 999,
           n_jobs: int = 1) -> None:
    """Run adonis through ``run_adonis.R`` and write an HTML report.

    Parameters
    ----------
    output_dir : str
        Directory where ``adonis.tsv`` is written and the report rendered.
    distance_matrix : skbio.DistanceMatrix
        Distance matrix handed to the R script; its ids determine which
        metadata rows are kept and in what order.
    metadata : qiime2.Metadata
        Sample metadata. ``_validate_metadata_is_superset`` is called with
        the metadata ids and the matrix ids before anything else runs.
    formula : str
        Model formula, parsed with ``ModelDesc.from_formula`` to find the
        metadata columns it references and forwarded as-is to the script.
    permutations : int
        Forwarded to the R script (converted with ``str``).
    n_jobs : int
        Forwarded to the R script (converted with ``str``).

    Raises
    ------
    ValueError
        If a metadata column referenced on the right-hand side of
        ``formula`` contains missing values.
    """
    # Validate sample metadata is superset et cetera
    metadata_ids = set(metadata.ids)
    dm_ids = distance_matrix.ids
    _validate_metadata_is_superset(metadata_ids, set(dm_ids))

    # filter ids. ids must be in same order as dm
    filtered_md = metadata.to_dataframe().reindex(dm_ids)
    filtered_md.index.name = 'sample-id'
    metadata = qiime2.Metadata(filtered_md)

    # Validate formula: each right-hand-side factor must name a metadata
    # column, and that column must be complete. Rejecting missing values
    # here gives a clear error before the external R script is launched.
    terms = ModelDesc.from_formula(formula)
    for t in terms.rhs_termlist:
        for i in t.factors:
            column = metadata.get_column(i.name())
            if column.has_missing_values():
                raise ValueError(
                    'adonis requires metadata columns with no '
                    'NaN values (missing values in column `%s`.)' %
                    (column.name, ))

    # Run adonis. Inputs are staged in a temporary directory that only has
    # to outlive the R script; results are written directly to output_dir.
    results_fp = os.path.join(output_dir, 'adonis.tsv')
    with tempfile.TemporaryDirectory() as temp_dir_name:
        dm_fp = os.path.join(temp_dir_name, 'dm.tsv')
        distance_matrix.write(dm_fp)
        md_fp = os.path.join(temp_dir_name, 'md.tsv')
        metadata.save(md_fp)
        cmd = ['run_adonis.R', dm_fp, md_fp, formula, str(permutations),
               str(n_jobs), results_fp]
        _run_command(cmd)

    # Visualize results
    results = pd.read_csv(results_fp, sep='\t')
    results = q2templates.df_to_html(results)
    index = os.path.join(TEMPLATES, 'adonis_assets', 'index.html')
    q2templates.render(index, output_dir, context={'results': results})
def main():
    """Compare all trees in a directory pairwise and save two matrices.

    Files in ``argv[1]`` whose names end with ``ext`` are read as trees, in
    sorted filename order. Each unordered pair is passed to
    ``compare_rfd_intersect``; the two values it returns are stored
    symmetrically and written to ``<argv[2]>.o.dm`` and
    ``<argv[2]>.rf.dm``.
    """
    tree_dir = argv[1]

    names, trees = [], []
    for fname in sorted(listdir(tree_dir)):
        if not fname.endswith(ext):
            continue
        # The tree is labelled by its file name with the extension removed.
        names.append(fname[:-len(ext)])
        trees.append(TreeNode.read(join(tree_dir, fname)))
    print('%d trees read.' % len(names))

    n = len(trees)
    overlap = np.zeros(shape=[n, n])
    rf_dist = np.zeros(shape=[n, n])

    # Only the upper triangle is computed; each result is mirrored into the
    # lower triangle and the diagonal stays at zero.
    for i in range(n):
        for j in range(i + 1, n):
            o, rf = compare_rfd_intersect(trees[i], trees[j])
            print('%s - %s: %d, %.3f' % (names[i], names[j], o, rf))
            overlap[i, j] = overlap[j, i] = o
            rf_dist[i, j] = rf_dist[j, i] = rf

    # number of overlapping taxa matrix
    DistanceMatrix(overlap, names).write('%s.o.dm' % argv[2])

    # Robinson-Foulds distance matrix
    DistanceMatrix(rf_dist, names).write('%s.rf.dm' % argv[2])
def _1(data: skbio.DistanceMatrix) -> LSMatFormat:
    """Serialize ``data`` in 'lsmat' format into a new ``LSMatFormat``."""
    lsmat_file = LSMatFormat()
    with lsmat_file.open() as handle:
        data.write(handle, format='lsmat')
    return lsmat_file
class DistanceMatrixTests(DissimilarityMatrixTestData):
    """Tests for ``DistanceMatrix`` built on the shared 1x1/2x2/3x3 data."""

    def setUp(self):
        """Build the three fixture matrices and their condensed forms."""
        super(DistanceMatrixTests, self).setUp()

        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]
        # Expected condensed vectors, in the same order as self.dms.
        self.dm_condensed_forms = [
            np.array([]),
            np.array([0.123]),
            np.array([0.01, 4.2, 12.0]),
        ]

    def test_io(self):
        """Round-trip a matrix through ``write``/``read`` in memory."""
        # Smoke test of the public read/write API: serialize to a buffer,
        # read it back, and require an equal object of the exact same type.
        stream = StringIO()
        self.dm_3x3.write(stream)
        stream.seek(0)
        roundtripped = DistanceMatrix.read(stream)

        self.assertEqual(roundtripped, self.dm_3x3)
        self.assertIs(type(roundtripped), DistanceMatrix)

    def test_deprecated_io(self):
        """``to_file``/``from_file`` still round-trip but emit a warning."""
        stream = StringIO()
        npt.assert_warns(UserWarning, self.dm_3x3.to_file, stream)
        stream.seek(0)
        roundtripped = npt.assert_warns(UserWarning,
                                        DistanceMatrix.from_file, stream)

        self.assertEqual(roundtripped, self.dm_3x3)
        self.assertIs(type(roundtripped), DistanceMatrix)

    def test_init_invalid_input(self):
        """Invalid input is rejected at construction time."""
        # Asymmetric data is not a valid distance matrix.
        asymmetric = [[0.0, 2.0], [1.0, 0.0]]
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(asymmetric, ['a', 'b'])

        # The checks inherited from the superclass must still be applied.
        with self.assertRaises(DissimilarityMatrixError):
            DistanceMatrix([[1, 2, 3]], ['a'])

    def test_condensed_form(self):
        """``condensed_form`` matches the expected vector for each fixture."""
        for matrix, expected in zip(self.dms, self.dm_condensed_forms):
            observed = matrix.condensed_form()
            self.assertTrue(np.array_equal(observed, expected))

    def test_permute_condensed(self):
        """``permute(condensed=True)`` returns permuted condensed vectors."""
        # A 1x1 or 2x2 matrix has nothing to shuffle, so repeated calls must
        # keep returning the same condensed vector.
        fixed_cases = (
            (self.dm_1x1, np.array([])),
            (self.dm_2x2, np.array([0.123])),
        )
        for matrix, expected in fixed_cases:
            for _ in range(2):
                observed = matrix.permute(condensed=True)
                npt.assert_equal(observed, expected)

        snapshot = self.dm_3x3.copy()

        # Seeding makes the next two permutations reproducible; the expected
        # vectors are tied to this seed and to the order of the calls.
        np.random.seed(0)
        for expected in (np.array([12.0, 4.2, 0.01]),
                         np.array([4.2, 12.0, 0.01])):
            observed = self.dm_3x3.permute(condensed=True)
            npt.assert_equal(observed, expected)

        # permute() must leave the matrix it was called on untouched.
        self.assertEqual(self.dm_3x3, snapshot)

    def test_permute_not_condensed(self):
        """``permute()`` returns a new, permuted ``DistanceMatrix``."""
        # For 1x1 and 2x2 the result equals the input but is a new object.
        for matrix in (self.dm_1x1, self.dm_2x2):
            observed = matrix.permute()
            self.assertEqual(observed, matrix)
            self.assertIsNot(observed, matrix)

        # Expected data for two consecutive permutations after seeding.
        np.random.seed(0)
        expected_data = (
            [[0, 12, 4.2], [12, 0, 0.01], [4.2, 0.01, 0]],
            [[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]],
        )
        for data in expected_data:
            expected = DistanceMatrix(data, self.dm_3x3.ids)
            observed = self.dm_3x3.permute()
            self.assertEqual(observed, expected)

    def test_eq(self):
        """Equality with a ``DissimilarityMatrix`` holds in both directions."""
        # Same data and ids, but an instance of the parent class.
        same_as_parent = DissimilarityMatrix(self.dm_3x3_data,
                                             ['a', 'b', 'c'])
        self.assertTrue(self.dm_3x3 == same_as_parent)
        self.assertTrue(same_as_parent == self.dm_3x3)