def test_assert_ordination_results_equal(self):
    # Exercises assert_ordination_results_equal: identical objects pass,
    # near-equal numeric values pass, and any mismatch in type, ids or
    # optional numeric attributes must raise AssertionError.
    baseline = OrdinationResults([1, 2])

    def expect_mismatch(left, right):
        # The comparison helper is required to fail for this pair.
        with npt.assert_raises(AssertionError):
            assert_ordination_results_equal(left, right)

    # an object always compares equal to itself
    assert_ordination_results_equal(baseline, baseline)

    # comparing against something of a different type must fail
    expect_mismatch(baseline, 'foo')

    # numeric values only need to be almost equal
    nearby = OrdinationResults([1.0000001, 1.9999999])
    assert_ordination_results_equal(baseline, nearby)

    # ids present on one side only (species first, then site)
    for id_attr in ('species_ids', 'site_ids'):
        setattr(nearby, id_attr, ['abc', 'def'])
        expect_mismatch(baseline, nearby)
        setattr(nearby, id_attr, None)

    # every optional numeric attribute goes through the same three checks
    optional_numeric = ('species', 'site', 'biplot', 'site_constraints',
                        'proportion_explained')
    for attr in optional_numeric:
        # present on one side only
        setattr(nearby, attr, [[1, 2], [3, 4]])
        expect_mismatch(baseline, nearby)
        setattr(nearby, attr, None)

        # present on both sides, but too far apart
        setattr(baseline, attr, [[1, 2], [3, 4]])
        setattr(nearby, attr, [[1, 2], [3.00002, 4]])
        expect_mismatch(baseline, nearby)
        for obj in (baseline, nearby):
            setattr(obj, attr, None)

        # present on both sides and close enough
        setattr(baseline, attr, [[1, 2], [3, 4]])
        setattr(nearby, attr, [[1, 2], [3.00000002, 4]])
        assert_ordination_results_equal(baseline, nearby)
        for obj in (baseline, nearby):
            setattr(obj, attr, None)
def setUpClass(cls):
    """Build the shared PCoA result and sample metadata used by the tests."""
    axes = ['PC1', 'PC2', 'PC3']

    # Two samples described along three principal coordinates.
    coordinates = pd.DataFrame.from_dict(
        {'s1': [0.1, 0.2, 7], 's2': [0.9, 0.2, 7]},
        orient='index',
        columns=axes,
    )
    coordinates.index.name = 'Sample ID'
    cls.test_df1 = coordinates

    eigenvalues = pd.Series([7, 2, 1], index=axes)
    explained = pd.Series([0.7, 0.2, 0.1], index=axes)
    cls.pcoa1 = OrdinationResults(
        'pcoa1',
        'pcoa1',
        eigvals=eigenvalues,
        samples=cls.test_df1,
        proportion_explained=explained,
    )

    # Metadata covers s1/s2 plus ids that are absent from the ordination;
    # the last row is entirely missing values.
    sample_ids = pd.Series(['s1', 's2', 'c', 'd', 'e'], name='#SampleID')
    columns = {
        'age_cat': ['30s', '40s', '50s', '30s', None],
        'num_cat': [7.24, 7.24, 8.25, 7.24, None],
        'other': [1, 2, 3, 4, None],
    }
    cls.test_metadata = pd.DataFrame(columns, index=sample_ids)
def setUp(self):
    """Create a 5x5 distance matrix and a seeded 100-sample ordination."""
    dm_ids = [f'S{i}' for i in range(5)]
    distances = np.array([
        [0, 1, 2, 3, 4],
        [1, 0, 4, 5, 6],
        [2, 4, 0, 6, 7],
        [3, 5, 6, 0, 8],
        [4, 6, 7, 8, 0],
    ])
    self.test_dm = DistanceMatrix(distances, ids=dm_ids)

    n_samples = 100
    # Fixed seed keeps the random embedding reproducible between runs.
    np.random.seed(825)
    coords = np.random.normal(size=(n_samples, 3)) + 2
    # Stretch the second and third axes by 3 and 6 respectively.
    coords[:, 1:] *= [3, 6]

    frame = pd.DataFrame(
        coords,
        index=[f'S{i}' for i in range(n_samples)],
        columns=[f'C{i}' for i in range(3)],
    )
    self.test_ord_results = OrdinationResults(
        'foo',
        'bar',
        eigvals=pd.Series(np.arange(n_samples)),
        samples=frame,
    )
def test_str(self):
    # str() of the fixture: species and site scores present, the rest N/A.
    rendered_full = str(self.ordination_results)
    expected_full = ("Ordination results:\n"
                     "\tEigvals: 2\n"
                     "\tProportion explained: N/A\n"
                     "\tSpecies: 3x2\n"
                     "\tSite: 3x2\n"
                     "\tBiplot: N/A\n"
                     "\tSite constraints: N/A\n"
                     "\tSpecies IDs: 'Species1', 'Species2', 'Species3'\n"
                     "\tSite IDs: 'Site1', 'Site2', 'Site3'")
    self.assertEqual(rendered_full, expected_full)

    # Only eigenvalues supplied: every optional attribute renders as N/A.
    eigvals_only = OrdinationResults(np.array([4.2]))
    rendered_minimal = str(eigvals_only)
    expected_minimal = ("Ordination results:\n"
                        "\tEigvals: 1\n"
                        "\tProportion explained: N/A\n"
                        "\tSpecies: N/A\n"
                        "\tSite: N/A\n"
                        "\tBiplot: N/A\n"
                        "\tSite constraints: N/A\n"
                        "\tSpecies IDs: N/A\n"
                        "\tSite IDs: N/A")
    self.assertEqual(rendered_minimal, expected_minimal)
def test_get_procrustes_results(self):
    # Regression check: fit the same PCoA file against itself. The expected
    # numbers were recorded from a previous run; the individual components
    # have their own tests, so this only guards against changes in output.
    id_map = {
        'CP3A1': 'S1',
        'CC1A1': 'S2',
        'CC2A1': 'S3',
        'CP1A1': 'S4',
    }
    results = get_procrustes_results(StringIO(pcoa1_f),
                                     StringIO(pcoa1_f),
                                     sample_id_map=id_map,
                                     randomize=None,
                                     max_dimensions=None)

    expected = OrdinationResults(
        eigvals=array([
            8976580.24393, 6044862.67619, 4372581.39431,
            3161360.10319, 2583594.45275, 2407555.39787,
        ]),
        proportion_explained=array([
            23.1764657118, 15.6071186064, 11.2894866423,
            8.16225689998, 6.67053450426, 6.21602253997,
        ]),
        site=array([
            [-0.199225958574, -0.250846540029, -0.119813087305,
             -0.155652031006, 0.18495315824, -0.160875399364],
            [-0.238263544222, -0.37724227779, -0.169458651217,
             0.0305157004776, 0.112181007345, 0.0677415967093],
            [0.116737988534, 0.414627960015, 0.201315243115,
             0.113769076804, -0.283025353088, -0.144278863311],
            [0.320751514262, 0.213460857804, 0.0879564954067,
             0.0113672537238, -0.0141088124974, 0.237412665966],
        ]),
        site_ids=['S3', 'S2', 'S1', 'S4'],
    )

    # Both transformed results must match the same expected values.
    for transformed in (results[0], results[1]):
        assert_almost_equal(transformed.eigvals, expected.eigvals)
        assert_almost_equal(transformed.proportion_explained,
                            expected.proportion_explained)
        self.assertEqual(transformed.site_ids, expected.site_ids)
        assert_almost_equal(transformed.site, expected.site)

    # Fitting a matrix to itself should leave essentially no residual.
    self.assertLess(results[2], 6e-30)
def setUpClass(cls):
    """Build fixtures and patch ``get_resources`` in the emperor API module.

    The patcher is started here and kept on ``cls.res_patcher``; nothing in
    this method stops it.
    """
    axes = ['PC1', 'PC2', 'PC3']

    coordinates = pd.DataFrame.from_dict(
        {'s1': [0.1, 0.2, 7], 's2': [0.9, 0.2, 7]},
        orient='index',
        columns=axes,
    )
    coordinates.index.name = 'Sample ID'
    cls.test_df1 = coordinates

    cls.pcoa1 = OrdinationResults(
        'pcoa1',
        'pcoa1',
        eigvals=pd.Series([7, 2, 1], index=axes),
        samples=cls.test_df1,
        proportion_explained=pd.Series([0.7, 0.2, 0.1], index=axes),
    )

    metadata_index = pd.Series(['s1', 's2', 'c', 'd', 'e'],
                               name='#SampleID')
    cls.test_metadata = pd.DataFrame(
        {
            'age_cat': ['30s', '40s', '50s', '30s', None],
            'num_cat': [7.24, 7.24, 8.25, 7.24, None],
            'other': [1, 2, 3, 4, None],
        },
        index=metadata_index,
    )

    # dataset1 carries metadata and a PCoA; dataset2 carries metadata only.
    pcoa_element = PCOAElement({
        'sample_set': DictElement({'beta_metric': cls.pcoa1}),
    })
    with_pcoa = DictElement({
        '__metadata__': MockMetadataElement(cls.test_metadata),
        '__pcoa__': pcoa_element,
    })
    metadata_only = DictElement({
        '__metadata__': MockMetadataElement(cls.test_metadata),
    })
    cls.resources = DictElement({
        'datasets': DictElement({
            'dataset1': with_pcoa,
            'dataset2': metadata_only,
        }),
    })
    cls.resources.accept(TrivialVisitor())

    # Make the API module's get_resources return the tree built above.
    cls.res_patcher = patch(
        'microsetta_public_api.api.emperor.get_resources')
    cls.mock_resources = cls.res_patcher.start()
    cls.mock_resources.return_value = cls.resources
def setUp(self):
    """Create CA results plus a small frame/ordination pair for plotting."""
    # In-memory CA results used for serialization round-trips. Biplot,
    # site constraints and proportion explained are deliberately absent.
    self.ordination_results = OrdinationResults(
        eigvals=np.array([0.0961330159181, 0.0409418140138]),
        species=np.array([[0.408869425742, 0.0695518116298],
                          [-0.1153860437, -0.299767683538],
                          [-0.309967102571, 0.187391917117]]),
        site=np.array([[-0.848956053187, 0.882764759014],
                       [-0.220458650578, -1.34482000302],
                       [1.66697179591, 0.470324389808]]),
        biplot=None,
        site_constraints=None,
        proportion_explained=None,
        species_ids=['Species1', 'Species2', 'Species3'],
        site_ids=['Site1', 'Site2', 'Site3'])

    # Frame for the plot method:
    #   categorical - numbers mixed with strings
    #   numeric     - ints, floats and float-like strings
    #   nancolumn   - numeric with a missing value (np.nan)
    plot_rows = [['foo', '42', 10],
                 [22, 0, 8],
                 [22, -4.2, np.nan],
                 ['foo', '42.19', 11]]
    self.df = pd.DataFrame(plot_rows,
                           index=['A', 'B', 'C', 'D'],
                           columns=['categorical', 'numeric', 'nancolumn'])

    # Minimal ordination whose site ids line up with the frame above.
    self.min_ord_results = OrdinationResults(
        eigvals=np.array([0.50, 0.25, 0.25]),
        site=np.array([[0.1, 0.2, 0.3],
                       [0.2, 0.3, 0.4],
                       [0.3, 0.4, 0.5],
                       [0.4, 0.5, 0.6]]),
        site_ids=['A', 'B', 'C', 'D'])
def embed(
    distance_matrix: DistanceMatrix,
    n_neighbors: int,
    min_dist: float = 1,
    number_of_dimensions: int = 2,
    random_state: int = 724,
) -> OrdinationResults:
    """Embed a precomputed distance matrix with UMAP.

    Parameters
    ----------
    distance_matrix
        Pairwise distances; its ``ids`` label the rows of the embedding.
    n_neighbors, min_dist, random_state
        Forwarded unchanged to ``UMAP``.
    number_of_dimensions
        Number of UMAP components to compute. Embeddings with fewer than
        three columns are padded with zero columns up to three.

    Returns
    -------
    OrdinationResults
        The result of passing the embedding through ``center``. Eigenvalues
        and proportion explained are zero placeholders, since the code
        computes neither for UMAP.

    Raises
    ------
    ValueError
        If ``number_of_dimensions`` is greater than the number of samples.
    """
    n_samples = len(distance_matrix.ids)
    # NOTE(review): the message mentions "number of samples - 2" while the
    # guard only rejects values above n_samples -- confirm which is intended.
    if number_of_dimensions > n_samples:
        raise ValueError(
            f'number_of_dimensions ({number_of_dimensions}) must be fewer '
            f'than number of samples ({n_samples}) - 2'
        )

    transformer = UMAP(
        n_neighbors=n_neighbors,
        n_components=number_of_dimensions,
        min_dist=min_dist,
        random_state=random_state,
        metric='precomputed',
    )
    embedding = transformer.fit_transform(distance_matrix[:, :])

    # Pad with zero columns so the result always has at least three axes.
    if embedding.shape[1] < 3:
        difference = 3 - embedding.shape[1]
        embedding = np.hstack(
            (embedding, np.zeros((len(embedding), difference)))
        )
    number_of_dimensions = embedding.shape[1]

    embedding_df = pd.DataFrame(
        embedding,
        index=distance_matrix.ids,
        columns=[f'UMAP-{i}' for i in range(embedding.shape[1])],
    )

    null_eigvals = pd.Series(np.zeros(number_of_dimensions))
    ord_results = OrdinationResults(
        'umap',
        'Uniform Manifold Approximation and Projection',
        eigvals=null_eigvals,
        samples=embedding_df,
        proportion_explained=null_eigvals,
    )
    return center(ord_results)
def emperor_output(sklearn_output, full_file_list, eigenvalues, percent_variance, output_file, new_files=None):
    """Write a standalone Emperor plot for a PCA result.

    Parameters
    ----------
    sklearn_output
        Sample coordinates; rows are labelled with ``full_file_list``.
    full_file_list
        Sample identifiers, in the same order as ``sklearn_output`` rows.
    eigenvalues, percent_variance
        Per-axis values wrapped into the ``eigvals`` and
        ``proportion_explained`` series of the ordination.
    output_file
        Directory (created if needed) that receives ``index.html`` and the
        Emperor support files.
    new_files
        Optional identifiers of user-supplied samples. When given, each gets
        a metadata row whose every column reads ``"Your Data"``.
    """
    # Wrap the raw arrays in the structures Emperor consumes.
    eigvals = pd.Series(data=eigenvalues)
    samples = pd.DataFrame(data=sklearn_output, index=full_file_list)
    p_explained = pd.Series(data=percent_variance)
    ores = OrdinationResults(long_method_name="principal component analysis",
                             short_method_name="pcoa",
                             eigvals=eigvals,
                             samples=samples,
                             proportion_explained=p_explained)

    # Global metadata, indexed by sample id.
    global_metadata = pd.read_csv(config.PATH_TO_ORIGINAL_MAPPING_FILE, sep="\t")
    global_metadata.rename(columns={'filename': 'SampleID'}, inplace=True)
    global_metadata["type"] = "Global Data"
    global_metadata.set_index("SampleID", inplace=True)
    common = global_metadata

    # Placeholder metadata for user-uploaded samples. `is not None` (rather
    # than `!= None`) also works for array-like inputs.
    if new_files is not None:
        # Columns are read after the rename/set_index above, so the
        # placeholder rows neither gain a stale 'filename' column nor
        # overwrite the sample ids.
        placeholder = {
            column: ["Your Data"] * len(new_files)
            for column in global_metadata.columns
        }
        metadata_uploaded = pd.DataFrame(
            placeholder, index=pd.Index(new_files, name="SampleID"))
        common = pd.concat([global_metadata, metadata_uploaded])

    # Align the metadata rows to the ordination's samples before building
    # the plot (the original author notes Emperor otherwise fails to
    # output results).
    final_metadata, unused = common.align(samples, join="right", axis=0)

    emp = Emperor(ores, final_metadata, remote=True)

    # Create the output directory and write the plot plus its resources.
    os.makedirs(output_file, exist_ok=True)
    with open(os.path.join(output_file, 'index.html'), 'w', encoding='utf-8') as f:
        f.write(emp.make_emperor(standalone=True))
    emp.copy_support_files(output_file)
def center(embedding: OrdinationResults) -> OrdinationResults:
    """Return a copy of ``embedding`` with its samples passed through PCA.

    The PCA keeps as many components as the input has columns. Method
    names, sample ids and column labels are carried over; eigenvalues and
    proportion explained are replaced by zeros.
    """
    samples = embedding.samples
    n_axes = samples.shape[1]

    transformed = PCA(n_components=n_axes).fit_transform(samples)
    transformed_df = pd.DataFrame(
        transformed,
        index=samples.index,
        columns=samples.columns,
    )

    # The same zero series is used for both per-axis summaries.
    zero_series = pd.Series(np.zeros(n_axes))
    return OrdinationResults(
        embedding.short_method_name,
        embedding.long_method_name,
        eigvals=zero_series,
        samples=transformed_df,
        proportion_explained=zero_series,
    )
def emperor_output(sklearn_output, full_file_list, eigenvalues, percent_variance, output_file, new_files=None):
    """Write a standalone Emperor plot for a PCA result.

    Parameters
    ----------
    sklearn_output
        Sample coordinates; rows are labelled with ``full_file_list``.
    full_file_list
        Sample identifiers, in the same order as ``sklearn_output`` rows.
    eigenvalues, percent_variance
        Per-axis values wrapped into the ``eigvals`` and
        ``proportion_explained`` series of the ordination.
    output_file
        Directory (created if needed) that receives ``index.html`` and the
        Emperor support files.
    new_files
        Optional identifiers of projected samples. ``None`` or an empty
        sequence means the PCA is not a projection and no ``Type`` column
        is added.
    """
    eigvals = pd.Series(data=eigenvalues)
    samples = pd.DataFrame(data=sklearn_output, index=full_file_list)
    samples.index.rename("SampleID", inplace=True)
    p_explained = pd.Series(data=percent_variance)
    ores = OrdinationResults(long_method_name="principal component analysis",
                             short_method_name="pcoa",
                             eigvals=eigvals,
                             samples=samples,
                             proportion_explained=p_explained)

    # Read in all sample metadata, indexed by sample id.
    df = pd.read_table(config.PATH_TO_ORIGINAL_MAPPING_FILE)
    df.rename(columns={"filename": "SampleID"}, inplace=True)
    df.set_index("SampleID", inplace=True)

    # Projection case: tag existing rows as "Global" and add one row per
    # new file tagged "Your Data". The default is None instead of a shared
    # mutable list; len() keeps array-like inputs working.
    if new_files is not None and len(new_files) != 0:
        df["Type"] = "Global"
        new_meta = pd.DataFrame({"SampleID": new_files, "Type": "Your Data"})
        new_meta.set_index("SampleID", inplace=True)
        df = pd.concat([df, new_meta], axis=0, join="outer")

    # Keep exactly the rows (and row order) of the ordination samples.
    final_metadata, unused = df.align(samples, join="right", axis=0)

    emp = Emperor(ores, final_metadata, remote=True)

    # Create the output directory and write the plot plus its resources.
    os.makedirs(output_file, exist_ok=True)
    with open(os.path.join(output_file, 'index.html'), 'w', encoding='utf-8') as f:
        f.write(emp.make_emperor(standalone=True))
    emp.copy_support_files(output_file)
def _ordination_to_ordination_results(fh):
    """Parse an ordination file handle into an ``OrdinationResults``.

    Sections are read in file order (eigvals, proportion explained,
    species, site, biplot, site constraints), with an empty line expected
    between consecutive sections.

    Raises ``OrdinationFormatError`` when no eigenvalues are present or
    when the site-constraint ids differ from the site ids.
    """
    eigenvalues = _parse_vector_section(fh, 'Eigvals')
    if eigenvalues is None:
        raise OrdinationFormatError("At least one eigval must be present.")
    _check_empty_line(fh)

    proportions = _parse_vector_section(fh, 'Proportion explained')
    _check_length_against_eigvals(proportions, eigenvalues,
                                  'proportion explained values')
    _check_empty_line(fh)

    species_scores, species_names = _parse_array_section(fh, 'Species')
    _check_length_against_eigvals(species_scores, eigenvalues,
                                  'coordinates per species')
    _check_empty_line(fh)

    site_scores, site_names = _parse_array_section(fh, 'Site')
    _check_length_against_eigvals(site_scores, eigenvalues,
                                  'coordinates per site')
    _check_empty_line(fh)

    # Unlike the other array sections, the biplot carries no ids.
    biplot_scores, _ = _parse_array_section(fh, 'Biplot', has_ids=False)
    _check_empty_line(fh)

    constraints, constraint_names = _parse_array_section(
        fh, 'Site constraints')

    # Ids are only compared when both sections supplied them.
    ids_comparable = constraint_names is not None and site_names is not None
    if ids_comparable and constraint_names != site_names:
        raise OrdinationFormatError(
            "Site constraints ids and site ids must be equal: %s != %s" %
            (constraint_names, site_names))

    return OrdinationResults(eigvals=eigenvalues,
                             species=species_scores,
                             site=site_scores,
                             biplot=biplot_scores,
                             site_constraints=constraints,
                             proportion_explained=proportions,
                             species_ids=species_names,
                             site_ids=site_names)
def _ordination_to_ordination_results(fh):
    """Parse an ordination file handle into an ``OrdinationResults``.

    Sections are read in file order (eigvals, proportion explained,
    species, site, biplot, site constraints), with an empty line expected
    between consecutive sections. Species map to ``features`` and sites to
    ``samples`` in the returned object; both method names are left empty.

    Raises ``OrdinationFormatError`` when no eigenvalues are present or
    when the site-constraint index differs from the site index.
    """
    eigenvalues = _parse_vector_section(fh, 'Eigvals')
    if eigenvalues is None:
        raise OrdinationFormatError("At least one eigval must be present.")
    _check_empty_line(fh)

    proportions = _parse_vector_section(fh, 'Proportion explained')
    _check_length_against_eigvals(proportions, eigenvalues,
                                  'proportion explained values')
    _check_empty_line(fh)

    feature_scores = _parse_array_section(fh, 'Species')
    _check_length_against_eigvals(feature_scores, eigenvalues,
                                  'coordinates per species')
    _check_empty_line(fh)

    sample_scores = _parse_array_section(fh, 'Site')
    _check_length_against_eigvals(sample_scores, eigenvalues,
                                  'coordinates per site')
    _check_empty_line(fh)

    # Unlike the other array sections, the biplot carries no ids.
    biplot = _parse_array_section(fh, 'Biplot', has_ids=False)
    _check_empty_line(fh)

    constraints = _parse_array_section(fh, 'Site constraints')

    # Indexes are only compared when both sections are present.
    both_present = constraints is not None and sample_scores is not None
    if both_present and not np.array_equal(constraints.index,
                                           sample_scores.index):
        raise OrdinationFormatError(
            "Site constraints ids and site ids must be equal: %s != %s" %
            (constraints.index, sample_scores.index))

    return OrdinationResults(
        short_method_name='',
        long_method_name='',
        eigvals=eigenvalues,
        features=feature_scores,
        samples=sample_scores,
        biplot_scores=biplot,
        sample_constraints=constraints,
        proportion_explained=proportions)
def setUp(self):
    """Save two PCoA results as ``.qza`` artifacts in temporary files."""
    super().setUp()
    axes = ['PC1', 'PC2', 'PC3']
    self.resources = ResourceManager()

    # Temporary files that will receive the saved artifacts.
    self.fh1 = self.create_tempfile(suffix='.qza')
    self.fh2 = self.create_tempfile(suffix='.qza')
    self.pcoa_path1 = self.fh1.name
    self.pcoa_path2 = self.fh2.name

    def coordinate_frame(rows):
        # Sample-by-axis frame with the index named 'Sample ID'.
        frame = pd.DataFrame.from_dict(rows, orient='index', columns=axes)
        frame.index.name = 'Sample ID'
        return frame

    self.test_df1 = coordinate_frame({
        's1': [0.1, 0.2, 7],
        's2': [0.9, 0.2, 7],
    })
    self.test_df2 = coordinate_frame({
        's1': [0.1, 0.2, 7],
        's2': [0.9, 0.2, 7],
        's3': [0.2, -0.3, 0],
        's4': [0.111, -4, 0.2],
    })

    self.pcoa1 = OrdinationResults(
        'pcoa1',
        'pcoa1',
        eigvals=pd.Series([7, 2, 1], index=axes),
        samples=self.test_df1,
        proportion_explained=pd.Series([0.7, 0.2, 0.1], index=axes),
    )
    self.pcoa2 = OrdinationResults(
        'pcoa2',
        'pcoa2',
        eigvals=pd.Series([6, 3, 1], index=axes),
        samples=self.test_df2,
        proportion_explained=pd.Series([0.6, 0.3, 0.1], index=axes),
    )

    # Import each result as a PCoAResults artifact and save it to its path.
    for ordination, path in ((self.pcoa1, self.pcoa_path1),
                             (self.pcoa2, self.pcoa_path2)):
        imported_artifact = Artifact.import_data("PCoAResults", ordination)
        imported_artifact.save(path)
def setUp(self):
    """Create a nine-sample ordination and its matching metadata frame."""
    eigenvalues = np.array([
        0.512367260461, 0.300719094427, 0.267912066004,
        0.208988681078, 0.19169895326, 0.16054234528,
        0.15017695712, 0.122457748167, 0.0,
    ])
    coordinates = np.array([
        [-0.212230626531, 0.216034194368, 0.03532727349,
         -0.254450494129, -0.0687468542543, 0.231895596562,
         0.00496549154314, -0.0026246871695, 9.73837390723e-10],
        [-0.277487312135, -0.0295483215975, -0.0744173437992,
         0.0957182357964, 0.204714844022, -0.0055407341857,
         -0.190287966833, 0.16307126638, 9.73837390723e-10],
        [0.220886492631, 0.0874848360559, -0.351990132198,
         -0.00316535032886, 0.114635191853, -0.00019194106125,
         0.188557853937, 0.030002427212, 9.73837390723e-10],
        [0.0308923744062, -0.0446295973489, 0.133996451689,
         0.29318228566, -0.167812539312, 0.130996149793,
         0.113551017379, 0.109987942454, 9.73837390723e-10],
        [0.27616778138, -0.0341866951102, 0.0633000238256,
         0.100446653327, 0.123802521199, 0.1285839664,
         -0.132852841046, -0.217514322505, 9.73837390723e-10],
        [0.202458130052, -0.115216120518, 0.301820871723,
         -0.18300251046, 0.136208248567, -0.0989435556722,
         0.0927738484879, 0.0909429797672, 9.73837390723e-10],
        [0.236467470907, 0.21863434374, -0.0301637746424,
         -0.0225473129718, -0.205287183891, -0.180224615141,
         -0.165277751908, 0.0411933458557, 9.73837390723e-10],
        [-0.105517545144, -0.41405687433, -0.150073017617,
         -0.116066751485, -0.158763393475, -0.0223918378516,
         -0.0263068046112, -0.0501209518091, 9.73837390723e-10],
        [-0.371636765565, 0.115484234741, 0.0721996475289,
         0.0898852445906, 0.0212491652909, -0.184183028843,
         0.114877153051, -0.164938000185, 9.73837390723e-10],
    ])
    explained = np.array([
        25.6216900347, 15.7715955926, 14.1215046787,
        11.6913885817, 9.83044890697, 8.51253468595,
        7.88775505332, 6.56308246609, 4.42499350906e-16,
    ])
    sample_ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                  'PC.593', 'PC.355', 'PC.607', 'PC.634']
    self.ord_res = OrdinationResults(eigvals=eigenvalues,
                                     site=coordinates,
                                     proportion_explained=explained,
                                     site_ids=sample_ids)

    # One record per sample: (id, Treatment, DOB, Weight, Description).
    # Every value is kept as a string.
    records = (
        ('PC.354', 'Control', '20061218', '60', 'Control_mouse_I.D._354'),
        ('PC.355', 'Control', '20061218', '55', 'Control_mouse_I.D._355'),
        ('PC.356', 'Control', '20061126', '50', 'Control_mouse_I.D._356'),
        ('PC.481', 'Control', '20070314', '52', 'Control_mouse_I.D._481'),
        ('PC.593', 'Control', '20071210', '57', 'Control_mouse_I.D._593'),
        ('PC.607', 'Fast', '20071112', '65', 'Fasting_mouse_I.D._607'),
        ('PC.634', 'Fast', '20080116', '68', 'Fasting_mouse_I.D._634'),
        ('PC.635', 'Fast', '20080116', '70', 'Fasting_mouse_I.D._635'),
        ('PC.636', 'Fast', '20080116', '72', 'Fasting_mouse_I.D._636'),
    )
    by_sample = {}
    for sample_id, treatment, dob, weight, description in records:
        by_sample[sample_id] = {'Treatment': treatment,
                                'DOB': dob,
                                'Weight': weight,
                                'Description': description}
    self.metadata_map = pd.DataFrame.from_dict(by_sample, orient='index')

    self.categories = ['Treatment']
    self.sort_by = 'Weight'
def setUp(self):
    """Build the expected in-memory results for the reader/writer tests.

    Four objects are created -- CA, CCA, PCoA and RDA, in that order -- and
    stored in ``self.ordination_results_objs``. The original comment says
    there is one for each valid file in ``self.valid_fps``.
    """
    super(OrdinationResultsReaderWriterTests, self).setUp()

    # Site ids shared by the CCA and RDA fixtures (a fresh list per use).
    def ten_sites():
        return ['Site0', 'Site1', 'Site2', 'Site3', 'Site4',
                'Site5', 'Site6', 'Site7', 'Site8', 'Site9']

    # CA results: species and site scores only.
    ca_expected = OrdinationResults(
        eigvals=np.array([0.0961330159181, 0.0409418140138]),
        species=np.array([[0.408869425742, 0.0695518116298],
                          [-0.1153860437, -0.299767683538],
                          [-0.309967102571, 0.187391917117]]),
        site=np.array([[-0.848956053187, 0.882764759014],
                       [-0.220458650578, -1.34482000302],
                       [1.66697179591, 0.470324389808]]),
        biplot=None,
        site_constraints=None,
        proportion_explained=None,
        species_ids=['Species1', 'Species2', 'Species3'],
        site_ids=['Site1', 'Site2', 'Site3'])

    # CCA results: larger arrays are loaded from data files.
    cca_eigvals = np.array([
        0.366135830393, 0.186887643052, 0.0788466514249,
        0.082287840501, 0.0351348475787, 0.0233265839374,
        0.0099048981912, 0.00122461669234, 0.000417454724117
    ])
    cca_species = np.loadtxt(get_data_path('ordres_exp_OrdRes_CCA_species'))
    cca_site = np.loadtxt(get_data_path('ordres_exp_OrdRes_CCA_site'))
    cca_biplot = np.array(
        [[-0.169746767979, 0.63069090084, 0.760769036049],
         [-0.994016563505, 0.0609533148724, -0.0449369418179],
         [0.184352565909, -0.974867543612, 0.0309865007541]])
    cca_constraints = np.loadtxt(
        get_data_path('ordres_exp_OrdRes_CCA_site_constraints'))
    cca_expected = OrdinationResults(
        eigvals=cca_eigvals,
        species=cca_species,
        site=cca_site,
        biplot=cca_biplot,
        site_constraints=cca_constraints,
        proportion_explained=None,
        species_ids=['Species0', 'Species1', 'Species2', 'Species3',
                     'Species4', 'Species5', 'Species6', 'Species7',
                     'Species8'],
        site_ids=ten_sites())

    # PCoA results: site scores and proportion explained, no species.
    pcoa_eigvals = np.array([
        0.512367260461, 0.300719094427, 0.267912066004,
        0.208988681078, 0.19169895326, 0.16054234528,
        0.15017695712, 0.122457748167, 0.0
    ])
    pcoa_site = np.loadtxt(get_data_path('ordres_exp_OrdRes_PCoA_site'))
    pcoa_explained = np.array([
        0.267573832777, 0.15704469605, 0.139911863774,
        0.109140272454, 0.100111048503, 0.0838401161912,
        0.0784269939011, 0.0639511763509, 0.0
    ])
    pcoa_expected = OrdinationResults(
        eigvals=pcoa_eigvals,
        species=None,
        site=pcoa_site,
        biplot=None,
        site_constraints=None,
        proportion_explained=pcoa_explained,
        species_ids=None,
        site_ids=['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                  'PC.593', 'PC.355', 'PC.607', 'PC.634'])

    # RDA results: larger arrays are loaded from data files.
    rda_eigvals = np.array([
        25.8979540892, 14.9825779819, 8.93784077262, 6.13995623072,
        1.68070536498, 0.57735026919, 0.275983624351
    ])
    rda_species = np.loadtxt(get_data_path('ordres_exp_OrdRes_RDA_species'))
    rda_site = np.loadtxt(get_data_path('ordres_exp_OrdRes_RDA_site'))
    rda_biplot = np.array(
        [[0.422650019179, -0.559142585857, -0.713250678211],
         [0.988495963777, 0.150787422017, -0.0117848614073],
         [-0.556516618887, 0.817599992718, 0.147714267459],
         [-0.404079676685, -0.9058434809, -0.127150316558]])
    rda_constraints = np.loadtxt(
        get_data_path('ordres_exp_OrdRes_RDA_site_constraints'))
    rda_expected = OrdinationResults(
        eigvals=rda_eigvals,
        species=rda_species,
        site=rda_site,
        biplot=rda_biplot,
        site_constraints=rda_constraints,
        proportion_explained=None,
        species_ids=['Species0', 'Species1', 'Species2', 'Species3',
                     'Species4', 'Species5'],
        site_ids=ten_sites())

    self.ordination_results_objs = [
        ca_expected, cca_expected, pcoa_expected, rda_expected
    ]
def get_procrustes_results(coords_f1, coords_f2, sample_id_map=None,
                           randomize=None, max_dimensions=None,
                           get_eigenvalues=get_mean_eigenvalues,
                           get_percent_variation_explained=get_mean_percent_variation):
    """Run a Procrustes analysis on two ordination files.

    Parameters
    ----------
    coords_f1, coords_f2
        Inputs accepted by ``OrdinationResults.read``.
    sample_id_map
        Optional mapping applied to the sample ids of both inputs (via
        ``map_sample_ids``) before they are matched.
    randomize
        Optional callable applied to the second coordinate matrix; used for
        random trials.
    max_dimensions
        When truthy, coordinates, eigenvalues and proportions explained are
        truncated to this many dimensions. Otherwise the shorter
        eigenvalue/proportion vectors are zero-padded to the longer length.
    get_eigenvalues, get_percent_variation_explained
        Callables that combine the two eigenvalue vectors and the two
        proportion-explained vectors into the reported ones.

    Returns
    -------
    tuple
        ``(transformed_coords1, transformed_coords2, m_squared,
        randomized_coords2)``. The first two are ``OrdinationResults``
        restricted to the shared samples, ``m_squared`` is the third value
        returned by ``procrustes``, and ``randomized_coords2`` is ``None``
        unless ``randomize`` was given.

    Raises
    ------
    ValueError
        If the two inputs share no sample ids.
    """
    # Parse the PCoA files
    ord_res_1 = OrdinationResults.read(coords_f1)
    ord_res_2 = OrdinationResults.read(coords_f2)

    sample_ids1 = ord_res_1.site_ids
    coords1 = ord_res_1.site
    eigvals1 = ord_res_1.eigvals
    pct_var1 = ord_res_1.proportion_explained

    sample_ids2 = ord_res_2.site_ids
    coords2 = ord_res_2.site
    eigvals2 = ord_res_2.eigvals
    pct_var2 = ord_res_2.proportion_explained

    if sample_id_map:
        sample_ids1 = map_sample_ids(sample_ids1, sample_id_map)
        sample_ids2 = map_sample_ids(sample_ids2, sample_id_map)

    # Only samples present in both inputs take part in the fit. Check for
    # an empty overlap before reordering, so the caller gets this error
    # instead of whatever an empty reorder would produce.
    order = list(set(sample_ids1) & set(sample_ids2))
    if not order:
        raise ValueError('No overlapping samples in the two files')

    # Put the rows of both matrices in the same (shared) sample order.
    coords1 = reorder_coords(coords1, sample_ids1, order)
    coords2 = reorder_coords(coords2, sample_ids2, order)

    # If this is a random trial, apply the shuffling function passed as
    # randomize()
    if randomize:
        coords2 = randomize(coords2)
        randomized_coords2 = OrdinationResults(eigvals=eigvals2,
                                               proportion_explained=pct_var2,
                                               site=coords2,
                                               site_ids=order)
    else:
        randomized_coords2 = None

    coords1, coords2 = pad_coords_matrices(coords1, coords2)
    if max_dimensions:
        coords1 = filter_coords_matrix(coords1, max_dimensions)
        coords2 = filter_coords_matrix(coords2, max_dimensions)
        pct_var1 = pct_var1[:max_dimensions]
        pct_var2 = pct_var2[:max_dimensions]
        eigvals1 = eigvals1[:max_dimensions]
        eigvals2 = eigvals2[:max_dimensions]
    else:
        # Zero-pad the shorter side so both inputs describe the same number
        # of axes.
        if len(pct_var1) > len(pct_var2):
            pct_var2 = append(pct_var2, zeros(len(pct_var1) - len(pct_var2)))
            eigvals2 = append(eigvals2, zeros(len(eigvals1) - len(eigvals2)))
        elif len(pct_var1) < len(pct_var2):
            pct_var1 = append(pct_var1, zeros(len(pct_var2) - len(pct_var1)))
            eigvals1 = append(eigvals1, zeros(len(eigvals2) - len(eigvals1)))

    # Run the Procrustes analysis
    transformed_coords_m1, transformed_coords_m2, m_squared =\
        procrustes(coords1, coords2)

    eigvals = get_eigenvalues(eigvals1, eigvals2)
    pct_var = get_percent_variation_explained(pct_var1, pct_var2)

    transformed_coords1 = OrdinationResults(
        eigvals=asarray(eigvals),
        proportion_explained=asarray(pct_var),
        site=asarray(transformed_coords_m1),
        site_ids=order)
    transformed_coords2 = OrdinationResults(
        eigvals=asarray(eigvals),
        proportion_explained=asarray(pct_var),
        site=asarray(transformed_coords_m2),
        site_ids=order)

    # Return the results
    return (transformed_coords1, transformed_coords2,
            m_squared, randomized_coords2)