def test_from_file_error(self):
    """Check that ``OrdinationResults.from_file`` rejects malformed files.

    Every path in ``self.fferror_test_paths`` must make ``from_file`` raise
    ``FileFormatError``; every path in ``self.verror_test_paths`` must make
    it raise ``ValueError``.
    """
    expectations = (
        (self.fferror_test_paths, FileFormatError),
        (self.verror_test_paths, ValueError),
    )
    for test_paths, expected_error in expectations:
        for test_path in test_paths:
            # Open in the default text mode: the legacy 'U' flag is rejected
            # by newer Python 3 releases, and the resulting ValueError would
            # be raised by open() itself, outside the assert_raises context.
            with open(get_data_path(test_path)) as f:
                with npt.assert_raises(expected_error):
                    OrdinationResults.from_file(f)
def test_from_file(self):
    """Yield one equality check per test file and per way of passing it.

    Each path in ``self.test_paths`` is parsed twice with
    ``OrdinationResults.from_file`` -- once through an open file handle and
    once through the file name -- and every parsed result is paired with the
    matching entry of ``self.scores``.
    """
    for expected, rel_path in zip(self.scores, self.test_paths):
        for source_kind in ('file like', 'file name'):
            path = get_data_path(rel_path)
            if source_kind == 'file name':
                observed = OrdinationResults.from_file(path)
            elif source_kind == 'file like':
                with open(path) as handle:
                    observed = OrdinationResults.from_file(handle)
            yield self.check_OrdinationResults_equal, observed, expected
def body_site(coords, mapping_file):
    """Generates as many figures as samples in the coordinates file

    For every sample a scatter plot of the first two ordination axes is
    drawn, with all samples colored by their ``TITLE_BODY_SITE`` group and
    the current sample drawn on top as a large dot. Each figure is saved as
    ``<sample id>.pdf`` in the current working directory.

    Parameters
    ----------
    coords : passed unchanged to ``OrdinationResults.from_file``
        Ordination results to plot.
    mapping_file : passed unchanged to ``pandas.read_csv``
        Tab-separated metadata indexed by a ``#SampleID`` column and
        containing a ``TITLE_BODY_SITE`` column.

    Raises
    ------
    KeyError
        If a sample's ``TITLE_BODY_SITE`` value has no entry in the color
        table below.
    """
    o = OrdinationResults.from_file(coords)

    # coordinates
    c_df = pd.DataFrame(o.site, o.site_ids)

    # mapping file (the separator is passed by keyword; recent pandas
    # releases no longer accept it positionally)
    mf = pd.read_csv(mapping_file, sep='\t', converters=defaultdict(str),
                     index_col='#SampleID')
    mf = mf.loc[o.site_ids]

    palette = sns.color_palette('Paired', 12)
    color_hmp_fecal = palette[10]  # light brown
    color_agp_fecal = palette[11]  # dark brown
    color_hmp_oral = palette[0]  # light blue
    color_agp_oral = palette[1]  # dark blue
    color_hmp_skin = palette[2]  # light green
    color_agp_skin = palette[3]  # dark green

    grp_colors = {'AGP-FECAL': color_agp_fecal,
                  'AGP-ORAL': color_agp_oral,
                  'AGP-SKIN': color_agp_skin,
                  'HMP-FECAL': color_hmp_fecal,
                  'GG-FECAL': color_hmp_fecal,
                  'PGP-FECAL': color_hmp_fecal,
                  'HMP-ORAL': color_hmp_oral,
                  'PGP-ORAL': color_hmp_oral,
                  'HMP-SKIN': color_hmp_skin,
                  'PGP-SKIN': color_hmp_skin}

    # Which rows belong to which group does not depend on the highlighted
    # sample, so select and slice them once instead of once per figure.
    # dict.items() is used because iteritems() only exists on Python 2.
    grp_slices = []
    for grp, color in grp_colors.items():
        sub_coords = c_df[mf.TITLE_BODY_SITE == grp].values
        grp_slices.append((color, np.asarray(color) * 0.6,
                           np.array_split(sub_coords, 50)))

    for sample in mf.index:

        # plot categories as 50 slices with random zorder
        for color, edge_color, slices in grp_slices:
            for i in slices:
                plt.scatter(i[:, 0], i[:, 1], color=color,
                            edgecolor=edge_color, lw=LINE_WIDTH, alpha=ALPHA,
                            zorder=np.random.rand())

        # plot participant's dot
        sample_x, sample_y = c_df.loc[sample][0], c_df.loc[sample][1]
        sample_color = grp_colors[mf.loc[sample]['TITLE_BODY_SITE']]

        plt.scatter(sample_x, sample_y, color=sample_color, s=270,
                    edgecolor='w', zorder=1)
        plt.scatter(sample_x, sample_y, color=sample_color, s=250,
                    edgecolor=np.asarray(sample_color) * 0.6, zorder=2)

        plt.axis('off')
        # savefig() has no ``figsize`` option (the size belongs to the
        # figure), so the argument formerly passed here is dropped and the
        # figure keeps its default size.
        my_dpi = 72
        plt.savefig(sample + '.pdf', dpi=my_dpi)
        plt.close()
def gradient(coords, mapping_file, color):
    """Generates as many figures as samples in the coordinates file

    For every sample a scatter plot of the first two ordination axes is
    drawn. Samples whose ``color`` metadata value is numeric are colored
    with the ``RdBu`` colormap, all other samples are drawn in gray, and the
    current sample is drawn on top as a large dot. Each figure is saved as
    ``<sample id>.pdf`` in the current working directory.

    Parameters
    ----------
    coords : passed unchanged to ``OrdinationResults.from_file``
        Ordination results to plot.
    mapping_file : passed unchanged to ``pandas.read_csv``
        Tab-separated metadata indexed by a ``#SampleID`` column.
    color : str
        Name of the metadata column used to color the samples.
    """
    o = OrdinationResults.from_file(coords)

    # coordinates
    c_df = pd.DataFrame(o.site, o.site_ids)

    # mapping file (the separator is passed by keyword; recent pandas
    # releases no longer accept it positionally)
    mf = pd.read_csv(mapping_file, sep='\t', converters=defaultdict(str),
                     index_col='#SampleID')
    mf = mf.loc[o.site_ids]

    # Values that cannot be parsed as numbers become NaN. pd.to_numeric is
    # the replacement for the removed Series.convert_objects API.
    mf[color] = pd.to_numeric(mf[color], errors='coerce')

    is_missing = pd.isnull(mf[color])
    numeric = mf[~is_missing]
    non_numeric = mf[is_missing]

    color_array = plt.cm.RdBu(numeric[color]/max(numeric[color]))

    # Row of color_array for each numeric sample id; setdefault keeps the
    # first occurrence, as list.index() did in the former per-sample scan.
    color_row = {}
    for position, sample_id in enumerate(numeric.index):
        color_row.setdefault(sample_id, position)

    # The background layers are identical in every figure, so look their
    # coordinates up once.
    numeric_x = c_df.loc[numeric.index][0]
    numeric_y = c_df.loc[numeric.index][1]
    other_x = c_df.loc[non_numeric.index][0]
    other_y = c_df.loc[non_numeric.index][1]

    for sample in mf.index:

        # plot numeric metadata as colored gradient
        plt.scatter(numeric_x, numeric_y, c=numeric[color],
                    cmap=plt.get_cmap('RdBu'), alpha=ALPHA, lw=LINE_WIDTH,
                    edgecolor=color_array*0.6)

        # plot non-numeric metadata as gray
        plt.scatter(other_x, other_y, c='0.5', alpha=ALPHA, lw=LINE_WIDTH,
                    edgecolor='0.3')

        # plot individual's dot; gray when its metadata value is not numeric
        color_index = color_row.get(sample)
        if color_index is None:
            _color = (0.5, 0.5, 0.5)
        else:
            _color = color_array[color_index]

        sample_x, sample_y = c_df.loc[sample][0], c_df.loc[sample][1]
        plt.scatter(sample_x, sample_y, color=_color, s=270, edgecolor='w')
        plt.scatter(sample_x, sample_y, color=_color, s=250,
                    edgecolor=np.asarray(_color)*0.6)

        plt.axis('off')
        # savefig() has no ``figsize`` option (the size belongs to the
        # figure), so the argument formerly passed here is dropped and the
        # figure keeps its default size.
        my_dpi = 72
        plt.savefig(sample + '.pdf', dpi=my_dpi)
        plt.close()
def format_coords(coord_header, coords, eigvals, pct_var, headers=True):
    """formats coords given specified coords matrix etc.

    Parameters
    ----------
    coord_header : iterable of str
        One row label per row of ``coords``.
    coords : sequence of sequences
        Coordinate matrix; every value is rendered with ``str``.
    eigvals : iterable
        Values written on the ``eigvals`` line.
    pct_var : iterable
        Values written on the ``% variation explained`` line.
    headers : bool, optional
        When true (default) the output has a ``pc vector number`` header
        line, one labelled line per row, two empty lines, then the
        ``eigvals`` and ``% variation explained`` lines. When false only the
        tab-joined rows are written, followed by a trailing newline, and
        ``coord_header``, ``eigvals`` and ``pct_var`` are ignored.

    Returns
    -------
    str
        The tab-separated, newline-joined text.
    """
    if not headers:
        result = ['\t'.join(map(str, row)) for row in coords]
        result.append('')
        return '\n'.join(result)

    result = ['pc vector number\t' +
              '\t'.join(map(str, range(1, len(coords[0]) + 1)))]
    for name, row in zip(coord_header, coords):
        # Build a real list: on Python 3 map() returns an iterator, which
        # cannot be concatenated to a list.
        result.append('\t'.join([name] + [str(value) for value in row]))
    result.append('')
    result.append('')
    result.append('eigvals\t' + '\t'.join(map(str, eigvals)))
    result.append('% variation explained\t' + '\t'.join(map(str, pct_var)))
    return '\n'.join(result)


if __name__ == "__main__":
    # usage: <script> OLD_FILE NEW_FILE
    old_file = argv[1]
    new_file = argv[2]
    # Default text mode is used for reading: the legacy 'U' flag is rejected
    # by newer Python 3 releases.
    with open(old_file) as infile, open(new_file, 'w') as outfile:
        res = OrdinationResults.from_file(infile)
        lines = format_coords(res.site_ids, res.site, res.eigvals,
                              res.proportion_explained)
        outfile.write(lines)
def country(coords, mapping_file):
    """Generates as many figures as samples in the coordinates file

    For every sample a density contour of the first two ordination axes is
    drawn, the samples of every country in the color table are scattered on
    top of it, the participant's own country is scattered last, and the
    participant is drawn as a large dot. Each figure is saved as
    ``<sample id>.pdf`` in the current working directory.

    The participant's country is drawn in the highlight color unless it is
    Malawi or Venezuela, which always keep their own colors. The color table
    is never modified, so a figure does not depend on which samples were
    plotted before it; in particular ``no_data`` stays gray in the
    background after a ``no_data`` participant, and a country missing from
    the table is only drawn when it is the participant's own.

    Parameters
    ----------
    coords : passed unchanged to ``OrdinationResults.from_file``
        Ordination results to plot.
    mapping_file : passed unchanged to ``pandas.read_csv``
        Tab-separated metadata indexed by a ``#SampleID`` column and
        containing a ``COUNTRY`` column.
    """
    o = OrdinationResults.from_file(coords)
    x, y = o.site[:, 0], o.site[:, 1]

    # coordinates
    c_df = pd.DataFrame(o.site, o.site_ids)

    # mapping file (the separator is passed by keyword; recent pandas
    # releases no longer accept it positionally)
    mf = pd.read_csv(mapping_file, sep='\t', converters=defaultdict(str),
                     index_col='#SampleID')
    mf = mf.loc[o.site_ids]

    palette = sns.color_palette('Paired', 12)
    color_Venezuela = palette[10]
    color_Malawi = palette[1]
    color_Western = palette[4]
    color_Highlight = palette[5]
    color_no_data = (0.5, 0.5, 0.5)

    western_countries = (
        'Australia', 'Belgium', 'Canada', 'China', 'Finland', 'France',
        'Germany', 'Great Britain', 'Ireland', 'Japan', 'Netherlands',
        'New Zealand', 'Norway', 'Scotland', 'Spain', 'Switzerland',
        'Thailand', 'United Arab Emirates', 'United Kingdom',
        'United States of America',
    )

    # insertion order is the drawing order of the background groups
    grp_colors = OrderedDict()
    grp_colors['no_data'] = color_no_data
    for name in western_countries:
        grp_colors[name] = color_Western
    grp_colors['Malawi'] = color_Malawi
    grp_colors['Venezuela'] = color_Venezuela

    def scatter_country(grp, color):
        # scatter every sample whose COUNTRY equals grp in the given color
        sub_coords = c_df[mf.COUNTRY == grp]
        plt.scatter(sub_coords[0], sub_coords[1], color=color,
                    edgecolor=np.asarray(color)*0.6, lw=LINE_WIDTH,
                    alpha=ALPHA)

    for sample in mf.index:

        # countour plot superimposed
        sns.kdeplot(x, y, cmap='bone')
        sns.set_context(rc={"lines.linewidth": 0.75})

        # participant's country is drawn in color_Highlight unless it is
        # Venezuela or Malawi; computed per sample rather than by
        # overwriting (and later "resetting") an entry of grp_colors
        home = mf.loc[sample]['COUNTRY']
        if home in ('Malawi', 'Venezuela'):
            home_color = grp_colors[home]
        else:
            home_color = color_Highlight

        # plot each country except participant's according to colors above
        # (dict.items() is used because iteritems() only exists on Python 2)
        for grp, color in grp_colors.items():
            if grp == home:
                continue
            scatter_country(grp, color)

        # now plot participant's country
        scatter_country(home, home_color)

        # plot participant's dot
        sample_x, sample_y = c_df.loc[sample][0], c_df.loc[sample][1]
        plt.scatter(sample_x, sample_y, color=home_color, s=270,
                    edgecolor='w', zorder=1)
        plt.scatter(sample_x, sample_y, color=home_color, s=250,
                    edgecolor=np.asarray(home_color)*0.6, zorder=2)

        plt.axis('off')
        # savefig() has no ``figsize`` option (the size belongs to the
        # figure), so the argument formerly passed here is dropped and the
        # figure keeps its default size.
        my_dpi = 72
        plt.savefig(sample + '.pdf', dpi=my_dpi)
        plt.close()