예제 #1
0
    def test_from_file_error(self):
        """Check that ``OrdinationResults.from_file`` rejects bad input files.

        Every path in ``self.fferror_test_paths`` is expected to make the
        parser raise ``FileFormatError``; every path in
        ``self.verror_test_paths`` is expected to make it raise
        ``ValueError``.
        """
        # The legacy 'U' mode flag was deprecated in Python 3.4 and removed in
        # 3.11, where open() itself raises ValueError.  In the second loop
        # that error would surface outside assert_raises and mask the real
        # check.  Default text mode already translates newlines on Python 3.
        for test_path in self.fferror_test_paths:
            with open(get_data_path(test_path)) as f:
                with npt.assert_raises(FileFormatError):
                    OrdinationResults.from_file(f)

        for test_path in self.verror_test_paths:
            with open(get_data_path(test_path)) as f:
                with npt.assert_raises(ValueError):
                    OrdinationResults.from_file(f)
예제 #2
0
    def test_from_file_error(self):
        """Check that ``OrdinationResults.from_file`` rejects bad input files.

        Every path in ``self.fferror_test_paths`` is expected to make the
        parser raise ``FileFormatError``; every path in
        ``self.verror_test_paths`` is expected to make it raise
        ``ValueError``.
        """
        # The legacy 'U' mode flag was deprecated in Python 3.4 and removed in
        # 3.11, where open() itself raises ValueError.  In the second loop
        # that error would surface outside assert_raises and mask the real
        # check.  Default text mode already translates newlines on Python 3.
        for test_path in self.fferror_test_paths:
            with open(get_data_path(test_path)) as f:
                with npt.assert_raises(FileFormatError):
                    OrdinationResults.from_file(f)

        for test_path in self.verror_test_paths:
            with open(get_data_path(test_path)) as f:
                with npt.assert_raises(ValueError):
                    OrdinationResults.from_file(f)
예제 #3
0
    def test_from_file(self):
        """Yield one check per expected result and per kind of input.

        Each path in ``self.test_paths`` is parsed twice with
        ``OrdinationResults.from_file`` -- once from an open file handle and
        once from the bare file name.  Every parsed object is yielded as a
        ``(check, observed, expected)`` tuple, where ``expected`` is the
        matching entry of ``self.scores``.
        """
        input_kinds = ('file like', 'file name')
        for expected, rel_path in zip(self.scores, self.test_paths):
            for kind in input_kinds:
                path = get_data_path(rel_path)
                if kind == 'file name':
                    observed = OrdinationResults.from_file(path)
                else:
                    # 'file like': hand the parser an already-open handle.
                    with open(path) as handle:
                        observed = OrdinationResults.from_file(handle)

                yield self.check_OrdinationResults_equal, observed, expected
예제 #4
0
    def test_from_file(self):
        """Yield one check per expected result and per kind of input.

        Each path in ``self.test_paths`` is parsed twice with
        ``OrdinationResults.from_file`` -- once from an open file handle and
        once from the bare file name.  Every parsed object is yielded as a
        ``(check, observed, expected)`` tuple, where ``expected`` is the
        matching entry of ``self.scores``.
        """
        input_kinds = ('file like', 'file name')
        for expected, rel_path in zip(self.scores, self.test_paths):
            for kind in input_kinds:
                path = get_data_path(rel_path)
                if kind == 'file name':
                    observed = OrdinationResults.from_file(path)
                else:
                    # 'file like': hand the parser an already-open handle.
                    with open(path) as handle:
                        observed = OrdinationResults.from_file(handle)

                yield self.check_OrdinationResults_equal, observed, expected
예제 #5
0
def body_site(coords, mapping_file):
    """Save one body-site scatter plot per sample in the coordinates file.

    For every sample a PDF named ``<sample id>.pdf`` is written relative to
    the current working directory.  Each figure shows all samples on the
    first two ordination axes, colored by their ``TITLE_BODY_SITE`` group,
    with the sample in question drawn on top as a large outlined dot.

    Parameters
    ----------
    coords : str or file-like
        Ordination results, handed to ``OrdinationResults.from_file``.
    mapping_file : str or file-like
        Tab-separated metadata with a ``#SampleID`` column and a
        ``TITLE_BODY_SITE`` column, handed to ``pandas.read_csv``.

    Raises
    ------
    KeyError
        If a sample's ``TITLE_BODY_SITE`` value has no entry in the color
        table defined below.
    """
    o = OrdinationResults.from_file(coords)

    # coordinates, one row per sample id
    c_df = pd.DataFrame(o.site, o.site_ids)

    # mapping file, with its rows put in the same order as the coordinates;
    # `sep` is passed by keyword because recent pandas releases no longer
    # accept it positionally
    mf = pd.read_csv(mapping_file, sep='\t', converters=defaultdict(str),
                     index_col='#SampleID')
    mf = mf.loc[o.site_ids]

    palette = sns.color_palette('Paired', 12)
    color_hmp_fecal = palette[10]  # light brown
    color_agp_fecal = palette[11]  # dark brown
    color_hmp_oral = palette[0]    # light blue
    color_agp_oral = palette[1]    # dark blue
    color_hmp_skin = palette[2]    # light green
    color_agp_skin = palette[3]    # dark green

    grp_colors = {'AGP-FECAL': color_agp_fecal,
                  'AGP-ORAL':  color_agp_oral,
                  'AGP-SKIN':  color_agp_skin,
                  'HMP-FECAL': color_hmp_fecal,
                  'GG-FECAL':  color_hmp_fecal,
                  'PGP-FECAL': color_hmp_fecal,
                  'HMP-ORAL':  color_hmp_oral,
                  'PGP-ORAL':  color_hmp_oral,
                  'HMP-SKIN':  color_hmp_skin,
                  'PGP-SKIN':  color_hmp_skin}

    # The background layer is identical in every figure, so split each
    # category into its 50 slices once instead of once per sample.
    # items() works on both Python 2 and 3; iteritems() is Python 2 only.
    background = []
    for grp, color in grp_colors.items():
        sub_coords = c_df[mf.TITLE_BODY_SITE == grp].values
        background.append((color, np.asarray(color) * 0.6,
                           np.array_split(sub_coords, 50)))

    my_dpi = 72
    # 1000 x 1000 pixels at `my_dpi`; float division keeps the size exact on
    # Python 2 as well.
    fig_size = (1000.0 / my_dpi, 1000.0 / my_dpi)

    for sample in mf.index:
        # The size belongs to the figure: savefig() does not resize, and
        # recent matplotlib rejects a `figsize` keyword there.
        plt.figure(figsize=fig_size)

        # plot categories as 50 slices with random zorder
        for color, edge_color, slices in background:
            for i in slices:
                plt.scatter(i[:, 0], i[:, 1], color=color,
                            edgecolor=edge_color, lw=LINE_WIDTH,
                            alpha=ALPHA, zorder=np.random.rand())

        # plot participant's dot: a white-rimmed disc with a slightly
        # smaller, darker-rimmed disc on top of it
        sample_xy = c_df.loc[sample]
        sample_color = grp_colors[mf.loc[sample]['TITLE_BODY_SITE']]
        plt.scatter(sample_xy[0], sample_xy[1], color=sample_color,
                    s=270, edgecolor='w', zorder=1)
        plt.scatter(sample_xy[0], sample_xy[1], color=sample_color,
                    s=250, edgecolor=np.asarray(sample_color) * 0.6,
                    zorder=2)

        plt.axis('off')
        plt.savefig(sample + '.pdf', dpi=my_dpi)
        plt.close()
예제 #6
0
def gradient(coords, mapping_file, color):
    """Save one gradient-colored scatter plot per sample.

    For every sample a PDF named ``<sample id>.pdf`` is written relative to
    the current working directory.  Samples whose ``color`` value parses as a
    number are drawn with the ``RdBu`` colormap, all others in gray, and the
    sample in question is drawn on top as a large outlined dot.

    Parameters
    ----------
    coords : str or file-like
        Ordination results, handed to ``OrdinationResults.from_file``.
    mapping_file : str or file-like
        Tab-separated metadata with a ``#SampleID`` column, handed to
        ``pandas.read_csv``.
    color : str
        Name of the mapping-file column that drives the gradient.
    """
    o = OrdinationResults.from_file(coords)

    # coordinates, one row per sample id
    c_df = pd.DataFrame(o.site, o.site_ids)

    # mapping file, with its rows put in the same order as the coordinates;
    # `sep` is passed by keyword because recent pandas releases no longer
    # accept it positionally
    mf = pd.read_csv(mapping_file, sep='\t', converters=defaultdict(str),
                     index_col='#SampleID')
    mf = mf.loc[o.site_ids]
    # Series.convert_objects() has been removed from pandas; to_numeric with
    # errors='coerce' turns every value that is not a number into NaN.
    mf[color] = pd.to_numeric(mf[color], errors='coerce')

    is_missing = pd.isnull(mf[color])
    numeric = mf[~is_missing]
    non_numeric = mf[is_missing]

    color_array = plt.cm.RdBu(numeric[color]/max(numeric[color]))
    # NOTE(review): scatter() below scales `c` between its min and max, while
    # color_array uses value/max, so a sample's highlighted dot may not match
    # the shade of its small dot -- confirm this is intended.

    # Everything below is the same for each figure, so compute it once.
    numeric_xy = c_df.loc[numeric.index]
    other_xy = c_df.loc[non_numeric.index]
    edge_colors = color_array * 0.6
    # sample id -> first row position in `numeric`, replacing a linear
    # list.index() scan per sample
    first_position = {}
    for position, sample_id in enumerate(numeric.index):
        first_position.setdefault(sample_id, position)

    my_dpi = 72
    # 1000 x 1000 pixels at `my_dpi`; float division keeps the size exact on
    # Python 2 as well.
    fig_size = (1000.0 / my_dpi, 1000.0 / my_dpi)

    for sample in mf.index:
        # The size belongs to the figure: savefig() does not resize, and
        # recent matplotlib rejects a `figsize` keyword there.
        plt.figure(figsize=fig_size)

        # plot numeric metadata as colored gradient
        plt.scatter(numeric_xy[0], numeric_xy[1], c=numeric[color],
                    cmap=plt.get_cmap('RdBu'), alpha=ALPHA, lw=LINE_WIDTH,
                    edgecolor=edge_colors)

        # plot non-numeric metadata as gray
        plt.scatter(other_xy[0], other_xy[1], c='0.5', alpha=ALPHA,
                    lw=LINE_WIDTH, edgecolor='0.3')

        # plot individual's dot; gray when the sample has no numeric value
        position = first_position.get(sample)
        if position is None:
            _color = (0.5, 0.5, 0.5)
        else:
            _color = color_array[position]

        sample_xy = c_df.loc[sample]
        plt.scatter(sample_xy[0], sample_xy[1],
                    color=_color, s=270, edgecolor='w')
        plt.scatter(sample_xy[0], sample_xy[1],
                    color=_color, s=250, edgecolor=np.asarray(_color)*0.6)

        plt.axis('off')
        plt.savefig(sample + '.pdf', dpi=my_dpi)
        plt.close()
예제 #7
0

def format_coords(coord_header, coords, eigvals, pct_var, headers=True):
    """Format a coordinates matrix as tab-separated text.

    Parameters
    ----------
    coord_header : iterable of str
        Row labels, one per row of ``coords``.
    coords : sequence of sequences
        Coordinate rows; every value is rendered with ``str``.
    eigvals : iterable
        Values written on the ``eigvals`` line.
    pct_var : iterable
        Values written on the ``% variation explained`` line.
    headers : bool, optional
        When true (the default) the output has a ``pc vector number`` header
        line, one labelled line per row, two empty lines, then the
        ``eigvals`` and ``% variation explained`` lines.  When false only the
        unlabelled rows are written, followed by a trailing newline.

    Returns
    -------
    str
        The formatted lines joined with ``'\\n'``.
    """
    if not headers:
        result = ['\t'.join(map(str, row)) for row in coords]
        result.append('')
        return '\n'.join(result)

    # An empty matrix has no first row to measure; emit a header without
    # axis numbers instead of failing with IndexError.
    n_axes = len(coords[0]) if len(coords) else 0
    result = ['pc vector number\t' +
              '\t'.join(str(axis) for axis in range(1, n_axes + 1))]
    for name, row in zip(coord_header, coords):
        # Build a real list: on Python 3 map() returns an iterator, which
        # cannot be concatenated to a list.
        result.append('\t'.join([name] + [str(value) for value in row]))
    result.append('')
    result.append('')
    result.append('eigvals\t' + '\t'.join(map(str, eigvals)))
    result.append('% variation explained\t' + '\t'.join(map(str, pct_var)))
    return '\n'.join(result)


# Script entry point: read the ordination results named by the first
# command-line argument and write them, reformatted by format_coords, to the
# file named by the second argument.
if __name__ == "__main__":
    if len(argv) < 3:
        raise SystemExit('usage: %s OLD_COORDS NEW_COORDS' % argv[0])
    old_file = argv[1]
    new_file = argv[2]
    # The legacy 'U' mode flag was deprecated in Python 3.4 and removed in
    # 3.11; default text mode already translates newlines on Python 3.
    with open(old_file) as infile:
        res = OrdinationResults.from_file(infile)
        lines = format_coords(res.site_ids, res.site, res.eigvals,
                              res.proportion_explained)
    # Open the output only after the input has been parsed and formatted, so
    # a parse failure (or identical input and output paths) cannot leave a
    # truncated file behind.
    with open(new_file, 'w') as outfile:
        outfile.write(lines)
예제 #8
0
def country(coords, mapping_file):
    """Save one country-colored scatter plot per sample.

    For every sample a PDF named ``<sample id>.pdf`` is written relative to
    the current working directory.  Each figure overlays a density contour of
    all samples with one scatter layer per country in the color table below.
    The participant's own country is drawn last and in the highlight color
    (Malawi and Venezuela keep their own colors), and the participant is
    drawn on top as a large outlined dot.

    Countries that are not in the color table are only drawn in the figures
    of participants from that country.

    Parameters
    ----------
    coords : str or file-like
        Ordination results, handed to ``OrdinationResults.from_file``.
    mapping_file : str or file-like
        Tab-separated metadata with a ``#SampleID`` column and a ``COUNTRY``
        column, handed to ``pandas.read_csv``.
    """
    o = OrdinationResults.from_file(coords)
    x, y = o.site[:, 0], o.site[:, 1]

    # coordinates, one row per sample id
    c_df = pd.DataFrame(o.site, o.site_ids)

    # mapping file, with its rows put in the same order as the coordinates;
    # `sep` is passed by keyword because recent pandas releases no longer
    # accept it positionally
    mf = pd.read_csv(mapping_file, sep='\t', converters=defaultdict(str),
                     index_col='#SampleID')
    mf = mf.loc[o.site_ids]

    palette = sns.color_palette('Paired', 12)
    color_Venezuela = palette[10]
    color_Malawi = palette[1]
    color_Western = palette[4]
    color_Highlight = palette[5]
    color_no_data = (0.5, 0.5, 0.5)

    western_countries = (
        'Australia', 'Belgium', 'Canada', 'China', 'Finland', 'France',
        'Germany', 'Great Britain', 'Ireland', 'Japan', 'Netherlands',
        'New Zealand', 'Norway', 'Scotland', 'Spain', 'Switzerland',
        'Thailand', 'United Arab Emirates', 'United Kingdom',
        'United States of America',
    )
    # countries that are never recolored with the highlight color
    fixed_color_countries = ('Malawi', 'Venezuela')

    # Insertion order is the drawing order of the scatter layers.
    grp_colors = OrderedDict()
    grp_colors['no_data'] = color_no_data
    for name in western_countries:
        grp_colors[name] = color_Western
    grp_colors['Malawi'] = color_Malawi
    grp_colors['Venezuela'] = color_Venezuela

    # Per-country coordinates do not change between figures; look each one
    # up at most once.
    coords_by_country = {}

    def _coords_for(grp):
        """Return the coordinate rows of the samples whose COUNTRY is grp."""
        if grp not in coords_by_country:
            coords_by_country[grp] = c_df[mf.COUNTRY == grp]
        return coords_by_country[grp]

    # Set the line width before any contour is drawn; previously this ran
    # after the first kdeplot call, so the first figure was drawn before the
    # setting took effect.
    sns.set_context(rc={"lines.linewidth": 0.75})

    my_dpi = 72
    # 1000 x 1000 pixels at `my_dpi`; float division keeps the size exact on
    # Python 2 as well.
    fig_size = (1000.0 / my_dpi, 1000.0 / my_dpi)

    for sample in mf.index:
        # The size belongs to the figure: savefig() does not resize, and
        # recent matplotlib rejects a `figsize` keyword there.
        plt.figure(figsize=fig_size)

        # contour plot superimposed
        sns.kdeplot(x, y, cmap='bone')

        sample_country = mf.loc[sample]['COUNTRY']

        # Work on a per-sample copy of the color table.  Mutating the shared
        # table left countries missing from it behind as extra entries, so
        # later figures depended on which samples came first.
        sample_colors = OrderedDict(grp_colors)
        if sample_country not in fixed_color_countries:
            sample_colors[sample_country] = color_Highlight

        # plot each country except participant's according to colors above
        for grp, color in sample_colors.items():
            if grp == sample_country:
                continue
            sub_coords = _coords_for(grp)
            plt.scatter(sub_coords[0], sub_coords[1], color=color,
                        edgecolor=np.asarray(color)*0.6, lw=LINE_WIDTH,
                        alpha=ALPHA)

        # now plot participant's country, last so it sits on top
        color = sample_colors[sample_country]
        sub_coords = _coords_for(sample_country)
        plt.scatter(sub_coords[0], sub_coords[1], color=color,
                    edgecolor=np.asarray(color)*0.6, lw=LINE_WIDTH,
                    alpha=ALPHA)

        # plot participant's dot: a white-rimmed disc with a slightly
        # smaller, darker-rimmed disc on top of it
        sample_xy = c_df.loc[sample]
        plt.scatter(sample_xy[0], sample_xy[1], color=color,
                    s=270, edgecolor='w', zorder=1)
        plt.scatter(sample_xy[0], sample_xy[1], color=color,
                    s=250, edgecolor=np.asarray(color)*0.6, zorder=2)

        plt.axis('off')
        plt.savefig(sample + '.pdf', dpi=my_dpi)
        plt.close()