コード例 #1
0
ファイル: aggregate_plots.py プロジェクト: Xiuying/illumitag
class FractionTaxaBarStack(Graph):
    """Comparing all fractions across all pools in a barstack"""
    short_name = 'fraction_taxa_barstack'

    def plot(self):
        """Build the percentage table on `self.frame`, draw it as a stacked
        bar chart, save the figure and write the table to `self.csv_path`."""
        # One column per "<pool> - <fraction>" label, fraction by fraction #
        self.frame = OrderedDict((('%s - %s' % (p,f), getattr(p.fractions, f).rdp.phyla)
                     for f in ('low', 'med', 'big') for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        self.frame = self.frame.fillna(0)
        self.frame = self.frame.transpose()
        # Rescale every row (one bar) so that it sums to 100 #
        self.frame = self.frame.apply(lambda x: 100*x/x.sum(), axis=1)
        # Sort the table by sum #
        # Series.sort() and DataFrame.reindex_axis() were removed from pandas;
        # sort_values() and reindex(columns=...) are their replacements.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # Draw on axes we own: without `ax`, DataFrame.plot opens a figure of
        # its own and a figure created beforehand is left open and empty.
        fig, axes = pyplot.subplots()
        self.frame.plot(kind='bar', stacked=True, color=cool_colors, ax=axes)
        # Other #
        axes.set_title('Species relative abundances per fraction per pool')
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0,100])
        # Put a legend below current axis
        axes.legend(loc='upper center', bbox_to_anchor=(0.5, -0.20), fancybox=True, shadow=True, ncol=5)
        # Save it #
        self.save_plot(fig, axes, width=24.0, height=14.0, bottom=0.30, top=0.97, left=0.04, right=0.98)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)
コード例 #2
0
ファイル: duplications.py プロジェクト: xapple/ld12
class TaxonomyPlot(Graph):
    """Bar chart of how many genes have each taxonomy as their best hit."""
    short_name = 'fresh_taxonomy'
    bottom = 0.35
    left = 0.1

    def plot(self):
        """Count genes per taxonomy label, store the counts as a Series on
        `self.frame`, and save them as a gray bar chart."""
        # Genes without any hit get a single bar of their own #
        unmatched = {"No hits": len(self.parent.no_hit_genes)}
        # One zero-initialised counter per family, bumped for every gene in
        # `best_is_marine` using the taxonomy of its last hit #
        family_counts = OrderedDict.fromkeys(families, 0)
        for gene in self.parent.best_is_marine:
            last_hit = gene.hits[-1]
            family_counts[last_hit['taxonomy']] += 1
        # Fixed set of categories, bumped for every gene in `best_is_other` #
        category_names = ('Life',
                          'Bacteria',
                          'Proteobacteria',
                          'Alphaproteobacteria',
                          'SAR11 cluster',
                          'Candidatus Pelagibacter')
        self.categories = OrderedDict.fromkeys(category_names, 0)
        for gene in self.parent.best_is_other:
            self.categories[gene.best_tax] += 1
        # Bars appear in this order: no hits, categories, families #
        merged = OrderedDict()
        for counts in (unmatched, self.categories, family_counts):
            merged.update(counts)
        self.frame = pandas.Series(merged)
        # Draw, label, save and release the figure #
        axes = self.frame.plot(kind='bar', color='gray')
        fig = pyplot.gcf()
        axes.set_title("Taxonomy distribution for the best hit against refseq for all freshwater genes")
        axes.set_ylabel("Number of best hits with this taxonomy")
        axes.xaxis.grid(True)
        self.save_plot(fig, axes, sep=('y',))
        pyplot.close(fig)
コード例 #3
0
ファイル: duplications.py プロジェクト: xapple/ld12
class TaxonomyPlot(Graph):
    """Bar chart of how many genes have each taxonomy as their best hit."""
    short_name = 'fresh_taxonomy'
    bottom = 0.35
    left = 0.1

    def plot(self):
        """Count genes per taxonomy label, store the counts as a Series on
        `self.frame`, and save them as a gray bar chart."""
        # Genes without any hit get a single bar of their own #
        unmatched = {"No hits": len(self.parent.no_hit_genes)}
        # Per-family counters start at zero; each gene in `best_is_marine`
        # increments the family named by the taxonomy of its last hit #
        per_family = OrderedDict.fromkeys(families, 0)
        for gene in self.parent.best_is_marine:
            family = gene.hits[-1]['taxonomy']
            per_family[family] += 1
        # Fixed categories; each gene in `best_is_other` increments one #
        labels = ('Life', 'Bacteria', 'Proteobacteria', 'Alphaproteobacteria',
                  'SAR11 cluster', 'Candidatus Pelagibacter')
        self.categories = OrderedDict.fromkeys(labels, 0)
        for gene in self.parent.best_is_other:
            self.categories[gene.best_tax] += 1
        # Bars appear in this order: no hits, categories, families #
        combined = OrderedDict()
        for part in (unmatched, self.categories, per_family):
            combined.update(part)
        self.frame = pandas.Series(combined)
        # Draw, label, save and release the figure #
        title = "Taxonomy distribution for the best hit against refseq for all freshwater genes"
        axes = self.frame.plot(kind='bar', color='gray')
        fig = pyplot.gcf()
        axes.set_title(title)
        axes.set_ylabel("Number of best hits with this taxonomy")
        axes.xaxis.grid(True)
        self.save_plot(fig, axes, sep=('y', ))
        pyplot.close(fig)
コード例 #4
0
class FractionTaxaBarStack(Graph):
    """Comparing all fractions across all pools in a barstack"""
    short_name = 'fraction_taxa_barstack'

    def plot(self):
        """Build the percentage table on `self.frame`, draw it as a stacked
        bar chart, save the figure and write the table to `self.csv_path`."""
        # One column per "<pool> - <fraction>" label, fraction by fraction #
        self.frame = OrderedDict(
            (('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
             for f in ('low', 'med', 'big') for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        self.frame = self.frame.fillna(0)
        self.frame = self.frame.transpose()
        # Rescale every row (one bar) so that it sums to 100 #
        self.frame = self.frame.apply(lambda x: 100 * x / x.sum(), axis=1)
        # Sort the table by sum #
        # Series.sort() and DataFrame.reindex_axis() were removed from pandas;
        # sort_values() and reindex(columns=...) are their replacements.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # Draw on axes we own: without `ax`, DataFrame.plot opens a figure of
        # its own and a figure created beforehand is left open and empty.
        fig, axes = pyplot.subplots()
        self.frame.plot(kind='bar',
                        stacked=True,
                        color=cool_colors,
                        ax=axes)
        # Other #
        axes.set_title('Species relative abundances per fraction per pool')
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0, 100])
        # Put a legend below current axis
        axes.legend(loc='upper center',
                    bbox_to_anchor=(0.5, -0.20),
                    fancybox=True,
                    shadow=True,
                    ncol=5)
        # Save it #
        self.save_plot(fig,
                       axes,
                       width=24.0,
                       height=14.0,
                       bottom=0.30,
                       top=0.97,
                       left=0.04,
                       right=0.98)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)
コード例 #5
0
class FractionTaxaBarStack(Graph):
    """Stacked bar chart of all fractions across all pools, with pools
    relabelled and low-abundance columns merged into 'Others'."""
    short_name = 'fraction_taxa_barstack'
    bottom = 0.4
    top = 0.95
    left = 0.1
    right = 0.95
    formats = ('pdf', 'eps')

    def plot(self):
        """Build the percentage table on `self.frame`, draw it as a stacked
        bar chart, save the figure and write the table to `self.csv_path`."""
        # Make Frame #
        # One column per "<pool> - <fraction>" label, fraction by fraction
        self.frame = OrderedDict(
            (('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
             for f in ('low', 'med', 'big') for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        self.frame = self.frame.fillna(0)
        # Rename #
        # Several pools share a display label, so labels repeat after renaming
        new_names = {
            u"run001-pool01 - low": "2-step PCR low",
            u"run001-pool02 - low": "2-step PCR low",
            u"run001-pool03 - low": "2-step PCR low",
            u"run001-pool04 - low": "1-step PCR low",
            u"run002-pool01 - low": "New chem low",
            u"run001-pool01 - med": "2-step PCR med",
            u"run001-pool02 - med": "2-step PCR med",
            u"run001-pool03 - med": "2-step PCR med",
            u"run001-pool04 - med": "1-step PCR med",
            u"run002-pool01 - med": "New chem med",
            u"run001-pool01 - big": "2-step PCR high",
            u"run001-pool02 - big": "2-step PCR high",
            u"run001-pool03 - big": "2-step PCR high",
            u"run001-pool04 - big": "1-step PCR high",
            u"run002-pool01 - big": "New chem high",
        }
        self.frame.rename(columns=new_names, inplace=True)
        self.frame = self.frame.transpose()
        # Group low abundant into 'others' #
        # Columns whose total over all rows is below 30000 are summed per row
        low_abundance = self.frame.sum() < 30000
        other_count = self.frame.loc[:, low_abundance].sum(axis=1)
        self.frame = self.frame.loc[:, ~low_abundance]
        self.frame['Others'] = other_count
        # Normalize #
        # Rescale every row (one bar) so that it sums to 100
        self.frame = self.frame.apply(lambda x: 100 * x / x.sum(), axis=1)
        # Sort the table by sum #
        # Series.sort() and DataFrame.reindex_axis() were removed from pandas;
        # sort_values() and reindex(columns=...) are their replacements.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # Draw on axes we own: without `ax`, DataFrame.plot opens a figure of
        # its own and a figure created beforehand is left open and empty.
        fig, axes = pyplot.subplots()
        self.frame.plot(kind='bar',
                        stacked=True,
                        color=cool_colors,
                        ax=axes)
        # Other #
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0, 100])
        # Put a legend below current axis
        axes.legend(loc='upper center',
                    bbox_to_anchor=(0.5, -0.40),
                    fancybox=True,
                    shadow=True,
                    ncol=5,
                    prop={'size': 10})
        # Font size #
        axes.tick_params(axis='x', which='major', labelsize=11)
        # Save it #
        self.save_plot(fig, axes)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)
コード例 #6
0
# Collect the four zone series into one frame indexed by `rng`, and plot it
zones['SUD'] = np.array(sud)
zones['CSUD'] = np.array(csud)
zones['CNOR'] = np.array(cnor)
zones['NORD'] = np.array(nord)

Z = pd.DataFrame.from_dict(zones).set_index(rng)

Z.plot()
######
# From here on `Z` and `zones` are rebound: daily means of `data`
Z = data.resample('D').mean()

# Keep twelve columns, selected by position
zones = pd.DataFrame(Z[Z.columns[[6,7,10,13,15,16,18,21,22,23,24,25]]])

# Rows whose index month is September or earlier.
# `.ix` was removed in pandas 1.0; `.loc` is the equivalent for a boolean mask.
zones2 = zones.loc[zones.index.month <= 9]

# Per the plot titles below, positions 0, 4 and 6 are PUN, FRAN and NORD
zones.plot()
zones[zones.columns[[0,4,6]]].plot()
zones[zones.columns[[0,4]]].plot(title='PUN vs FRAN')
zones[zones.columns[[0,6]]].plot(title='PUN vs NORD')
zones[zones.columns[[6,4]]].plot(title='NORD vs FRAN')

# Pairwise correlations; the values are not stored, so they are only visible
# when these lines are run interactively
zones[zones.columns[0]].corr(zones[zones.columns[4]])
zones[zones.columns[0]].corr(zones[zones.columns[6]])
zones[zones.columns[4]].corr(zones[zones.columns[6]])

#### FRAN normalized:
# Subtract the mean and divide by the standard deviation of column 4
nor_fran = (zones[zones.columns[4]] - zones[zones.columns[4]].mean())/zones[zones.columns[4]].std()

plt.figure()
plt.plot(nor_fran)
コード例 #7
0
ファイル: FS.py プロジェクト: davideflo/Python_code
    
###  EPEX FR from 2016-09-30 to 2016-11-03:
nd = [
    41.78, 38.19, 32.48, 36.04, 42.02, 42.68, 48.28,
    57.29208333, 44.35375, 36.705, 56.70208333, 71.21208333, 62.81041667,
    64.25, 64.10, 44.28, 40.02, 56.41, 66.94,
    67.69, 76.30, 72.95, 55.72, 44.57, 72.63,
    79.92, 70.53, 61.49, 58.38, 52.12, 40.95, 48.95, 47.63, 64.01, 68.82,
    74.08, 50.90, 43.47, 125.67, 114.82,
]

# Append the new values to the existing `fr` series in one call
fr.extend(nd)
fsm['francia'] = fr
fsm['svizzera'] = sv
fsm['pun'] = pun[:287]

fsm = pd.DataFrame.from_dict(fsm)

fsm.plot()  ### from this I'm very doubtful that flows actually correlate with prices...
            ### except in the last week where something anomalous is definitely happening

############## analysis of correlations ###############
fsm = fsm.set_index(pun.index)

# Convert once, outside the loops: neither array changes between iterations.
# NOTE(review): ravel() flattens every column of `fsm` row by row, so a slice
# of `fsm_values` mixes columns -- confirm this is the series meant here.
pun_values = np.array(pun)
fsm_values = np.array(fsm).ravel()
n_obs = pun.shape[0]

# Correlation over the first i observations, for growing i
cors = [
    np.corrcoef(pun_values[:i], fsm_values[:i])[1, 0]
    for i in range(2, n_obs)
]

# Same, but slicing from position n_obs - i to the end
compl_cors = [
    np.corrcoef(pun_values[n_obs - i:], fsm_values[n_obs - i:])[1, 0]
    for i in range(2, n_obs)
]

plt.figure()
plt.plot(np.array(cors))
plt.figure()
コード例 #8
0
ファイル: eval_plots.py プロジェクト: Xiuying/illumitag
class FractionTaxaBarStack(Graph):
    """This is figure 3 of the paper"""

    short_name = 'fraction_taxa_barstack'
    bottom = 0.4
    top = 0.95
    left = 0.1
    right = 0.95
    formats = ('pdf', 'eps')

    def plot(self):
        """Build the percentage table on `self.frame`, draw it as a stacked
        bar chart, save the figure and write the table to `self.csv_path`."""
        # Make Frame #
        # One column per "<pool> - <fraction>" label, fraction by fraction
        self.frame = OrderedDict((('%s - %s' % (p,f), getattr(p.fractions, f).rdp.phyla)
                     for f in ('low', 'med', 'big') for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        self.frame = self.frame.fillna(0)
        # Rename #
        # Several pools share a display label, so labels repeat after renaming
        new_names = {
            u"run001-pool01 - low": "2-step PCR low",
            u"run001-pool02 - low": "2-step PCR low",
            u"run001-pool03 - low": "2-step PCR low",
            u"run001-pool04 - low": "1-step PCR low",
            u"run002-pool01 - low": "New chem low",
            u"run001-pool01 - med": "2-step PCR med",
            u"run001-pool02 - med": "2-step PCR med",
            u"run001-pool03 - med": "2-step PCR med",
            u"run001-pool04 - med": "1-step PCR med",
            u"run002-pool01 - med": "New chem med",
            u"run001-pool01 - big": "2-step PCR high",
            u"run001-pool02 - big": "2-step PCR high",
            u"run001-pool03 - big": "2-step PCR high",
            u"run001-pool04 - big": "1-step PCR high",
            u"run002-pool01 - big": "New chem high",
        }
        self.frame.rename(columns=new_names, inplace=True)
        self.frame = self.frame.transpose()
        # Group low abundant into 'others' #
        # Columns whose total over all rows is below 30000 are summed per row
        low_abundance = self.frame.sum() < 30000
        other_count = self.frame.loc[:, low_abundance].sum(axis=1)
        self.frame = self.frame.loc[:, ~low_abundance]
        self.frame['Others'] = other_count
        # Normalize #
        # Rescale every row (one bar) so that it sums to 100
        self.frame = self.frame.apply(lambda x: 100*x/x.sum(), axis=1)
        # Sort the table by sum #
        # Series.sort() and DataFrame.reindex_axis() were removed from pandas;
        # sort_values() and reindex(columns=...) are their replacements.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # Draw on axes we own: without `ax`, DataFrame.plot opens a figure of
        # its own and a figure created beforehand is left open and empty.
        fig, axes = pyplot.subplots()
        self.frame.plot(kind='bar', stacked=True, color=cool_colors, ax=axes)
        # Other #
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0,100])
        # Put a legend below current axis
        axes.legend(loc='upper center', bbox_to_anchor=(0.5, -0.40), fancybox=True, shadow=True, ncol=5, prop={'size':10})
        # Font size #
        axes.tick_params(axis='x', which='major', labelsize=11)
        # Save it #
        self.save_plot(fig, axes)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)
コード例 #9
0
    41.78, 38.19, 32.48, 36.04, 42.02, 42.68, 48.28, 57.29208333, 44.35375,
    36.705, 56.70208333, 71.21208333, 62.81041667, 64.25, 64.10, 44.28, 40.02,
    56.41, 66.94, 67.69, 76.30, 72.95, 55.72, 44.57, 72.63, 79.92, 70.53,
    61.49, 58.38, 52.12, 40.95, 48.95, 47.63, 64.01, 68.82, 74.08, 50.90,
    43.47, 125.67, 114.82
]

# Append the new values to the existing `fr` series in one call
fr.extend(nd)
fsm['francia'] = fr
fsm['svizzera'] = sv
fsm['pun'] = pun[:287]

fsm = pd.DataFrame.from_dict(fsm)

fsm.plot()
### from this I'm very doubtful that flows actually correlate with prices...
### except in the last week where something anomalous is definitely happening

############## analysis of correlations ###############
fsm = fsm.set_index(pun.index)

# Convert once, outside the loop: neither array changes between iterations.
# NOTE(review): ravel() flattens every column of `fsm` row by row, so a slice
# of `fsm_flat` mixes columns -- confirm this is the series meant here.
pun_flat = np.array(pun)
fsm_flat = np.array(fsm).ravel()

# Correlation over the first i observations, for growing i
cors = [
    np.corrcoef(pun_flat[:i], fsm_flat[:i])[1, 0]
    for i in range(2, pun.shape[0])
]

compl_cors = []
for i in range(2, pun.shape[0], 1):
    compl_cors.append(
        np.corrcoef(
            np.array(pun)[pun.shape[0] - i:],