class FractionTaxaBarStack(Graph):
    """Comparing all fractions across all pools in a barstack"""
    short_name = 'fraction_taxa_barstack'

    def plot(self):
        """Build the percentage table, draw it as stacked bars and save it.

        The table is stored in ``self.frame`` with one row per
        '<pool> - <fraction>' pair; every row is rescaled to sum to 100.
        The figure goes through ``self.save_plot`` and the table is written
        to ``self.csv_path``.
        """
        # Make Frame #
        # One column per (pool, fraction) pair, taken from `rdp.phyla`
        # (presumably per-phylum counts -- confirm against the fraction object)
        self.frame = OrderedDict((('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
                                  for f in ('low', 'med', 'big')
                                  for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        self.frame = self.frame.fillna(0)
        self.frame = self.frame.transpose()
        # Normalize every row to percentages #
        self.frame = self.frame.apply(lambda x: 100*x/x.sum(), axis=1)
        # Sort the table by sum #
        # `Series.sort` and `reindex_axis` no longer exist in pandas; use the
        # equivalent `sort_values` / `reindex(columns=...)` calls instead.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # DataFrame.plot opens its own figure when no `ax` is passed, so take
        # the figure from the returned axes rather than pre-creating one that
        # would never be closed.
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.get_figure()
        # Other #
        axes.set_title('Species relative abundances per fraction per pool')
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0, 100])
        # Put a legend below current axis
        axes.legend(loc='upper center', bbox_to_anchor=(0.5, -0.20),
                    fancybox=True, shadow=True, ncol=5)
        # Save it #
        self.save_plot(fig, axes, width=24.0, height=14.0,
                       bottom=0.30, top=0.97, left=0.04, right=0.98)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)
class TaxonomyPlot(Graph):
    """Bar chart of how many genes fall under each best-hit taxonomy label."""
    short_name = 'fresh_taxonomy'
    bottom = 0.35
    left = 0.1

    def plot(self):
        """Tally the genes per taxonomy label, store the tally in
        ``self.frame`` and save it as a gray bar chart."""
        # Genes that had no hit at all #
        unmatched = {"No hits": len(self.parent.no_hit_genes)}
        # Marine best hits, counted by the taxonomy of the last hit #
        family_counts = OrderedDict.fromkeys(families, 0)
        for gene in self.parent.best_is_marine:
            family = gene.hits[-1]['taxonomy']
            family_counts[family] += 1
        # All other best hits, counted by their `best_tax` label #
        self.categories = OrderedDict()
        for label in ('Life', 'Bacteria', 'Proteobacteria', 'Alphaproteobacteria',
                      'SAR11 cluster', 'Candidatus Pelagibacter'):
            self.categories[label] = 0
        for gene in self.parent.best_is_other:
            self.categories[gene.best_tax] += 1
        # Frame: no hits first, then the categories, then the families #
        counts = OrderedDict()
        for part in (unmatched, self.categories, family_counts):
            counts.update(part)
        self.frame = pandas.Series(counts)
        # Plot #
        axes = self.frame.plot(kind='bar', color='gray')
        fig = pyplot.gcf()
        axes.set_title("Taxonomy distribution for the best hit against refseq for all freshwater genes")
        axes.set_ylabel("Number of best hits with this taxonomy")
        axes.xaxis.grid(True)
        # Save it #
        self.save_plot(fig, axes, sep=('y',))
        pyplot.close(fig)
class TaxonomyPlot(Graph):
    """Bar chart of how many genes fall under each best-hit taxonomy label."""
    short_name = 'fresh_taxonomy'
    bottom = 0.35
    left = 0.1

    def plot(self):
        """Tally the genes per taxonomy label, store the tally in
        ``self.frame`` and save it as a gray bar chart."""
        # Genes that had no hit at all #
        unmatched = {"No hits": len(self.parent.no_hit_genes)}
        # Marine best hits, counted by the taxonomy of the last hit #
        family_counts = OrderedDict.fromkeys(families, 0)
        for gene in self.parent.best_is_marine:
            family = gene.hits[-1]['taxonomy']
            family_counts[family] += 1
        # All other best hits, counted by their `best_tax` label #
        self.categories = OrderedDict()
        for label in ('Life', 'Bacteria', 'Proteobacteria', 'Alphaproteobacteria',
                      'SAR11 cluster', 'Candidatus Pelagibacter'):
            self.categories[label] = 0
        for gene in self.parent.best_is_other:
            self.categories[gene.best_tax] += 1
        # Frame: no hits first, then the categories, then the families #
        counts = OrderedDict()
        for part in (unmatched, self.categories, family_counts):
            counts.update(part)
        self.frame = pandas.Series(counts)
        # Plot #
        axes = self.frame.plot(kind='bar', color='gray')
        fig = pyplot.gcf()
        axes.set_title(
            "Taxonomy distribution for the best hit against refseq for all freshwater genes"
        )
        axes.set_ylabel("Number of best hits with this taxonomy")
        axes.xaxis.grid(True)
        # Save it #
        self.save_plot(fig, axes, sep=('y', ))
        pyplot.close(fig)
class FractionTaxaBarStack(Graph):
    """Comparing all fractions across all pools in a barstack"""
    short_name = 'fraction_taxa_barstack'

    def plot(self):
        """Build the percentage table, draw it as stacked bars and save it.

        The table is stored in ``self.frame`` with one row per
        '<pool> - <fraction>' pair; every row is rescaled to sum to 100.
        The figure goes through ``self.save_plot`` and the table is written
        to ``self.csv_path``.
        """
        # Make Frame #
        # One column per (pool, fraction) pair, taken from `rdp.phyla`
        # (presumably per-phylum counts -- confirm against the fraction object)
        self.frame = OrderedDict(
            (('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
             for f in ('low', 'med', 'big') for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        self.frame = self.frame.fillna(0)
        self.frame = self.frame.transpose()
        # Normalize every row to percentages #
        self.frame = self.frame.apply(lambda x: 100 * x / x.sum(), axis=1)
        # Sort the table by sum #
        # `Series.sort` and `reindex_axis` no longer exist in pandas; use the
        # equivalent `sort_values` / `reindex(columns=...)` calls instead.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # DataFrame.plot opens its own figure when no `ax` is passed, so take
        # the figure from the returned axes rather than pre-creating one that
        # would never be closed.
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.get_figure()
        # Other #
        axes.set_title('Species relative abundances per fraction per pool')
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0, 100])
        # Put a legend below current axis
        axes.legend(loc='upper center',
                    bbox_to_anchor=(0.5, -0.20),
                    fancybox=True,
                    shadow=True,
                    ncol=5)
        # Save it #
        self.save_plot(fig,
                       axes,
                       width=24.0,
                       height=14.0,
                       bottom=0.30,
                       top=0.97,
                       left=0.04,
                       right=0.98)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)
class FractionTaxaBarStack(Graph):
    """Stacked bar chart of relative abundances per fraction per pool.

    Columns whose total is below 30000 are merged into a single 'Others'
    column before the rows are turned into percentages.
    """
    short_name = 'fraction_taxa_barstack'
    # Layout and output settings; presumably read by Graph.save_plot -- confirm
    bottom = 0.4
    top = 0.95
    left = 0.1
    right = 0.95
    formats = ('pdf', 'eps')

    def plot(self):
        """Build the percentage table, draw it as stacked bars and save it.

        The table is stored in ``self.frame``; the figure goes through
        ``self.save_plot`` and the table is written to ``self.csv_path``.
        """
        # Make Frame #
        # One column per (pool, fraction) pair, taken from `rdp.phyla`
        self.frame = OrderedDict(
            (('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
             for f in ('low', 'med', 'big') for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        self.frame = self.frame.fillna(0)
        # Rename #
        # Several pools share one display label, so labels are not unique
        new_names = {
            u"run001-pool01 - low": "2-step PCR low",
            u"run001-pool02 - low": "2-step PCR low",
            u"run001-pool03 - low": "2-step PCR low",
            u"run001-pool04 - low": "1-step PCR low",
            u"run002-pool01 - low": "New chem low",
            u"run001-pool01 - med": "2-step PCR med",
            u"run001-pool02 - med": "2-step PCR med",
            u"run001-pool03 - med": "2-step PCR med",
            u"run001-pool04 - med": "1-step PCR med",
            u"run002-pool01 - med": "New chem med",
            u"run001-pool01 - big": "2-step PCR high",
            u"run001-pool02 - big": "2-step PCR high",
            u"run001-pool03 - big": "2-step PCR high",
            u"run001-pool04 - big": "1-step PCR high",
            u"run002-pool01 - big": "New chem high",
        }
        self.frame.rename(columns=new_names, inplace=True)
        self.frame = self.frame.transpose()
        # Group low abundant into 'others' #
        low_abundance = self.frame.sum() < 30000
        other_count = self.frame.loc[:, low_abundance].sum(axis=1)
        self.frame = self.frame.loc[:, ~low_abundance]
        self.frame['Others'] = other_count
        # Normalize #
        self.frame = self.frame.apply(lambda x: 100 * x / x.sum(), axis=1)
        # Sort the table by sum #
        # `Series.sort` and `reindex_axis` no longer exist in pandas; use the
        # equivalent `sort_values` / `reindex(columns=...)` calls instead.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # DataFrame.plot opens its own figure when no `ax` is passed, so take
        # the figure from the returned axes rather than pre-creating one that
        # would never be closed.
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.get_figure()
        # Other #
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0, 100])
        # Put a legend below current axis
        axes.legend(loc='upper center',
                    bbox_to_anchor=(0.5, -0.40),
                    fancybox=True,
                    shadow=True,
                    ncol=5,
                    prop={'size': 10})
        # Font size #
        axes.tick_params(axis='x', which='major', labelsize=11)
        # Save it #
        self.save_plot(fig, axes)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)
# Gather the per-zone series into one frame indexed by `rng` and plot it.
zones['SUD'] = np.array(sud)
zones['CSUD'] = np.array(csud)
zones['CNOR'] = np.array(cnor)
zones['NORD'] = np.array(nord)
Z = pd.DataFrame.from_dict(zones).set_index(rng)
Z.plot()
######
# Daily means of `data`, restricted to a fixed selection of columns.
Z = data.resample('D').mean()
zones = pd.DataFrame(Z[Z.columns[[6,7,10,13,15,16,18,21,22,23,24,25]]])
# Rows whose month is September or earlier. `.ix` has been removed from
# pandas; `.loc` with the same boolean mask selects the same rows.
zones2 = zones.loc[zones.index.month <= 9]
zones.plot()
# Pairwise comparisons; going by the plot titles, columns 0, 4 and 6 of the
# selection are PUN, FRAN and NORD.
zones[zones.columns[[0,4,6]]].plot()
zones[zones.columns[[0,4]]].plot(title='PUN vs FRAN')
zones[zones.columns[[0,6]]].plot(title='PUN vs NORD')
zones[zones.columns[[6,4]]].plot(title='NORD vs FRAN')
# The correlation values are not stored, so they are only visible when these
# lines are evaluated interactively.
zones[zones.columns[0]].corr(zones[zones.columns[4]])
zones[zones.columns[0]].corr(zones[zones.columns[6]])
zones[zones.columns[4]].corr(zones[zones.columns[6]])
#### FRAN normalized:
# Subtract the mean and divide by the standard deviation of column 4.
nor_fran = (zones[zones.columns[4]] - zones[zones.columns[4]].mean())/zones[zones.columns[4]].std()
plt.figure()
plt.plot(nor_fran)
### EPEX FR from 2016-09-30 to 2016-11-03:
nd = [41.78, 38.19, 32.48, 36.04, 42.02, 42.68, 48.28, 57.29208333,
      44.35375, 36.705, 56.70208333, 71.21208333, 62.81041667, 64.25,
      64.10, 44.28, 40.02, 56.41, 66.94, 67.69, 76.30, 72.95, 55.72,
      44.57, 72.63, 79.92, 70.53, 61.49, 58.38, 52.12, 40.95, 48.95,
      47.63, 64.01, 68.82, 74.08, 50.90, 43.47, 125.67, 114.82]
# Append the extra values to the existing `fr` list in one call.
fr.extend(nd)
fsm['francia'] = fr
fsm['svizzera'] = sv
fsm['pun'] = pun[:287]
fsm = pd.DataFrame.from_dict(fsm)
fsm.plot()
### from this I'm very doubtful that flows actually correlate with prices...
### except in the last week where something anomalous is definitely happening

############## analysis of correlations ###############
fsm = fsm.set_index(pun.index)
# Convert once: these arrays do not change inside the loops below.
# NOTE(review): `fsm` holds three columns, so ravel() interleaves them row by
# row -- confirm that this flattened array is the series meant to be compared.
pun_arr = np.array(pun)
fsm_flat = np.array(fsm).ravel()
n_obs = pun.shape[0]
# Correlation over a growing window anchored at the start of the data.
cors = []
for i in range(2, n_obs, 1):
    cors.append(np.corrcoef(pun_arr[:i], fsm_flat[:i])[1, 0])
# Same, but the window starts at index n_obs - i and runs to the end.
compl_cors = []
for i in range(2, n_obs, 1):
    compl_cors.append(np.corrcoef(pun_arr[n_obs - i:], fsm_flat[n_obs - i:])[1, 0])
plt.figure()
plt.plot(np.array(cors))
plt.figure()
class FractionTaxaBarStack(Graph):
    """This is figure 3 of the paper.

    Stacked bar chart of relative abundances per fraction per pool. Columns
    whose total is below 30000 are merged into a single 'Others' column
    before the rows are turned into percentages.
    """
    short_name = 'fraction_taxa_barstack'
    # Layout and output settings; presumably read by Graph.save_plot -- confirm
    bottom = 0.4
    top = 0.95
    left = 0.1
    right = 0.95
    formats = ('pdf', 'eps')

    def plot(self):
        """Build the percentage table, draw it as stacked bars and save it.

        The table is stored in ``self.frame``; the figure goes through
        ``self.save_plot`` and the table is written to ``self.csv_path``.
        """
        # Make Frame #
        # One column per (pool, fraction) pair, taken from `rdp.phyla`
        self.frame = OrderedDict((('%s - %s' % (p, f), getattr(p.fractions, f).rdp.phyla)
                                  for f in ('low', 'med', 'big')
                                  for p in self.parent.pools))
        self.frame = pandas.DataFrame(self.frame)
        self.frame = self.frame.fillna(0)
        # Rename #
        # Several pools share one display label, so labels are not unique
        new_names = {
            u"run001-pool01 - low": "2-step PCR low",
            u"run001-pool02 - low": "2-step PCR low",
            u"run001-pool03 - low": "2-step PCR low",
            u"run001-pool04 - low": "1-step PCR low",
            u"run002-pool01 - low": "New chem low",
            u"run001-pool01 - med": "2-step PCR med",
            u"run001-pool02 - med": "2-step PCR med",
            u"run001-pool03 - med": "2-step PCR med",
            u"run001-pool04 - med": "1-step PCR med",
            u"run002-pool01 - med": "New chem med",
            u"run001-pool01 - big": "2-step PCR high",
            u"run001-pool02 - big": "2-step PCR high",
            u"run001-pool03 - big": "2-step PCR high",
            u"run001-pool04 - big": "1-step PCR high",
            u"run002-pool01 - big": "New chem high",
        }
        self.frame.rename(columns=new_names, inplace=True)
        self.frame = self.frame.transpose()
        # Group low abundant into 'others' #
        low_abundance = self.frame.sum() < 30000
        other_count = self.frame.loc[:, low_abundance].sum(axis=1)
        self.frame = self.frame.loc[:, ~low_abundance]
        self.frame['Others'] = other_count
        # Normalize #
        self.frame = self.frame.apply(lambda x: 100*x/x.sum(), axis=1)
        # Sort the table by sum #
        # `Series.sort` and `reindex_axis` no longer exist in pandas; use the
        # equivalent `sort_values` / `reindex(columns=...)` calls instead.
        sums = self.frame.sum().sort_values(ascending=False)
        self.frame = self.frame.reindex(columns=sums.index)
        # Plot #
        # DataFrame.plot opens its own figure when no `ax` is passed, so take
        # the figure from the returned axes rather than pre-creating one that
        # would never be closed.
        axes = self.frame.plot(kind='bar', stacked=True, color=cool_colors)
        fig = axes.get_figure()
        # Other #
        axes.set_ylabel('Relative abundances in percent')
        axes.xaxis.grid(False)
        axes.yaxis.grid(False)
        axes.set_ylim([0, 100])
        # Put a legend below current axis
        axes.legend(loc='upper center', bbox_to_anchor=(0.5, -0.40),
                    fancybox=True, shadow=True, ncol=5, prop={'size': 10})
        # Font size #
        axes.tick_params(axis='x', which='major', labelsize=11)
        # Save it #
        self.save_plot(fig, axes)
        self.frame.to_csv(self.csv_path)
        pyplot.close(fig)
41.78, 38.19, 32.48, 36.04, 42.02, 42.68, 48.28, 57.29208333, 44.35375, 36.705, 56.70208333, 71.21208333, 62.81041667, 64.25, 64.10, 44.28, 40.02, 56.41, 66.94, 67.69, 76.30, 72.95, 55.72, 44.57, 72.63, 79.92, 70.53, 61.49, 58.38, 52.12, 40.95, 48.95, 47.63, 64.01, 68.82, 74.08, 50.90, 43.47, 125.67, 114.82 ] for n in nd: fr.append(n) fsm['francia'] = fr fsm['svizzera'] = sv fsm['pun'] = pun[:287] fsm = pd.DataFrame.from_dict(fsm) fsm.plot( ) ### from this I'm very doubtful that flows actually correlate wth prices... ### except in the last week where something anomalous is definitely happening ############## analysis of correlations ############### fsm = fsm.set_index(pun.index) cors = [] for i in range(2, pun.shape[0], 1): cors.append( np.corrcoef(np.array(pun)[:i], np.array(fsm).ravel()[:i])[1, 0]) compl_cors = [] for i in range(2, pun.shape[0], 1): compl_cors.append( np.corrcoef( np.array(pun)[pun.shape[0] - i:],