def Output(self, dframe):
    """Write the per-root, per-attribute report to ``my_output.csv``.

    For every ``(rootNb, rootNm)`` of ``enumerate(self.TruncNames)`` a
    ``TruncDics`` banner line is written. Then, for each attribute of
    ``self.attrModif`` and each ``branchWeight`` in
    ``range(1, self.CritDiff)``, the rows selected from ``dframe`` by
    ``myPandaUtilities.myfilter`` are written as CSV without a header.
    Attributes whose ``self.OrderOrNot`` entry is not ``True`` get an extra
    ``Range:`` line and only their ``fileName`` column is written.

    Everything goes through ONE file handle that is closed on exit.
    Writing the banners through a buffered handle while ``to_csv`` appends
    to the same path through a second handle lets the two streams land out
    of order (or on top of each other) and leaks the first handle.

    :param dframe: DataFrame passed to ``myPandaUtilities.myfilter``; the
        filters used here name the columns ``RootNb``, ``attrModif`` and
        ``branchWeight``.
    """
    with open('my_output.csv', 'w') as f:

        def write_line(*items):
            # Same layout as ``print >> f, a, b, ...``: items separated by
            # one space, terminated by a newline.
            f.write(' '.join(str(item) for item in items) + '\n')

        for rootNb, rootNm in enumerate(self.TruncNames):
            write_line('TruncDics: ', rootNb, ' : ', rootNm)
            for attr in self.attrModif:
                order = self.OrderOrNot[attr]
                # ``== True`` (not a plain truth test) is deliberate: the
                # other kind of entry is a nested sequence indexed below,
                # which would also be truthy.
                if order == True:  # noqa: E712
                    # normal output
                    fields = ['fileName', 'RootNb', 'attrModif', 'from', 'to']
                else:
                    # output range
                    bounds = order[1]
                    write_line('TruncDics: ', rootNb, ' : ', rootNm,
                               'Range: ', bounds[0], bounds[1], bounds[2])
                    fields = ['fileName']
                for i in range(1, self.CritDiff):
                    selection = myPandaUtilities.myfilter(
                        dframe,
                        ['RootNb', rootNb, 'attrModif', attr, 'branchWeight', i],
                        fields)
                    # Write through the already-open handle so rows stay in
                    # sequence with the banner lines.
                    selection.to_csv(f, header=False)
        # NOTE(review): the column-name line is emitted once, after all the
        # data, as the source appears to do -- confirm the intended position.
        write_line('FileNumber', 'FileName', 'RootNb', 'AttributeNb', 'from', 'to')
def Study(self):
    """Select the (Line_id, Trip_id) pairs with enough 'Reven' statistics.

    For lines 1 and 2 and every trip of ``self.TripIdList`` the matching
    rows of ``self.data`` are summarised with ``describe()``; pairs whose
    'Reven' count is positive are collected. The collected table is shown
    through ``myPandaUtilities.myLazyDispl``, sorted by descending count
    and restricted to counts strictly above ``self.CutOff``.

    :return: DataFrame holding the 'Line_id' and 'Trip_id' columns of the
        retained pairs.
    """
    counts = []
    line_ids = []
    trip_ids = []
    for line in [1, 2]:  # self.LineIdList:
        for trip in self.TripIdList:
            subset = myPandaUtilities.myfilter(
                self.data, ['Line_id', line, 'Trip_id', trip])
            n_obs = subset.describe()['Reven']['count']
            if not (n_obs > 0):
                continue
            counts.append(n_obs)
            line_ids.append(line)
            trip_ids.append(trip)

    stats = pd.DataFrame({
        'count': counts,
        'Line_id': line_ids,
        'Trip_id': trip_ids,
    })
    myPandaUtilities.myLazyDispl(stats)

    # NOTE(review): DataFrame.sort is the legacy pandas spelling; newer
    # pandas releases only provide sort_values.
    ranked = stats.sort('count', ascending=False)
    kept = ranked[ranked['count'] > self.CutOff]
    return kept[['Line_id', 'Trip_id']]
def plotAttrVSDays( self ):
    """Draw one error-bar series of 'Ride_load' versus day per trip.

    For each trip of ``self.AttrList1`` (on line ``self._line``) the rows
    of ``self.data`` are gathered day by day over ``self.DaysList``, the
    'DayOfWeek' values are mapped through ``fctChgDays`` and the per-day
    mean and standard deviation of 'Ride_load' are plotted with
    ``plt.errorbar``. A trip is plotted only when it has data for more
    than two distinct days.

    Legend labels (``"<trip>_<line>"``) are recorded only for trips that
    are actually plotted, so each label lines up with its series.
    """
    labels = []
    for trip in self.AttrList1:
        frames = [
            myPandaUtilities.myfilter(
                self.data,
                ['Line_id', self._line, 'Trip_id', trip, 'DayOfWeek', day],
                ['DayOfWeek', 'Ride_load'])
            for day in self.DaysList
        ]
        dff = pd.concat(frames)
        dff['DayOfWeek'] = dff['DayOfWeek'].apply(fctChgDays)
        dff = dff.sort('DayOfWeek')
        days = dff['DayOfWeek'].unique()

        # Skip trips with no rows or with at most two distinct days.
        if dff.shape[0] == 0 or len(days) <= 2:
            continue

        grouped = dff.groupby('DayOfWeek')
        dfave = grouped.mean()
        dfvar = grouped.std()
        dfave.rename(columns={'Ride_load': 'mean'}, inplace=True)
        dfvar.rename(columns={'Ride_load': 'std'}, inplace=True)
        df = dfave.join(dfvar)
        # Replace missing values by a tiny number -- presumably so that
        # errorbar never receives NaN.
        df = df.fillna(0.00000001)

        labels.append(str(trip) + '_' + str(self._line))
        print(' '.join([' ---df--- ', str(df), str(days)]))
        print('tmp0============= %s' % (labels,))
        plt.errorbar(days, df['mean'], df['std'],
                     linestyle="dashed", marker="o", zorder=1)
    plt.legend(labels, loc='upper left')  # 'lower left'
    plt.draw()
def plotAttrVSDays( self ):
    """Draw one error-bar series of 'Ride_load' versus day per trip.

    For each trip of ``self.AttrList1`` (on line ``self._line``) the rows
    of ``self.data`` are gathered day by day over ``self.DaysList``, the
    'DayOfWeek' values are mapped through ``fctChgDays`` and the per-day
    mean and standard deviation of 'Ride_load' are plotted with
    ``plt.errorbar``. A trip is plotted only when it has data for more
    than two distinct days.

    Legend labels (``"<trip>_<line>"``) are recorded only for trips that
    are actually plotted, so each label lines up with its series.
    """
    labels = []
    for trip in self.AttrList1:
        frames = [
            myPandaUtilities.myfilter(
                self.data,
                ['Line_id', self._line, 'Trip_id', trip, 'DayOfWeek', day],
                ['DayOfWeek', 'Ride_load'])
            for day in self.DaysList
        ]
        dff = pd.concat(frames)
        dff['DayOfWeek'] = dff['DayOfWeek'].apply(fctChgDays)
        dff = dff.sort('DayOfWeek')
        days = dff['DayOfWeek'].unique()

        # Skip trips with no rows or with at most two distinct days.
        if dff.shape[0] == 0 or len(days) <= 2:
            continue

        grouped = dff.groupby('DayOfWeek')
        dfave = grouped.mean()
        dfvar = grouped.std()
        dfave.rename(columns={'Ride_load': 'mean'}, inplace=True)
        dfvar.rename(columns={'Ride_load': 'std'}, inplace=True)
        df = dfave.join(dfvar)
        # Replace missing values by a tiny number -- presumably so that
        # errorbar never receives NaN.
        df = df.fillna(0.00000001)

        labels.append(str(trip) + '_' + str(self._line))
        print(' '.join([' ---df--- ', str(df), str(days)]))
        print('tmp0============= %s' % (labels,))
        plt.errorbar(days, df['mean'], df['std'],
                     linestyle="dashed", marker="o", zorder=1)
    plt.legend(labels, loc='upper left')  # 'lower left'
    plt.draw()
def plotHistoVSDays( self ):
    """Plot ``self.testAttr`` against the day of week over all the data.

    One frame per day of ``self.DaysList`` is extracted from ``self.data``
    (columns 'DayOfWeek' and ``str(self.testAttr)``); the list of frames
    is handed to ``self.plotGeneration`` together with an empty legend
    string.
    """
    per_day = [
        myPandaUtilities.myfilter(
            self.data,
            ['DayOfWeek', day],
            ['DayOfWeek', str(self.testAttr)])
        for day in self.DaysList
    ]
    self.plotGeneration(per_day, '')
def plotHistoVSDays( self ):
    """Plot ``self.testAttr`` against the day of week over all the data.

    One frame per day of ``self.DaysList`` is extracted from ``self.data``
    (columns 'DayOfWeek' and ``str(self.testAttr)``); the list of frames
    is handed to ``self.plotGeneration`` together with an empty legend
    string.
    """
    per_day = [
        myPandaUtilities.myfilter(
            self.data,
            ['DayOfWeek', day],
            ['DayOfWeek', str(self.testAttr)])
        for day in self.DaysList
    ]
    self.plotGeneration(per_day, '')
def plotAttrVSDays( self ):
    """Plot ``self.testAttr`` per day for every (trip, line) combination.

    Trips come from ``self.AttrList1`` and lines from ``self.AttrList2``.
    For each pair a ``"<trip>_<line>"`` label is added to the running
    legend list, one frame per day of ``self.DaysList`` is extracted from
    ``self.data``, and ``self.plotGeneration`` is called with those frames
    and the legend accumulated so far.
    """
    labels = []
    for trip in self.AttrList1:
        for line in self.AttrList2:
            labels.append('_'.join([str(trip), str(line)]))
            per_day = [
                myPandaUtilities.myfilter(
                    self.data,
                    ['Line_id', line, 'Trip_id', trip, 'DayOfWeek', day],
                    ['DayOfWeek', str(self.testAttr)])  # 'Ride_load'
                for day in self.DaysList
            ]
            self.plotGeneration(per_day, labels)
def plotAttrVSDays( self ):
    """Plot ``self.testAttr`` per day for every (trip, line) combination.

    Trips come from ``self.AttrList1`` and lines from ``self.AttrList2``.
    For each pair a ``"<trip>_<line>"`` label is added to the running
    legend list, one frame per day of ``self.DaysList`` is extracted from
    ``self.data``, and ``self.plotGeneration`` is called with those frames
    and the legend accumulated so far.
    """
    labels = []
    for trip in self.AttrList1:
        for line in self.AttrList2:
            labels.append('_'.join([str(trip), str(line)]))
            per_day = [
                myPandaUtilities.myfilter(
                    self.data,
                    ['Line_id', line, 'Trip_id', trip, 'DayOfWeek', day],
                    ['DayOfWeek', str(self.testAttr)])  # 'Ride_load'
                for day in self.DaysList
            ]
            self.plotGeneration(per_day, labels)
def plotLineVSDays( self ):
    """Plot ``self.testAttr`` per day for each trip of line ``self._line``.

    For every trip of ``self.AttrList1`` a ``"<trip>_<line>"`` label is
    added to the running legend list and one frame per day of
    ``self.DaysList`` is extracted from ``self.data``. The trip is handed
    to ``self.plotGeneration`` only when its frames hold more than five
    rows in total.

    :return: True when at least one trip was plotted, False otherwise.
    """
    labels = []
    plotted_any = False
    for trip in self.AttrList1:
        labels.append('_'.join([str(trip), str(self._line)]))
        per_day = [
            myPandaUtilities.myfilter(
                self.data,
                ['Line_id', self._line, 'Trip_id', trip, 'DayOfWeek', day],
                ['DayOfWeek', str(self.testAttr)])
            for day in self.DaysList
        ]
        # Too few rows for this trip: nothing to draw.
        if pd.concat(per_day).shape[0] <= 5:
            continue
        self.plotGeneration(per_day, labels)
        plotted_any = True
    return plotted_any
def plotLineVSDays( self ):
    """Plot ``self.testAttr`` per day for each trip of line ``self._line``.

    For every trip of ``self.AttrList1`` a ``"<trip>_<line>"`` label is
    added to the running legend list and one frame per day of
    ``self.DaysList`` is extracted from ``self.data``. The trip is handed
    to ``self.plotGeneration`` only when its frames hold more than five
    rows in total.

    :return: True when at least one trip was plotted, False otherwise.
    """
    labels = []
    plotted_any = False
    for trip in self.AttrList1:
        labels.append('_'.join([str(trip), str(self._line)]))
        per_day = [
            myPandaUtilities.myfilter(
                self.data,
                ['Line_id', self._line, 'Trip_id', trip, 'DayOfWeek', day],
                ['DayOfWeek', str(self.testAttr)])
            for day in self.DaysList
        ]
        # Too few rows for this trip: nothing to draw.
        if pd.concat(per_day).shape[0] <= 5:
            continue
        self.plotGeneration(per_day, labels)
        plotted_any = True
    return plotted_any
def Study(self):
    """Select the (Line_id, Trip_id) pairs with enough 'Reven' statistics.

    For lines 1 and 2 and every trip of ``self.TripIdList`` the matching
    rows of ``self.data`` are summarised with ``describe()``; pairs whose
    'Reven' count is positive are collected. The collected table is shown
    through ``myPandaUtilities.myLazyDispl``, sorted by descending count
    and restricted to counts strictly above ``self.CutOff``.

    :return: DataFrame holding the 'Line_id' and 'Trip_id' columns of the
        retained pairs.
    """
    counts = []
    line_ids = []
    trip_ids = []
    for line in [1, 2]:  # self.LineIdList:
        for trip in self.TripIdList:
            subset = myPandaUtilities.myfilter(
                self.data, ['Line_id', line, 'Trip_id', trip])
            n_obs = subset.describe()['Reven']['count']
            if not (n_obs > 0):
                continue
            counts.append(n_obs)
            line_ids.append(line)
            trip_ids.append(trip)

    stats = pd.DataFrame({
        'count': counts,
        'Line_id': line_ids,
        'Trip_id': trip_ids,
    })
    myPandaUtilities.myLazyDispl(stats)

    # NOTE(review): DataFrame.sort is the legacy pandas spelling; newer
    # pandas releases only provide sort_values.
    ranked = stats.sort('count', ascending=False)
    kept = ranked[ranked['count'] > self.CutOff]
    return kept[['Line_id', 'Trip_id']]
def plot2AttrVSDaysHisto( self ):
    """Draw the per-day mean of 'Ride_load' with its standard deviation.

    Rows of ``self.data`` are gathered day by day over ``self.DaysList``,
    the 'DayOfWeek' values are mapped through ``fctChgDays``, and the
    per-day mean and standard deviation of 'Ride_load' are plotted as one
    ``plt.errorbar`` series labelled ``'histo'``. The intermediate frames
    are shown through ``myPandaUtilities.myLazyDispl``.
    """
    frames = [
        myPandaUtilities.myfilter(
            self.data, ['DayOfWeek', day], ['DayOfWeek', 'Ride_load'])
        for day in self.DaysList
    ]
    dff = pd.concat(frames)
    dff['DayOfWeek'] = dff['DayOfWeek'].apply(fctChgDays)
    dff = dff.sort('DayOfWeek')
    myPandaUtilities.myLazyDispl(dff)

    grouped = dff.groupby('DayOfWeek')
    dfave = grouped.mean()
    dfvar = grouped.std()
    dfave.rename(columns={'Ride_load': 'mean'}, inplace=True)
    dfvar.rename(columns={'Ride_load': 'std'}, inplace=True)
    df = dfave.join(dfvar)
    # Replace missing values by a tiny number -- presumably so that
    # errorbar never receives NaN.
    df = df.fillna(0.00000001)
    myPandaUtilities.myLazyDispl(df)

    plt.errorbar(dff['DayOfWeek'].unique(), df['mean'], df['std'],
                 linestyle="dashed", marker="o", zorder=1)
    # The labels argument is a sequence with one entry per series; a bare
    # string would be consumed character by character ('h', 'i', ...).
    plt.legend(['histo'], loc='upper left')  # 'lower left' title='Histooo',
    plt.draw()
def plot2AttrVSDaysHisto( self ):
    """Draw the per-day mean of 'Ride_load' with its standard deviation.

    Rows of ``self.data`` are gathered day by day over ``self.DaysList``,
    the 'DayOfWeek' values are mapped through ``fctChgDays``, and the
    per-day mean and standard deviation of 'Ride_load' are plotted as one
    ``plt.errorbar`` series labelled ``'histo'``. The intermediate frames
    are shown through ``myPandaUtilities.myLazyDispl``.
    """
    frames = [
        myPandaUtilities.myfilter(
            self.data, ['DayOfWeek', day], ['DayOfWeek', 'Ride_load'])
        for day in self.DaysList
    ]
    dff = pd.concat(frames)
    dff['DayOfWeek'] = dff['DayOfWeek'].apply(fctChgDays)
    dff = dff.sort('DayOfWeek')
    myPandaUtilities.myLazyDispl(dff)

    grouped = dff.groupby('DayOfWeek')
    dfave = grouped.mean()
    dfvar = grouped.std()
    dfave.rename(columns={'Ride_load': 'mean'}, inplace=True)
    dfvar.rename(columns={'Ride_load': 'std'}, inplace=True)
    df = dfave.join(dfvar)
    # Replace missing values by a tiny number -- presumably so that
    # errorbar never receives NaN.
    df = df.fillna(0.00000001)
    myPandaUtilities.myLazyDispl(df)

    plt.errorbar(dff['DayOfWeek'].unique(), df['mean'], df['std'],
                 linestyle="dashed", marker="o", zorder=1)
    # The labels argument is a sequence with one entry per series; a bare
    # string would be consumed character by character ('h', 'i', ...).
    plt.legend(['histo'], loc='upper left')  # 'lower left' title='Histooo',
    plt.draw()
def AnalysisTree(self, dframe):
    """Feed the rows of root 0 into a ``myPandaUtilities.DataTree``.

    The rows of ``dframe`` whose 'RootNb' equals the root index are split
    per distinct 'fileName'. Files having exactly one, two or three rows
    contribute their 'attrModif' and 'toNb' values (in row order) to the
    1D, 2D or 3D analysis respectively; files with any other number of
    rows are ignored. The tree is first seeded with the root's own entry
    (``self.listTruncNbs[n]``, a copy of ``self.attrModif`` and
    ``self.truncCoords[n]``), then the debug dumps are printed.

    :param dframe: DataFrame providing at least the columns 'RootNb',
        'fileName', 'fileNum', 'attrModif', 'toNb' and 'branchWeight'.
    """
    for n, trunc in enumerate([0]):  # enumerate(self.listTruncNbs):
        print('%s %s' % (n, trunc))
        print(self.truncCoords[n])
        print('DBG %s' % (n,))
        root_rows = dframe[dframe['RootNb'] == n]

        # One (names, coords, tos) triple of accumulators per supported
        # number of rows per file.
        by_size = dict((size, ([], [], [])) for size in (1, 2, 3))
        for fname in root_rows['fileName'].unique():
            tmp = myPandaUtilities.myfilter(
                root_rows,
                ['fileName', fname],
                ['fileNum', 'attrModif', 'toNb', 'branchWeight'])
            size = tmp.shape[0]
            if size not in by_size:
                continue
            names, coords, tos = by_size[size]
            coords.append([tmp['attrModif'].iloc[k] for k in range(size)])
            tos.append([tmp['toNb'].iloc[k] for k in range(size)])
            names.append(fname)

        # Root entry; the attribute list is copied before the tree is built.
        filNames = self.listTruncNbs[n]
        filCoords = [self.attrModif[:]]
        filTos = self.truncCoords[n]

        myTree = myPandaUtilities.DataTree(self.attrModif)
        myTree.AnalysisDic(filNames, filCoords, filTos)
        names1, coords1, tos1 = by_size[1]
        myTree.Analysis1D(names1, coords1, tos1)
        names2, coords2, tos2 = by_size[2]
        myTree.Analysis2D(names2, coords2, tos2)
        names3, coords3, tos3 = by_size[3]
        myTree.Analysis3D(names3, coords3, tos3)

        print('585555555555555555555555555555555555555555555555555555555555')
        print(myTree.points)
        # NOTE(review): hard-coded debug lookup; fails when ``points`` has
        # no entry 36.
        print(myTree.points[36])
        print(len(myTree.points))
        print(myTree.AlgorithmTree())
    # [ 'fileName','fileNum','RootNb',
    #   'RootNm','attrModif','branchWeight','from','to' ]
    ################################################################################