def view_calplot_heatmap(self, df: pd.DataFrame, df_cal: pd.DataFrame,
                         store_id: str = 'CA_1', window: int = None):
    """
    View daily sales of one store as a calendar heatmap.

    Args:
        df (pd.DataFrame)     : sales train melted data frame; the columns
                                'store_id', 'day' and 'sales' are read
        df_cal (pd.DataFrame) : calendar DataFrame; joined on its 'd' column
                                and its 'date' column supplies the dates
        store_id (str)        : unique store_id
        window (int, optional) : window for smoothing daily sales over
                                period of time; falsy values disable smoothing

    Raises:
        KeyError: if ``store_id`` does not occur in ``df['store_id']``.
    """
    # Select the store directly instead of grouping the whole frame.
    store_sales = df.loc[df['store_id'] == store_id]
    if store_sales.empty:
        raise KeyError(store_id)

    # get daily sales for the store
    daily = store_sales.groupby(by='day')['sales'].agg('sum').reset_index()

    # sort by days: 'd_10' must come after 'd_9', so order on the numeric part
    daily['day'] = daily['day'].str.replace('d_', '', regex=False).astype(int)
    daily = daily.sort_values(by='day')
    daily['day'] = 'd_' + daily['day'].astype(str)

    # merge on calendar
    merged = daily.merge(right=df_cal, how='left', left_on='day', right_on='d')

    # Build the date-indexed sales series in one step. This avoids writing
    # into a slice of `merged`, so the pandas chained-assignment warning does
    # not have to be disabled globally, and the index is a real DatetimeIndex
    # (unparseable dates become NaT).
    dates = pd.to_datetime(merged['date'], format="%Y-%m-%d", errors='coerce')
    events = pd.Series(merged['sales'].to_numpy(),
                       index=pd.DatetimeIndex(dates, name='date'),
                       name='sales')

    if window:
        # smooth using rolling window average
        events = events.rolling(window).mean()
        print(f"Calendar heatmap for sales of store {store_id} rolling average {window} days period")
    else:
        print(f"Calendar heatmap for sales of store {store_id}")

    # do calplot
    calplot.calplot(events)
    plt.show()
def heatmap(self, car, vac, yr):
    """Save a calendar heatmap of the events of year ``yr`` as a PNG file.

    Rows of ``vac`` are plotted with the value 0.9 and rows of ``car`` with
    the value 0.1. The image is written to ``calendar_<yr>.png`` inside
    ``self.folder``.

    Args:
        car: DataFrame with at least the columns 'date' and 'year'.
        vac: DataFrame with at least the columns 'date' and 'year'.
        yr: year to plot; used to select the events and to count rows
            through the 'year' column.
    """
    # Tag each source with its plot value on a copy, so the callers' frames
    # do not gain a 'const' column as a side effect.
    events = pd.concat(
        [vac[['date']].assign(const=.9), car[['date']].assign(const=.1)],
        axis=0)
    # convert dates to datetime. Values to be used as index
    dates = pd.to_datetime(events['date'].values)
    # calplot expects a pd.Series with datetime index
    ev = pd.Series(events['const'].values, index=dates)
    ev_year = ev[f'{yr}']

    # calplot creates its own figure, so no figure is opened beforehand;
    # an extra one would never be drawn into nor closed.
    calplot.calplot(ev_year, yearcolor='black', colorbar=False)

    ccount = len(car[car['year'] == yr])
    vcount = len(vac[vac['year'] == yr])
    if vcount == 0:
        title = f'Year {yr} \n Parking days observed: {ccount}'
    else:
        title = f'Year {yr} Parking days observed: {ccount}, (Absent: {vcount} days)'
    plt.title(title)

    file_name = os.path.join(self.folder, f'calendar_{yr}.png')
    plt.savefig(file_name)
    plt.close()
def run_eval(probs, y_test, method=1, **kwparams):
    """Run the evaluation routine selected by ``method`` and return its result.

    Args:
        probs: first argument handed to the selected routine.
        y_test: second argument handed to the selected routine.
        method: 1 -> ``caldis``, 2 -> ``calplot``, 3 -> ``report``,
            4 -> ``roc``, 5 -> ``confusion``.
        **kwparams: forwarded to ``calplot`` (method 2) and ``roc``
            (method 4) only; ignored by the other methods.

    Raises:
        ValueError: if ``method`` is not one of 1-5.
    """
    if method == 1:
        return caldis(probs, y_test)
    if method == 2:
        return calplot(probs, y_test, **kwparams)
    if method == 3:
        return report(probs, y_test)
    if method == 4:
        return roc(probs, y_test, **kwparams)
    if method == 5:
        return confusion(probs, y_test)
    # ValueError is the conventional type for a bad argument value and is
    # still caught by callers that handle `Exception`.
    raise ValueError("Invalid method argument given")
def plot_cal(self, method='mean', vmin=None, vmax=None, cmap='YlGn', norm=False):
    """Aggregate ``self.results['data']`` per row and draw it with ``calplot``.

    Args:
        method: 'median' or 'mean' select that row-wise aggregate; any other
            value falls back to the row-wise sum.
        vmin: when given, values less than or equal to it are set to it.
        vmax: when given, values greater than or equal to it are set to it.
        cmap: colormap name passed on to ``calplot``.
        norm: when truthy, every column is standardised (column mean
            subtracted, divided by column std) before aggregating.
    """
    # Work on a copy so the stored results stay untouched.
    data = self.results['data'].copy()

    # Clamp to the requested bounds.
    if vmin is not None:
        data[data <= vmin] = vmin
    if vmax is not None:
        data[data >= vmax] = vmax

    if norm:
        print('[pypiplot]> Normalizing..')
        centered = data - data.mean(axis=0)
        data = centered / data.std(axis=0)

    print('[pypiplot]> Method: [%s]' %(method))
    # Unknown method names fall through to the sum.
    if method in ('median', 'mean'):
        aggregate_name = method
    else:
        aggregate_name = 'sum'
    events = getattr(data, aggregate_name)(axis=1)

    # Make the calendar
    plt.figure()
    calplot(events, cmap=cmap, colorbar=False, figsize=None, suptitle=None)
def plot(self, y0, m0, d0, y1, m1, d1, gid="all"):
    """Plot the daily minutes of one game as a calendar heatmap.

    The figure is saved as ``heatmap.pdf`` and ``heatmap.png`` in the current
    working directory and then shown.

    Args:
        y0, m0, d0: start of the date range, passed to ``query_daterange``.
        y1, m1, d1: end of the date range, passed to ``query_daterange``.
        gid: key of the game to plot; ``"all"`` is always accepted.

    Raises:
        ValueError: if ``gid`` is neither ``"all"`` nor a known id from
            ``self.appid_name``.
    """
    daily_minutes, all_days = self.query_daterange(y0, m0, d0, y1, m1, d1)

    # Refresh the id -> name lookup from the (id, name, ...) records.
    for p in self.appid_name:
        self.id_name_dict[p[0]] = p[1]
    self.id_name_dict["all"] = "all"

    if gid not in self.id_name_dict:
        # `gid` may be a string (the default is "all"), so it must not be
        # formatted with %d: that would raise TypeError instead of this error.
        raise ValueError(f"No such game with id {gid!r}")

    title = "%s" % (self.id_name_dict[gid])
    events = pd.Series(daily_minutes[gid], index=all_days)
    fig, axes = calplot.calplot(events, edgecolor="grey", cmap='YlGn',
                                figsize=(14, 2), suptitle=title,
                                dropzero=False)
    fig.savefig('heatmap.pdf', bbox_inches='tight')
    fig.savefig('heatmap.png', bbox_inches='tight')
    plt.show()
# Plot the similarity values in `diss` against `cluster`.
plt.title('soft-DTW similarity measure S60')
plt.plot(cluster, diss)
plt.xlabel('n° of cluster')
plt.ylabel('similarity between closest clusters')
plt.show()

# visualization
import calplot

# all days of 2013
all_days = pd.date_range('1/1/2013', periods=365, freq='D')
# assign at every day the cluster
events_train = pd.Series(prediction_train, index=all_days)
# plot the result
calplot.calplot(events_train, yearlabel_kws={'color': 'black'}, cmap='cool',
                suptitle='Clustering of the days train set S60',
                linewidth=2.3)

# test days 2013
events_test = pd.Series(prediction_test, index=all_days)
calplot.calplot(events_test, yearlabel_kws={'color': 'black'}, cmap='cool',
                suptitle='Clustering of the days test set S1816',
                linewidth=2.3)
prediction_test

# test days 2014
# `np.int` was only an alias of the builtin `int` and no longer exists in
# current NumPy releases; `dtype=int` yields the same array.
before = np.full(shape=90, fill_value=4, dtype=int)
before
df['count'] = df['count'].apply(toFloat) df['over'] = df['over'].apply(toFloat) df['duration'] = tryTime((df.endTime - df.startTime).values) df['duration'] = df['duration'].dt.components.minutes plt.rcParams.update({'font.size': 18}) # In[ ]: df.tail() # In[ ]: calplot.calplot(df['date'].value_counts()) # In[ ]: from matplotlib.ticker import MaxNLocator fig, ax = plt.subplots(1, 1, figsize=(15, 5)) ax.xaxis.set_major_locator(MaxNLocator(integer=True)) df['coffee'].value_counts().plot(kind='barh') ax.set(xlabel='Amount of Observations', title='Amounts of different coffees') plt.tight_layout() save_fig(plt.gcf(), 'coffee_bar.png') # In[ ]: (
# Finish and show the centroid density plot.
plt.xlabel('day')
plt.ylabel('denisty')
plt.title('centroid density k=3')
plt.legend()
plt.show()

# visualization
import calplot

# all days of 2013
all_days = pd.date_range('1/1/2013', periods=365, freq='D')
# assign at every day the cluster
events_train = pd.Series(prediction_train, index=all_days)
# plot the result
calplot.calplot(events_train, yearlabel_kws={'color': 'black'}, cmap='cool',
                suptitle='Clustering of the days')

# test days 2013
events_test = pd.Series(prediction_test, index=all_days)
calplot.calplot(events_test, yearlabel_kws={'color': 'black'}, cmap='cool',
                suptitle='Clustering of the days')
prediction_test

# test days 2014
# Pad the test predictions with the constant label 4 on both sides.
# `np.int` was only an alias of the builtin `int` and no longer exists in
# current NumPy releases; `dtype=int` yields the same arrays.
before = np.full(shape=90, fill_value=4, dtype=int)
before
after = np.full(shape=153, fill_value=4, dtype=int)
after
# concatenate arrays
test = np.concatenate((before, prediction_test, after))
maxDateTime = max(data_frame.index).date().strftime("%Y%m%d") return maxDateTime + "_" + minDateTime def saveHitmap(file_prefix: str, figure: Figure): figure.savefig(f"{file_prefix}_hitmap.png") def saveDebugStat(file_prefix: str, data_frame: DataFrame): data_frame.groupby( data_frame.index.date).count().to_csv(f'{file_prefix}_stat.csv') def saveMergedData(file_prefix: str, data_frame: DataFrame): data_frame.to_csv(f'{file_prefix}_merged.csv') if __name__ == '__main__': filenames = sys.argv[1:] dfs = [loadData(filename) for filename in filenames] df = pd.concat(dfs, join='outer') df['hasDay'] = 1 df.drop_duplicates(inplace=True) df.sort_index(inplace=True) fig, _ = calplot.calplot(df['hasDay'], cmap='Blues', colorbar=False) prefix = createFilePrefix(df) saveMergedData(prefix, df) saveHitmap(prefix, fig) # saveDebugStat(prefix, df)
s=month_name[month_ - 1], color='#002f4f', fontsize=12) ax1.add_collection(collection) mp.show() elif args.cal: l = getDetailsGivenTag(args.cal) ddf = getDatesfromTagPhotoIDList(l) ddf.columns = ['d'] ddf['Count'] = 0 # to plot no bars on days no photo was taken a Count col with 0 created cnts = ddf.groupby(p.Grouper(key='d', freq='D')).count() events = p.Series(cnts.Count, index=cnts.index) calplot.calplot(events, edgecolor=None, cmap='YlGn', colorbar=True, suptitle=args.cal + '- photos per day', linewidth=1) mp.show() elif args.age: if args.age == 'all': print('Days since last photo') lall = [] for tagname, tagcnt in tagdf.groupby([ 'name' ]).count()['photo_id_list'].sort_values().iteritems(): #.to_string()) #print(tagname,end=' ') l = getDetailsGivenTag(tagname) ddf = getDatesfromTagPhotoIDList(l, False) lastdate = ddf[0].iloc[-1]
def test_calplot(events):
    """
    With `calplot` we can plot several years in one figure.

    Returns the figure produced by ``calplot.calplot`` for ``events``.
    """
    # Only the figure is needed; the axes are discarded.
    figure, _axes = calplot.calplot(events)
    return figure
def _create(self):
    """Draw ``self.data`` with calplot, using the 'YlGn' colormap and no cell edges."""
    plot_options = {'edgecolor': None, 'cmap': 'YlGn'}
    calplot.calplot(self.data, **plot_options)
# Last panel of the 4x2 grid: second column of the fourth centroid.
plt.subplot(4, 2, 8)
plt.plot(x, centroids[3][:, 1], 'r-', label='speed')
plt.xlabel('day')
plt.ylabel('speed')
plt.title('centroid speed k=3')
plt.legend()
plt.show()

# visualization
import calplot

# all days of 2013
all_days = pd.date_range('1/1/2013', periods=365, freq='D')
# assign at every day the cluster
events_train = pd.Series(prediction_train, index=all_days)
# plot the result
calplot.calplot(events_train, textformat='{:.0f}')

# test days 2013
events_test = pd.Series(prediction_test, index=all_days)
# plot the result
calplot.calplot(events_test, textformat='{:.0f}')

# test days 2014
# Pad the test predictions with the constant label 4 on both sides.
# `np.int` was only an alias of the builtin `int` and no longer exists in
# current NumPy releases; `dtype=int` yields the same arrays.
before = np.full(shape=90, fill_value=4, dtype=int)
before
after = np.full(shape=153, fill_value=4, dtype=int)
after
# concatenate arrays
test = np.concatenate((before, prediction_test, after))
len(test)
# NOTE(review): the Series below needs len(test) == 365, which presumes
# prediction_test holds 122 values (365 - 90 - 153) -- confirm.
test_days = pd.date_range('1/1/2014', periods=365, freq='D')
events_test = pd.Series(test, index=test_days)
import pandas as pd


def loadData(filename: str):
    """Read an OHLCV CSV file into a DataFrame indexed by 'DateTime'.

    Only the columns DateTime, Open, High, Low, Close and Volume are read.
    Timestamps are parsed as UTC and converted to the US/Eastern time zone.
    """
    df = pd.read_csv(
        filename,
        usecols=['DateTime', 'Open', 'High', 'Low', 'Close', 'Volume'],
        na_values=['nan'])
    df['DateTime'] = pd.to_datetime(
        df['DateTime'], utc=True).dt.tz_convert('US/Eastern')
    df = df.set_index('DateTime')
    return df


def resample(df):
    """Aggregate ``df`` into 1-minute OHLCV bars, dropping bars with missing values."""
    return df.resample('1min').agg(
        OrderedDict([
            ('Open', 'first'),
            ('High', 'max'),
            ('Low', 'min'),
            ('Close', 'last'),
            ('Volume', 'sum'),
        ])
    ).dropna()


if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <csv-file>")
    file_path = sys.argv[1]
    data_frame = loadData(file_path)
    # NOTE: resample() can be applied here to aggregate to 1-minute bars
    # first; the raw rows are plotted as loaded.
    data_frame['hasDay'] = 1
    fig, _ = calplot.calplot(data_frame['hasDay'], cmap='Blues', colorbar=False)
    output_path = f"{file_path}_hitmap.png"
    fig.savefig(output_path)
    # Report success only once the file has actually been written.
    print(f"Calendar hitmap has been saved to {output_path}")