Exemple #1
0
    def view_calplot_heatmap(self, df: pd.DataFrame, df_cal: pd.DataFrame, store_id: str = 'CA_1', window: int = None):
        """
        Plot a calendar heatmap of the total daily sales of one store.

        Args:
            df (pd.DataFrame) : melted sales frame; the columns ``store_id``,
                ``day`` (labels of the form ``d_<n>``) and ``sales`` are read
            df_cal (pd.DataFrame) : calendar frame; the columns ``d`` and
                ``date`` (``%Y-%m-%d``) are read
            store_id (str) : store whose sales are plotted
            window (int, optional) : if truthy, daily sales are smoothed with a
                rolling mean over this many rows before plotting

        Raises:
            KeyError: if ``store_id`` does not occur in ``df['store_id']``
        """
        store_rows = df.loc[df['store_id'] == store_id]
        if store_rows.empty:
            raise KeyError(store_id)

        # total sales per day for the selected store
        daily = store_rows.groupby(by='day')['sales'].agg('sum').reset_index()

        # sort by the numeric part of the day label, then restore the label
        daily['day'] = daily['day'].str.replace('d_', '').astype(int)
        daily = daily.sort_values(by='day')
        daily['day'] = 'd_' + daily['day'].astype(str)

        # attach the calendar date of every day label
        merged = daily.merge(right=df_cal, how='left', left_on='day', right_on='d')

        # Build the date-indexed sales series directly. Assigning parsed dates
        # into a column slice via .iloc needed the global chained-assignment
        # warning switched off and does not reliably change the column dtype.
        dates = pd.to_datetime(merged['date'], format="%Y-%m-%d", errors='coerce')
        events = pd.Series(merged['sales'].to_numpy(),
                           index=pd.DatetimeIndex(dates, name='date'),
                           name='sales')

        if window:
            # smooth using rolling window average
            events = events.rolling(window).mean()
            print(f"Calendar heatmap for sales of store {store_id} rolling average {window} days period")
        else:
            print(f"Calendar heatmap for sales of store {store_id}")

        calplot.calplot(events)
        plt.show()
Exemple #2
0
 def heatmap(self, car, vac, yr):
     """Save a calendar heatmap of the dates in ``car`` and ``vac`` for one year.

     Every ``car['date']`` is plotted with the value 0.1 and every
     ``vac['date']`` with 0.9. The image is written to
     ``<self.folder>/calendar_<yr>.png``. The title counts the rows of each
     frame whose ``year`` column equals ``yr``.

     The input frames are not modified.
     """
     # Tag the dates on copies so no 'const' column is added to the callers'
     # frames.
     events = pd.concat(
         [vac[['date']].assign(const=.9), car[['date']].assign(const=.1)],
         axis=0)
     dates = pd.to_datetime(events['date'].values)  # values to be used as index
     ev = pd.Series(events['const'].values, index=dates)  # calplot expects a Series with datetime index
     ev_year = ev[f'{yr}']

     # calplot creates its own figure; opening one beforehand only leaves an
     # empty figure behind that is never closed.
     fig, _ = calplot.calplot(ev_year, yearcolor='black', colorbar=False)

     ccount = len(car[car['year'] == yr])
     vcount = len(vac[vac['year'] == yr])
     if vcount == 0:
         title = f'Year {yr} \n Parking days observed: {ccount}'
     else:
         title = f'Year {yr}  Parking days observed: {ccount}, (Absent: {vcount} days)'
     plt.title(title)

     file_name = os.path.join(self.folder, f'calendar_{yr}.png')
     fig.savefig(file_name)
     plt.close(fig)
     return
def run_eval(probs, y_test, method=1, **kwparams):
    """Run one evaluation routine on ``probs`` against ``y_test``.

    ``method`` selects the routine: 1 -> ``caldis``, 2 -> ``calplot``,
    3 -> ``report``, 4 -> ``roc``, 5 -> ``confusion``. ``**kwparams`` is
    forwarded only to ``calplot`` and ``roc``.

    Returns:
        Whatever the selected routine returns.

    Raises:
        ValueError: if ``method`` is none of 1-5.
    """
    if method == 1:
        return caldis(probs, y_test)
    if method == 2:
        return calplot(probs, y_test, **kwparams)
    if method == 3:
        return report(probs, y_test)
    if method == 4:
        return roc(probs, y_test, **kwparams)
    if method == 5:
        return confusion(probs, y_test)
    # ValueError is an Exception subclass, so existing handlers still match.
    raise ValueError("Invalid method argument given")
Exemple #4
0
    def plot_cal(self, method='mean', vmin=None, vmax=None, cmap='YlGn', norm=False):
        """Draw a calendar plot of ``self.results['data']`` reduced along axis 1.

        Args:
            method: 'median' or 'mean' pick that reduction; any other value sums.
            vmin: when not None, values <= vmin are replaced by vmin.
            vmax: when not None, values >= vmax are replaced by vmax.
            cmap: colormap name forwarded to ``calplot``.
            norm: when truthy, subtract the axis-0 mean and divide by the
                axis-0 standard deviation (applied after the clamping).
        """
        # Work on a copy so the stored results stay untouched.
        values = self.results['data'].copy()

        if vmin is not None:
            values[values <= vmin] = vmin
        if vmax is not None:
            values[values >= vmax] = vmax

        if norm:
            print('[pypiplot]> Normalizing..')
            centered = values - values.mean(axis=0)
            values = centered / values.std(axis=0)

        print('[pypiplot]> Method: [%s]' % method)
        if method == 'median':
            reduce_rows = values.median
        elif method == 'mean':
            reduce_rows = values.mean
        else:
            reduce_rows = values.sum
        events = reduce_rows(axis=1)

        # Make the calendar
        plt.figure()
        calplot(events, cmap=cmap, colorbar=False, figsize=None, suptitle=None)
def run_eval(probs, y_test, method=1, **kwparams):
    """Run one evaluation routine on ``probs`` against ``y_test``.

    ``method`` selects the routine: 1 -> ``caldis``, 2 -> ``calplot``,
    3 -> ``report``, 4 -> ``roc``, 5 -> ``confusion``. ``**kwparams`` is
    forwarded only to ``calplot`` and ``roc``.

    Returns:
        Whatever the selected routine returns.

    Raises:
        ValueError: if ``method`` is none of 1-5.
    """
    if method == 1:
        return caldis(probs, y_test)
    if method == 2:
        return calplot(probs, y_test, **kwparams)
    if method == 3:
        return report(probs, y_test)
    if method == 4:
        return roc(probs, y_test, **kwparams)
    if method == 5:
        return confusion(probs, y_test)
    # ValueError is an Exception subclass, so existing handlers still match.
    raise ValueError("Invalid method argument given")
Exemple #6
0
    def plot(self, y0, m0, d0, y1, m1, d1, gid="all"):
        """Plot a calendar heatmap of daily minutes for one game id.

        The date range arguments are passed unchanged to
        ``self.query_daterange``, which yields the per-id daily values and the
        matching days. The figure is saved as ``heatmap.pdf`` and
        ``heatmap.png`` in the current directory and then shown.

        Args:
            y0, m0, d0: start of the range, forwarded to ``query_daterange``.
            y1, m1, d1: end of the range, forwarded to ``query_daterange``.
            gid: key into the id -> name mapping; ``"all"`` is always valid.

        Raises:
            ValueError: if ``gid`` is not a known id.
        """
        daily_minutes, all_days = self.query_daterange(y0, m0, d0, y1, m1, d1)

        # refresh the id -> name mapping from the (id, name) pairs
        for p in self.appid_name:
            self.id_name_dict[p[0]] = p[1]
        self.id_name_dict["all"] = "all"

        if gid not in self.id_name_dict:
            # %s rather than %d: gid may be a string (the default is "all"),
            # and %d would raise TypeError instead of this ValueError.
            raise ValueError("No such game with id %s" % gid)

        title = "%s" % (self.id_name_dict[gid])

        events = pd.Series(daily_minutes[gid], index=all_days)
        fig, axes = calplot.calplot(events,
                                    edgecolor="grey",
                                    cmap='YlGn',
                                    figsize=(14, 2),
                                    suptitle=title,
                                    dropzero=False)

        fig.savefig('heatmap.pdf', bbox_inches='tight')
        fig.savefig('heatmap.png', bbox_inches='tight')
        plt.show()
Exemple #7
0
plt.title('soft-DTW similarity measure S60')
plt.plot(cluster, diss)
plt.xlabel('n° of cluster')
plt.ylabel('similarity between closest clusters')
plt.show()

# visualization
import calplot
# all days of 2013
all_days = pd.date_range('1/1/2013', periods=365, freq='D')
# assign the predicted cluster to every day
events_train = pd.Series(prediction_train, index=all_days)
# plot the result
calplot.calplot(events_train,
                yearlabel_kws={'color': 'black'},
                cmap='cool',
                suptitle='Clustering of the days train set S60',
                linewidth=2.3)

# test days 2013
events_test = pd.Series(prediction_test, index=all_days)
calplot.calplot(events_test,
                yearlabel_kws={'color': 'black'},
                cmap='cool',
                suptitle='Clustering of the days test set S1816',
                linewidth=2.3)
prediction_test

# test days 2014
# builtin int: the np.int alias was removed in NumPy 1.24
before = np.full(shape=90, fill_value=4, dtype=int)
before
Exemple #8
0
# Convert the 'count' and 'over' columns element-wise with toFloat.
for numeric_column in ('count', 'over'):
    df[numeric_column] = df[numeric_column].apply(toFloat)

# Store the elapsed time first, then keep only its minutes component.
elapsed = (df.endTime - df.startTime).values
df['duration'] = tryTime(elapsed)
df['duration'] = df['duration'].dt.components.minutes

plt.rcParams.update({'font.size': 18})

# In[ ]:

df.tail()

# In[ ]:

# Calendar heatmap of how often each date occurs.
date_counts = df['date'].value_counts()
calplot.calplot(date_counts)

# In[ ]:

from matplotlib.ticker import MaxNLocator
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
# integer ticks only on the x axis
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
coffee_counts = df['coffee'].value_counts()
coffee_counts.plot(kind='barh')
ax.set(xlabel='Amount of Observations', title='Amounts of different coffees')
plt.tight_layout()
save_fig(plt.gcf(), 'coffee_bar.png')

# In[ ]:


(
Exemple #9
0
plt.xlabel('day')
plt.ylabel('density')  # axis label was misspelled 'denisty'
plt.title('centroid density k=3')
plt.legend()
plt.show()


# visualization
import calplot
# all days of 2013
all_days = pd.date_range('1/1/2013', periods=365, freq='D')
# assign the predicted cluster to every day
events_train = pd.Series(prediction_train, index=all_days)
# plot the result
calplot.calplot(events_train, yearlabel_kws={'color': 'black'}, cmap='cool', suptitle='Clustering of the days')

# test days 2013
events_test = pd.Series(prediction_test, index=all_days)
calplot.calplot(events_test, yearlabel_kws={'color': 'black'}, cmap='cool', suptitle='Clustering of the days')
prediction_test


# test days 2014
# builtin int: the np.int alias was removed in NumPy 1.24
before = np.full(shape=90, fill_value=4, dtype=int)
before
after = np.full(shape=153, fill_value=4, dtype=int)
after
# pad the predictions with the constant 4 on both sides
test = np.concatenate((before, prediction_test, after))
Exemple #10
0
    maxDateTime = max(data_frame.index).date().strftime("%Y%m%d")
    return maxDateTime + "_" + minDateTime


def saveHitmap(file_prefix: str, figure: Figure):
    """Save ``figure`` as ``<file_prefix>_hitmap.png``."""
    target = f"{file_prefix}_hitmap.png"
    figure.savefig(target)


def saveDebugStat(file_prefix: str, data_frame: DataFrame):
    """Write per-date non-null counts of ``data_frame`` to ``<file_prefix>_stat.csv``.

    Rows are grouped by the ``.date`` of the frame's index.
    """
    per_day_counts = data_frame.groupby(data_frame.index.date).count()
    per_day_counts.to_csv(f'{file_prefix}_stat.csv')


def saveMergedData(file_prefix: str, data_frame: DataFrame):
    """Write ``data_frame`` as CSV to ``<file_prefix>_merged.csv``."""
    target = f'{file_prefix}_merged.csv'
    data_frame.to_csv(target)


if __name__ == '__main__':
    # Merge every file named on the command line, plot which days have data,
    # and save both the merged data and the plot under a shared prefix.
    filenames = sys.argv[1:]
    if not filenames:
        # pd.concat fails with an opaque "No objects to concatenate" on an
        # empty list; exit with a usage hint instead.
        sys.exit(f"usage: {sys.argv[0]} FILE [FILE ...]")

    dfs = [loadData(filename) for filename in filenames]
    df = pd.concat(dfs, join='outer')
    df['hasDay'] = 1  # constant marker plotted for every row present
    df.drop_duplicates(inplace=True)
    df.sort_index(inplace=True)
    fig, _ = calplot.calplot(df['hasDay'], cmap='Blues', colorbar=False)

    prefix = createFilePrefix(df)
    saveMergedData(prefix, df)
    saveHitmap(prefix, fig)
    # saveDebugStat(prefix, df)
                 s=month_name[month_ - 1],
                 color='#002f4f',
                 fontsize=12)
        ax1.add_collection(collection)
    mp.show()
elif args.cal:
    l = getDetailsGivenTag(args.cal)
    ddf = getDatesfromTagPhotoIDList(l)
    ddf.columns = ['d']
    ddf['Count'] = 0
    # to plot no bars on days no photo was taken a Count col with 0 created
    cnts = ddf.groupby(p.Grouper(key='d', freq='D')).count()
    events = p.Series(cnts.Count, index=cnts.index)
    calplot.calplot(events,
                    edgecolor=None,
                    cmap='YlGn',
                    colorbar=True,
                    suptitle=args.cal + '- photos per day',
                    linewidth=1)
    mp.show()

elif args.age:
    if args.age == 'all':
        print('Days since last photo')
        lall = []
        for tagname, tagcnt in tagdf.groupby([
                'name'
        ]).count()['photo_id_list'].sort_values().iteritems():  #.to_string())
            #print(tagname,end=' ')
            l = getDetailsGivenTag(tagname)
            ddf = getDatesfromTagPhotoIDList(l, False)
            lastdate = ddf[0].iloc[-1]
Exemple #12
0
def test_calplot(events):
    """
    Plot ``events`` with `calplot`, which can draw several years in one
    figure, and return that figure.
    """
    figure, _axes = calplot.calplot(events)
    return figure
Exemple #13
0
 def _create(self):
     """Draw ``self.data`` as a calendar heatmap with the 'YlGn' colormap and no cell edges."""
     plot_options = {'edgecolor': None, 'cmap': 'YlGn'}
     calplot.calplot(self.data, **plot_options)
Exemple #14
0
plt.subplot(4, 2, 8)
plt.plot(x, centroids[3][:, 1], 'r-', label='speed')
plt.xlabel('day')
plt.ylabel('speed')
plt.title('centroid speed k=3')
plt.legend()
plt.show()

# visualization
import calplot
# all days of 2013
all_days = pd.date_range('1/1/2013', periods=365, freq='D')
# assign the predicted cluster to every day
events_train = pd.Series(prediction_train, index=all_days)
# plot the result
calplot.calplot(events_train, textformat='{:.0f}')
# test days 2013
events_test = pd.Series(prediction_test, index=all_days)
# plot the result
calplot.calplot(events_test, textformat='{:.0f}')

# test days 2014
# builtin int: the np.int alias was removed in NumPy 1.24
before = np.full(shape=90, fill_value=4, dtype=int)
before
after = np.full(shape=153, fill_value=4, dtype=int)
after
# pad the predictions with the constant 4 on both sides
test = np.concatenate((before, prediction_test, after))
len(test)
test_days = pd.date_range('1/1/2014', periods=365, freq='D')
events_test = pd.Series(test, index=test_days)
import pandas as pd


def loadData(filename: str):
    """Read OHLCV rows from a CSV and index them by US/Eastern timestamps.

    Only the columns DateTime, Open, High, Low, Close and Volume are read.
    ``DateTime`` is parsed as UTC, converted to 'US/Eastern' and becomes the
    index of the returned frame.
    """
    wanted = ['DateTime', 'Open', 'High', 'Low', 'Close', 'Volume']
    frame = pd.read_csv(filename, usecols=wanted, na_values=['nan'])
    utc_stamps = pd.to_datetime(frame['DateTime'], utc=True)
    frame['DateTime'] = utc_stamps.dt.tz_convert('US/Eastern')
    return frame.set_index('DateTime')


def resample(df):
    """Aggregate OHLCV rows into 1-minute bars.

    Open takes the first value, High the max, Low the min, Close the last and
    Volume the sum of each minute. Bars containing any NaN are dropped.
    """
    bar_rules = {
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last',
        'Volume': 'sum',
    }
    minute_bars = df.resample('1min').agg(bar_rules)
    return minute_bars.dropna()


if __name__ == '__main__':
    # Plot which days of the given file contain data and save the plot next
    # to the input as '<file_path>_hitmap.png'.
    if len(sys.argv) < 2:
        # exit with a usage hint instead of a bare IndexError
        sys.exit(f"usage: {sys.argv[0]} FILE")
    file_path = sys.argv[1]
    data_frame = loadData(file_path)
    # data_frame = resample(data_frame)
    data_frame['hasDay'] = 1  # constant marker plotted for every row present
    fig, _ = calplot.calplot(data_frame['hasDay'], cmap='Blues', colorbar=False)
    fig.savefig(f"{file_path}_hitmap.png")
    # report success only after the file has actually been written
    print(f"Calendar hitmap has been saved to {file_path}_hitmap.png")