def find_hourly_deck_usage(parking_data):
    """Average the available spots per parking deck in 5-minute windows.

    Args:
        parking_data: DataFrame indexed by timestamp, with a ``deck`` column
            and a numeric ``available`` column.

    Returns:
        DataFrame with one row per 5-minute window holding the fractional
        ``hour``, the ``weekday`` number, the ``weekday_name``, the window
        start (``entered``) and one column per deck listed in
        ``parking_deck_names`` (NaN when a deck has no rows in the window).
    """
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    # List of lots you want to look at
    parking_deck_names = ["Lot 10", "PARK", "Science & Tech Garage"]

    # A list of the average # of available spots, in each parking deck,
    # in our dataset.
    hourly_averages = []

    for time, hourly_parking_data in parking_data.groupby(Grouper(freq="5min")):
        # Only average the column we report, so unrelated (possibly
        # non-numeric) columns cannot break the aggregation.
        deck_means = hourly_parking_data.groupby('deck')['available'].mean()

        # Metadata about this window
        hour_average = {
            'hour': time.hour + (time.minute / 60),
            'weekday': time.weekday(),
            # Timestamp.weekday_name was removed; day_name() is the replacement.
            'weekday_name': time.day_name(),
            'entered': time,
        }

        # Add in all of the decks
        hour_average.update({
            name: deck_means.get(name, float('nan'))
            for name in parking_deck_names
        })

        hourly_averages.append(hour_average)

    return DataFrame(hourly_averages)
def YearlyLinePlot(self):
    """Plot one line chart per calendar year and save it to disk.

    Groups ``self.floored_data`` by year, zero-pads the first year at the
    front and the last year at the back up to 12 values, and writes the
    stacked subplots to ``<self.savePath>/yearlyLinechart.png``.
    """
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    self.floored_data.plot()
    groups = self.floored_data.groupby(Grouper(freq='A'))
    n_groups = len(groups)
    years = DataFrame()
    for position, (name, group) in enumerate(groups, start=1):
        extendedValues = group.values
        # fill zeros for the sentiment of months of the first analyzed year
        if position == 1:
            extendedValues = np.append(
                np.zeros(12 - len(group.values)), group.values)
        # fill zeros for the sentiment of months of the last analyzed year
        # (with a single year this deliberately wins over the branch above,
        # as in the original code)
        if position == n_groups:
            extendedValues = np.append(
                group.values, np.zeros(12 - len(group.values)))
        years[name.year] = extendedValues
        pyplot.ylabel(name.year)
    years.plot(subplots=True, legend=False)
    pyplot.xlabel("Month")
    pyplot.ylabel("Year")
    pyplot.savefig(self.savePath + "/yearlyLinechart.png")
    pyplot.close()
def HeatMapBlurry(self):
    """Save a year-by-month ``matshow`` heat map of ``self.floored_data``.

    The first year is zero-padded at the front and the last year at the back
    up to 12 values; the image is written to ``<self.savePath>/heatMap.png``.
    """
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    self.floored_data.plot()
    groups = self.floored_data.groupby(Grouper(freq='A'))
    n_groups = len(groups)
    # The original built a sorted list of year labels here via the
    # Python-2-only dict.iteritems(); the list was never used, so it is gone.
    years = DataFrame()
    for position, (name, group) in enumerate(groups, start=1):
        extendedValues = group.values
        # fill zeros for the sentiment of months of the first analyzed year
        if position == 1:
            extendedValues = np.append(
                np.zeros(12 - len(group.values)), group.values)
        # fill zeros for the sentiment of months of the last analyzed year
        if position == n_groups:
            extendedValues = np.append(
                group.values, np.zeros(12 - len(group.values)))
        years[name.year] = extendedValues
        pyplot.ylabel(name.year)
    # One row per year, one column per month.
    years = years.T
    pyplot.matshow(years, interpolation=None, aspect='auto')
    pyplot.savefig(self.savePath + "/heatMap.png")
    pyplot.close()
def HeatMap(self):
    """Render a year-by-month heat map and fill in the report placeholders.

    Groups ``self.floored_data`` by year, zero-pads an incomplete first or
    last year to 12 values, saves the heat map to
    ``<self.savePath>/heatMap.png`` and replaces
    ``<HEATMAP_MONTHLY_HIGHEST>`` / ``<HEATMAP_MONTHLY_LOWEST>`` in
    ``self.reportFileName`` with the max / min of the plotted data.
    """
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    groups = self.floored_data.groupby(Grouper(freq='A'))
    n_groups = len(groups)
    finalData = []
    years = []
    hadToAddZeros = False
    for position, (name, group) in enumerate(groups, start=1):
        extendedValues = group.values
        missing = 12 - len(group.values)
        # if there are some months missing in a year
        if missing > 0:
            # fill zeros for the sentiment of months of the first analyzed year
            if position == 1:
                extendedValues = np.append(np.zeros(missing), group.values)
                hadToAddZeros = True
            # fill zeros for the sentiment of months of the last analyzed year
            if position == n_groups:
                extendedValues = np.append(group.values, np.zeros(missing))
                hadToAddZeros = True
        finalData.append(extendedValues)
        years.append(str(name.year))

    data = np.array(finalData)
    fig, axis = pyplot.subplots()
    # if I added zeroes, color scheme needs to be adjusted
    if hadToAddZeros:
        heatmap = axis.pcolor(data)
    else:
        heatmap = axis.pcolor(data, cmap=pyplot.cm.Reds)
    axis.set_yticks(np.arange(data.shape[0]) + 0.6, minor=False)
    axis.set_xticks(np.arange(data.shape[1]) + 0.6, minor=False)
    axis.invert_yaxis()
    column_labels = [
        "Jan", "Feb", "Mar", "Apr", "Mai", "Jun", "Jul", "Aug", "Sept",
        "Oct", "Nov", "Dec"
    ]
    axis.set_yticklabels(years, minor=False)
    axis.set_xticklabels(column_labels, minor=False)
    figureHeight = len(years) * 0.5
    fig.set_size_inches(11, figureHeight)
    pyplot.colorbar(heatmap)
    pyplot.savefig(self.savePath + "/heatMap.png", dpi=100)
    # Release the figure once it is on disk so repeated calls do not
    # accumulate open figures.
    pyplot.close(fig)

    # once plotted, replace placeholders in the post
    with open(self.reportFileName) as f:
        newText = f.read().replace(
            '<HEATMAP_MONTHLY_HIGHEST>',
            "{0:.2f}".format(round(np.max(data), 2)))
        newText = newText.replace(
            '<HEATMAP_MONTHLY_LOWEST>',
            "{0:.2f}".format(round(np.min(data), 2)))
    with open(self.reportFileName, "w") as f:
        f.write(newText)
def getBoxWhiskerPlot():
    """Show one box-and-whisker plot per year for the 1964-1970 data.

    Loads the data via ``validate.getdatafile()``, groups the 1964-1970
    slice by year and displays a box plot with one box per year.
    """
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    # astype returns a new object; the original discarded the result, so
    # the conversion never took effect.
    X = validate.getdatafile().astype('float32')
    groepen = X['1964':'1970'].groupby(Grouper(freq='A'))
    jaren = DataFrame()
    for name, groep in groepen:
        jaren[name.year] = groep.values
    jaren.boxplot()
    pyplot.show()
def plot_seasonal_graph():
    """Show one stacked line plot per year (2010-2016) of the training data.

    Reads ``dataset_training.csv`` (no header row, dates in the first
    column), groups the 2010-2016 slice by year and draws each year in its
    own subplot.
    """
    # Grouper / read_csv replace TimeGrouper and Series.from_csv, which were
    # deprecated in pandas 0.21 and later removed.
    from pandas import Grouper, read_csv

    # Same defaults Series.from_csv used: no header, first column as a
    # parsed date index, first data column returned as an unnamed Series.
    series = read_csv('dataset_training.csv', header=None, index_col=0,
                      parse_dates=True).iloc[:, 0]
    series = series.rename(None).rename_axis(None)

    groups = series['2010':'2016'].groupby(Grouper(freq='A'))
    n_groups = len(groups)
    pyplot.figure()
    for i, (name, group) in enumerate(groups, start=1):
        # The three-argument form also works for 10 or more subplots, unlike
        # the packed integer form (n * 100 + 10 + i).
        pyplot.subplot(n_groups, 1, i)
        pyplot.plot(group)
    pyplot.show()
def getSeasonalLine():
    """Show one stacked line plot per year for the 1964-1970 data.

    Loads the data via ``validate.getdataset()``, groups the 1964-1970 slice
    by year and draws each year in its own subplot.
    """
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    # astype returns a new object; the original discarded the result, so
    # the conversion never took effect.
    X = validate.getdataset().astype('float32')
    groepen = X['1964':'1970'].groupby(Grouper(freq='A'))
    n_groep = len(groepen)
    pyplot.figure()
    for i, (name, groep) in enumerate(groepen, start=1):
        # The three-argument form also works for 10 or more subplots, unlike
        # the packed integer form (n * 100 + 10 + i).
        pyplot.subplot(n_groep, 1, i)
        pyplot.plot(groep)
    pyplot.show()
def time_grouper_plot(series, plot_file):
    """Save a per-year box-and-whisker plot of the 2007-2017 slice.

    Args:
        series: time-indexed pandas Series; it is sliced with the labels
            '2007':'2017' and grouped by year.
        plot_file: path passed to ``plt.savefig``.
    """
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    plt.clf()
    plt.figure(figsize=(10, 7))
    groups = series['2007':'2017'].groupby(Grouper(freq='A'))
    print(type(groups))
    years = DataFrame()
    for name, group in groups:
        # NOTE(review): column assignment requires every year to have the
        # same number of observations - confirm the input guarantees this.
        years[name.year] = group.values
    years.boxplot()
    plt.xticks(rotation=45)
    plt.xlabel('Year')
    plt.ylabel('Temperature [°C]')
    plt.title('Temperature changes box and whisker plots')
    plt.savefig(plot_file)
def mainpower_data(start, end=None):
    """
    Endpoint for get main power raw data from enerPI.

    Query arguments read from the request: ``daily`` ('true' to return
    daily sums instead of raw samples) and ``round`` (JSON double precision,
    default 4). Aborts with HTTP 500 when no data is available.

    :param start: start time of data interval
    :param end: end time of data interval
    """
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    data = _get_enerpi_data(start, end, is_consumption=False)
    if (data is not None) and not data.empty:
        daily_sum = request.args.get('daily', 'False').lower() == 'true'
        round_prec = int(request.args.get('round', '4'))
        data.index = data.index.tz_localize(SENSORS.TZ)
        if daily_sum:
            data = data.groupby(Grouper(freq='D')).sum()
        return jsonify(json.loads(data.to_json(double_precision=round_prec)))
    return abort(500)
def seasonal_plot(series, plot_file):
    """Save stacked per-year line plots of the 2007-2017 slice.

    Args:
        series: time-indexed pandas Series; it is sliced with the labels
            '2007':'2017' and grouped by year.
        plot_file: path passed to ``plt.savefig``.
    """
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    groups = series['2007':'2017'].groupby(Grouper(freq='A'))
    n_groups = len(groups)
    plt.clf()
    plt.figure(figsize=(10, 10))
    plt.title('Seasonal per year line plots')
    for i, (name, group) in enumerate(groups, start=1):
        plt.subplot(n_groups, 1, i)
        group.plot()
        # Drop tick marks and axis label on each subplot for readability.
        plt.xticks([])
        plt.xlabel('')
    plt.savefig(plot_file)
def hello():
    """Render the rainfall form and, on POST, plot the requested year.

    On POST the ``name`` form field is used as a label to select data from
    ``dataset.csv``; the selection is grouped by year, drawn as stacked line
    plots and saved to a fixed static path before the result message is
    flashed. The template ``hello.html`` is rendered in all cases.
    """
    # Grouper / read_csv replace TimeGrouper and Series.from_csv, which were
    # deprecated in pandas 0.21 and later removed.
    from pandas import Grouper, read_csv

    form = ReusableForm(request.form)
    print(form.errors)
    if request.method == 'POST':
        name = request.form['name']
        # Same defaults Series.from_csv used: no header, first column as a
        # parsed date index, first data column returned as an unnamed Series.
        series = read_csv('dataset.csv', header=None, index_col=0,
                          parse_dates=True).iloc[:, 0]
        series = series.rename(None).rename_axis(None)

        # NOTE(review): the plot is built before form.validate() runs, so an
        # unknown label raises from the lookup below - confirm this is wanted.
        groups = series[name].groupby(Grouper(freq='A'))
        n_groups = len(groups)
        pyplot.figure()
        pyplot.xlabel('Month --------->')
        pyplot.ylabel('rainfall(mm) --------->')
        pyplot.title('rainfall ')
        # A separate loop variable keeps the submitted ``name`` intact.
        for i, (_period, group) in enumerate(groups, start=1):
            # The three-argument form also works for 10 or more subplots.
            pyplot.subplot(n_groups, 1, i)
            pyplot.plot(group)
        pyplot.savefig('E:/minor/4-FlaskForms/static/pj.jpg')
        # Close the figure so a long-running server does not leak one
        # figure per request.
        pyplot.close()

        if form.validate():
            # Save the comment here.
            flash('this is the graph of the rainfall for the year ' + name)
        else:
            flash('Error: Enter some value ')
    return render_template('hello.html', form=form)
ax.grid() ax.plot(series["Open"]) fig.show() # ## Split the time series into years (57 series) # and plot normalized time series # In[ ]: from pandas import TimeGrouper from pandas import DataFrame groups = series.groupby(TimeGrouper('Y')) pd.Grouper() df.groupby(Grouper(key = )) df.index = pd.to_datetime(df.index) out = df.groupby(df.index.year) out.get_group(2000) (out) out.max()
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 17 02:25:33 2017

@author: user
"""
# create a boxplot of monthly data
from pandas import Series
from pandas import DataFrame
# Grouper replaces TimeGrouper, which was deprecated in pandas 0.21 and
# later removed.
from pandas import Grouper
from pandas import read_csv
from matplotlib import pyplot
from pandas import concat

# read_csv replaces the removed Series.from_csv: first column is the parsed
# date index, first data column becomes the Series.
series = read_csv('TSData2.csv', header=0, index_col=0,
                  parse_dates=True).iloc[:, 0]
one_year = series['2017']
groups = one_year.groupby(Grouper(freq='M'))
# One column per month; shorter months are padded with NaN by concat.
months = concat([DataFrame(x[1].values) for x in groups], axis=1)
months = DataFrame(months)
months.columns = range(1, 6)
months.boxplot()
pyplot.show()
def time_dt_timegrouper_size(self):
    """Group ``self.df`` by month of its ``dates`` column and count rows.

    The grouping goes through ``TimeGrouper`` on purpose; the computed sizes
    are discarded.
    """
    monthly = TimeGrouper(key='dates', freq='M')
    grouped = self.df.groupby(monthly)
    grouped.size()
def grid_interp_ts(df, time_col, x_col, y_col, data_col, grid_res,
                   from_crs=None, to_crs=2193, interp_fun='cubic',
                   agg_ts_fun=None, period=None, digits=2):
    """
    Function to take a dataframe of z values and iterate through and
    resample both in time and space. Returns a DataFrame with the columns
    'time', 'x', 'y' and data_col, one row per time step and grid node,
    excluding nodes where the interpolation produced no value.

    df -- DataFrame containing four columns as shown in the below parameters.\n
    time_col -- The time column name.\n
    x_col -- The x column name.\n
    y_col -- The y column name.\n
    data_col -- The data column name.\n
    grid_res -- The resulting grid resolution in meters (or the unit of the final projection).\n
    from_crs -- The projection info for the input data if the result should be reprojected to the to_crs projection (either a proj4 str or epsg int).\n
    to_crs -- The projection for the output data similar to from_crs.\n
    interp_fun -- The interpolation method passed to scipy.interpolate.griddata ('nearest', 'linear' or 'cubic').\n
    agg_ts_fun -- The pandas time series resampling function to resample the data in time (either 'mean' or 'sum'). If None, then no time resampling.\n
    period -- The pandas time series code to resample the data in time (i.e. '2H' for two hours).\n
    digits -- the number of digits to round to (int).

    Raises ValueError when agg_ts_fun is neither None, 'sum' nor 'mean'.
    """

    #### Create the grids
    df1 = df.copy()

    #### Resample the time series data
    if agg_ts_fun is not None:
        if agg_ts_fun not in ('sum', 'mean'):
            raise ValueError("agg_ts_fun should be either 'sum' or 'mean'.")
        df1a = df1.set_index(time_col)
        # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
        # pandas 0.21 and later removed.
        grouped = df1a.groupby(
            [Grouper(freq=period), Grouper(y_col), Grouper(x_col)])[data_col]
        if agg_ts_fun == 'sum':
            df2 = grouped.sum().reset_index()
        else:
            df2 = grouped.mean().reset_index()
        time = df2[time_col].unique()
    else:
        df2 = df1
        time = df2[time_col].sort_values().unique()

    # The station coordinates are taken from the first time step only.
    # NOTE(review): this assumes every time step has the same points in the
    # same order - confirm against the callers.
    if from_crs is None:
        x = df2.loc[df2[time_col] == time[0], x_col].values
        y = df2.loc[df2[time_col] == time[0], y_col].values
    else:
        data1 = df2.loc[df2[time_col] == time[0]]
        from_crs1 = convert_crs(from_crs, pass_str=True)
        to_crs1 = convert_crs(to_crs, pass_str=True)
        geometry = [Point(xy) for xy in zip(data1[x_col], data1[y_col])]
        gpd = GeoDataFrame(data1.index, geometry=geometry, crs=from_crs1)
        gpd1 = gpd.to_crs(crs=to_crs1)
        x = gpd1.geometry.apply(lambda p: p.x).round(digits).values
        y = gpd1.geometry.apply(lambda p: p.y).round(digits).values

    xy = column_stack((x, y))

    max_x = x.max()
    min_x = x.min()

    max_y = y.max()
    min_y = y.min()

    new_x = arange(min_x, max_x, grid_res)
    new_y = arange(min_y, max_y, grid_res)
    x_int, y_int = meshgrid(new_x, new_y)

    #### Interpolate every time step onto the grid
    x_int2 = x_int.flatten()
    y_int2 = y_int.flatten()
    xy_int = column_stack((x_int2, y_int2))

    new_lst = []
    for t in to_datetime(time):
        set1 = df2.loc[df2[time_col] == t, data_col]
        new_z = griddata(xy, set1.values, xy_int,
                         method=interp_fun).round(digits)
        # Interpolation can undershoot; negative values are clipped to zero.
        new_z[new_z < 0] = 0
        new_lst.extend(new_z.tolist())

    #### Create new df
    # The interpolated values are passed in at construction instead of being
    # written with .loc into an int-initialised placeholder column, which
    # forced an int -> float dtype change on assignment.
    new_df = DataFrame({
        'time': repeat(time, len(x_int2)),
        'x': tile(x_int2, len(time)),
        'y': tile(y_int2, len(time)),
        data_col: new_lst
    })

    #### Export results
    return new_df[new_df[data_col].notnull()]
from pandas import Series
from matplotlib import pyplot
from pandas import DataFrame
# Grouper replaces TimeGrouper, which was deprecated in pandas 0.21 and
# later removed.
from pandas import Grouper
from pandas import read_csv
from pandas import concat
from pandas.plotting import lag_plot
from pandas.plotting import autocorrelation_plot

# read_csv replaces the removed Series.from_csv: first column is the parsed
# date index, first data column becomes the Series.
series = read_csv(
    "/Users/richardcollins/Desktop/Time_Series/daily-min-temperatures.csv",
    header=0, index_col=0, parse_dates=True).iloc[:, 0]
print(series.head())
print(len(series))

# Group data by years and by months (in 1990)
groups = series.groupby(Grouper(freq='A'))
years = DataFrame()
for name, group in groups:
    years[name.year] = group.values

series_1990 = series['1990']
groups_1990 = series_1990.groupby(Grouper(freq='M'))
# One column per month; shorter months are padded with NaN by concat.
months = concat([DataFrame(x[1].values) for x in groups_1990], axis=1)
months = DataFrame(months)
months.columns = range(1, 13)

# Line plot
series.plot(linewidth=0.2)
pyplot.show()

# Line plot per year
years.plot(subplots=True, legend=False)
# boxplots of time series
from pandas import Series
from pandas import DataFrame
# Grouper replaces TimeGrouper, which was deprecated in pandas 0.21 and
# later removed.
from pandas import Grouper
from pandas import read_csv
from matplotlib import pyplot

# read_csv replaces the removed Series.from_csv, with the same defaults:
# no header row, first column as parsed date index, unnamed result.
series = read_csv('dataset.csv', header=None, index_col=0,
                  parse_dates=True).iloc[:, 0]
series = series.rename(None).rename_axis(None)

groups = series['1964':'1970'].groupby(Grouper(freq='A'))
years = DataFrame()
for name, group in groups:
    years[name.year] = group.values
years.boxplot()
pyplot.show()
def HeatMapWeekly(self):
    """Render a year-by-week heat map and fill in the report placeholders.

    Groups ``self.data`` by year, zero-pads an incomplete first or last year
    to 52 values, truncates years with more than 52 values, saves the heat
    map to ``<self.savePath>/heatMapWeekly.png`` and replaces
    ``<HEATMAP_WEEKLY_HIGHEST>`` / ``<HEATMAP_WEEKLY_LOWEST>`` in
    ``self.reportFileName`` with the max / min of the plotted data.
    """
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    groups = self.data.groupby(Grouper(freq='A'))
    n_groups = len(groups)
    finalData = []
    years = []
    hadToAddZeros = False
    for position, (name, group) in enumerate(groups, start=1):
        extendedValues = group.values
        missing = 52 - len(group.values)
        # if there are some weeks missing in a year
        if missing > 0:
            # fill zeros for the sentiment of weeks of the first analyzed year
            if position == 1:
                extendedValues = np.append(np.zeros(missing), group.values)
                hadToAddZeros = True
            # fill zeros for the sentiment of weeks of the last analyzed year
            if position == n_groups:
                extendedValues = np.append(group.values, np.zeros(missing))
                hadToAddZeros = True
        # remove extra weeks if one week somehow jumps from year to year in
        # December/January
        if len(extendedValues) > 52:
            extendedValues = extendedValues[:52]
        finalData.append(extendedValues)
        years.append(str(name.year))

    data = np.array(finalData)
    fig, axis = pyplot.subplots()
    # if I added zeroes, color scheme needs to be adjusted
    if hadToAddZeros:
        heatmap = axis.pcolor(data)
    else:
        heatmap = axis.pcolor(data, cmap=pyplot.cm.Reds)
    axis.set_yticks(np.arange(data.shape[0]) + 0.6, minor=False)
    axis.set_xticks(np.arange(data.shape[1]) + 0.6, minor=False)
    axis.invert_yaxis()
    column_labels = ["{:02d}".format(x) for x in range(1, 53)]
    axis.set_yticklabels(years, minor=False)
    axis.set_xticklabels(column_labels, minor=False)
    axis.set_xlim(0, len(column_labels))
    figureHeight = len(years) * 0.5
    fig.set_size_inches(11, figureHeight)
    pyplot.colorbar(heatmap)
    pyplot.xticks(fontsize=7)
    pyplot.savefig(self.savePath + "/heatMapWeekly.png", dpi=100)
    # Release the figure once it is on disk so repeated calls do not
    # accumulate open figures.
    pyplot.close(fig)

    # once plotted, replace placeholders in the post
    with open(self.reportFileName) as f:
        newText = f.read().replace(
            '<HEATMAP_WEEKLY_HIGHEST>',
            "{0:.2f}".format(round(np.max(data), 2)))
        newText = newText.replace(
            '<HEATMAP_WEEKLY_LOWEST>',
            "{0:.2f}".format(round(np.min(data), 2)))
    with open(self.reportFileName, "w") as f:
        f.write(newText)
# Histogram and density plot of the training series.
pyplot.figure(1)
pyplot.subplot(211)
series_train.hist()
pyplot.subplot(212)
series_train.plot(kind='kde')
pyplot.show()

# In[39]:

from pandas import DataFrame
# Grouper replaces TimeGrouper, which was deprecated in pandas 0.21 and
# later removed.
from pandas import Grouper

# One line plot per one-second bucket of the selected window.
groups = series_train['2017-12-19 23:10:00':'2017-12-19 23:17:35'].groupby(
    Grouper(freq='S'))
n_groups = len(groups)
pyplot.figure()
for i, (name, group) in enumerate(groups, start=1):
    # The packed integer form (n * 100 + 10 + i) only works for fewer than
    # 10 subplots; this window produces far more groups than that.
    pyplot.subplot(n_groups, 1, i)
    pyplot.plot(group)
pyplot.show()
# eop

##
# NOTE(review): this binds the top-level matplotlib package (not
# matplotlib.pyplot) to the name ``plt``; only ``plt.rcParams`` is used below.
import matplotlib as plt
from pandas import Series
from pandas import DataFrame
# Grouper replaces TimeGrouper, which was deprecated in pandas 0.21 and
# later removed.
from pandas import Grouper
from pandas import read_csv
from matplotlib import pyplot

#
# read_csv replaces the removed Series.from_csv: first column is the parsed
# date index, first data column becomes the Series.
series = read_csv('daily-minimum-temperatures.csv', header=0, index_col=0,
                  parse_dates=True).iloc[:, 0]
plt.rcParams["figure.figsize"] = (16, 4)

# One subplot per calendar year.
groups = series.groupby(Grouper(freq='A'))
years = DataFrame()
for name, group in groups:
    years[name.year] = group.values
years.plot(subplots=True, legend=False)
pyplot.show()

### python
from datetime import date

d = date.fromordinal(730920)  # 730920th day after 1. 1. 0001
d  # datetime.date(2002, 3, 11)
t = d.timetuple()
#%%

#%%
#Monthly median box and whisker
import pandas as pd
from pandas import Series
from pandas import DataFrame
# Grouper replaces TimeGrouper, which was deprecated in pandas 0.21 and
# later removed.
from pandas import Grouper
from matplotlib import pyplot
from pandas import concat

# The first column is parsed as the date index so the '1990' label below
# selects that year; squeeze turns the single data column into a Series.
series = pd.read_csv('daily-minimum-temperatures.csv', delimiter=',',
                     header=0, index_col=0,
                     parse_dates=True).squeeze("columns")
one_year = series['1990']
# The original grouped an undefined name (``arsi``); the year selected
# above is what the 12 month columns below require.
groups = one_year.groupby(Grouper(freq='M'))
# One column per month; shorter months are padded with NaN by concat.
months = concat([DataFrame(x[1].values) for x in groups], axis=1)
months = DataFrame(months)
months.columns = range(1, 13)
months.boxplot()
pyplot.show()

#%%
#Lag plot
from pandas import Series
from pandas import DataFrame
from pandas import concat
obs = test[i] history.append(obs) print('>Predicted=%.3f, Expected=%3.f' % (yhat, obs)) # report performance mse = mean_squared_error(test, predictions) rmse = sqrt(mse) print('RMSE: %.3f' % rmse) print(series.describe()) series.plot() plt.show() groups = series['2017-07':'2017-12'].groupby(TimeGrouper('M')) months = DataFrame() plt.figure(figsize=(36, 36)) i = 1 n_groups = len(groups) for name, group in groups: plt.subplot((n_groups*100) + 10 + i) i += 1 plt.plot(group) plt.show() plt.figure(1) plt.subplot(211) series.hist() plt.subplot(212)
def describe_and_plot(series):
    """Print summary statistics and show six diagnostic plots of the
    daily female births data set.

    NOTE(review): the ``series`` argument is immediately overwritten by the
    CSV loaded below, so whatever the caller passes is ignored - confirm
    whether that is intended before relying on the parameter.

    Plots produced (3x2 grid): line plot, histogram/density, per-month box
    plot, month-by-day heat map, lag plot and autocorrelation plot.
    """
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    # The squeeze=True keyword of read_csv was removed; squeezing the
    # single data column afterwards gives the same Series.
    series = pd.read_csv('data/daily-total-female-births-in-cal.csv',
                         header=0, index_col=0,
                         parse_dates=True).squeeze("columns")
    series.index = pd.to_datetime(series.index)

    # ### Inspect the data and print some basic information
    print("\n")
    print("#### Daily total female births ####\n")
    print(series.head())
    print("\n")
    print('Size = %d \n' % series.size)
    interest_date = "1959-06-21"
    print("Nr of births on %s = %d \n" % (interest_date,
                                          series[interest_date]))
    print("Stats:")
    print(series.describe())
    print("-" * 50)
    print("\n")

    # Time series plots
    # We group the data per month
    series_groups = series.groupby(Grouper(freq='M'))
    months = pd.DataFrame()
    for name, group in series_groups:
        # pd.Series to fix difference in columns length
        months[name.month] = pd.Series(group.values)

    # Prepare the figure where the plots will be placed
    fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(
        3, 2, figsize=(16, 28))

    # Line plots
    ax1.plot(series)
    ax1.set(xlabel="Date", ylabel="Daily Births", title="Line plot")

    # Histograms and density plot
    sns.distplot(series, rug=True, bins=20, ax=ax2)
    ax2.set(xlabel="Sales", ylabel="Counts",
            title="Histogram and density plot")

    # Box and whisker plot
    months.boxplot(ax=ax3)
    ax3.set(xlabel="Months", ylabel="Daily Births",
            title="Box and whisker plot")

    # Heatmap plot
    img4 = ax4.matshow(months, interpolation=None, aspect='auto')
    xaxis = range(-1, 13, 2)
    yaxis = range(-4, 33, 5)
    ax4.set(xlabel="Month", ylabel="Day", xticklabels=xaxis,
            yticklabels=yaxis, title="Heatmap plot")
    ax4.xaxis.tick_bottom()
    fig.colorbar(img4, ax=ax4, aspect=5)

    # Lag plot
    lag_plot(series, ax=ax5)
    # Dashed identity line over the value range for visual reference.
    diagonal = range(int(series.min()), int(series.max()))
    ax5.plot(diagonal, diagonal, '--k')
    ax5.set(xlabel="Births(t)", ylabel="Births(t+1)", title="Lag plot")

    # Autocorrelation plot
    autocorrelation_plot(series, ax=ax6)
    ax6.set(title="Autocorrelation plot", ylim=(-0.5, 0.5))

    fig.subplots_adjust(hspace=0.6)
    plt.show()
from pandas.plotting import autocorrelation_plot
# Grouper replaces TimeGrouper, which was deprecated in pandas 0.21 and
# later removed.
from pandas import Grouper

# The squeeze=True keyword of read_csv was removed; squeezing the single
# data column afterwards gives the same Series.
series = read_csv('daily-minimum-temperatures.csv', parse_dates=[0],
                  index_col=0).squeeze("columns")
print(series.head())

# basic line plot
series.plot()
pyplot.show()

# dot plot
series.plot(style='k.')
pyplot.show()

# group line plot by years
groups = series.groupby(Grouper(freq='A'))
years = DataFrame()
for name, group in groups:
    years[name.year] = group.values
years.plot(subplots=True, legend=False)
pyplot.show()

# histogram plot
series.hist()
pyplot.show()

# density plot
series.plot(kind='kde')
pyplot.show()

# box plot group by year
def time_dt_timegrouper_size(self):
    """Group ``self.df`` by month of its ``dates`` column and count rows.

    Runs inside ``warnings.catch_warnings(record=True)`` so warnings raised
    while building or using the ``TimeGrouper`` are captured rather than
    shown; the computed sizes are discarded.
    """
    with warnings.catch_warnings(record=True):
        monthly = TimeGrouper(key='dates', freq='M')
        grouped = self.df.groupby(monthly)
        grouped.size()
# boxplots of time series
from pandas import Series
from pandas import DataFrame
# Grouper replaces TimeGrouper, which was deprecated in pandas 0.21 and
# later removed.
from pandas import Grouper
from pandas import read_csv
from matplotlib import pyplot

# read_csv replaces the removed Series.from_csv, with the same defaults:
# no header row, first column as parsed date index, unnamed result.
series = read_csv('dataset.csv', header=None, index_col=0,
                  parse_dates=True).iloc[:, 0]
series = series.rename(None).rename_axis(None)

# One box per 10-year bucket, labelled by the bucket's starting year.
groups = series['1885':'1944'].groupby(Grouper(freq='10AS'))
decades = DataFrame()
for name, group in groups:
    decades[name.year] = group.values
decades.boxplot()
pyplot.show()
date = day.index.date[0] pdown = feat.loc[date]['probdown'] startprice = day.iloc[0]['open'] # we always pay the spread dr = - leverage_ig * 0.5 for index, row in day.iterrows(): if ( (pdown < 0.5 and (row['low'] < startprice - ig_stop_limit)) or (pdown >= 0.5 and (row['high'] > startprice + ig_stop_limit)) ): print('stoploss', index, pdown, row['low'], startprice) loss = dr - leverage_ig * (ig_stop_limit + 0.8) # keep on betting return loss + dayReturn(day[day.index > index]) if ( (pdown < 0.5 and row['open'] > startprice + ig_win_limit) or (pdown >= 0.5 and row['open'] < startprice - ig_win_limit) ): win = dr + leverage_ig * ig_win_limit return win + dayReturn(day[day.index > index]) sign = 1 if pdown > 0.5 else -1 return dr + (startprice - day.iloc[-1]['open']) * leverage_ig * sign returns = (quote .groupby(TimeGrouper('D')) .apply(dayReturn)) print(returns) print(returns.sum()) print(returns.max())
from pandas import DataFrame
from pandas import TimeGrouper
from matplotlib import pyplot
import numpy as np
import pandas as pd

# Raw values of the sampled engine window (defined in an earlier cell).
pdf = engineSample50cycleWindow.values

# COMMAND ----------

# NOTE(review): ``Series`` is not imported in the visible part of this
# notebook, and both ``Series.from_array`` and ``TimeGrouper`` have been
# removed from current pandas - confirm the pinned pandas version.
series = Series.from_array(pdf)
# NOTE(review): ``TimeGrouper(1)`` passes the integer 1 as the frequency and
# the series is built from a bare array without a datetime index; the
# intended grouping is unclear - verify before porting to ``pd.Grouper``.
groups = series.groupby(TimeGrouper(1))
years = DataFrame()
for name, group in groups:
    years[name.year] = group.values
# One row per group key for the matrix plot.
years = years.T
pyplot.matshow(years, interpolation=None, aspect='auto')
pyplot.show()

# COMMAND ----------

import matplotlib.pyplot as plt

# Convert the Spark DataFrame to pandas and scatter SPEED against LABEL1.
dataSet = renamed_df.toPandas()
fig, ax = plt.subplots()
ax.scatter(dataSet['SPEED'], dataSet['LABEL1'])  #scatterplot
parse_dates=[0], nrows=36, squeeze=True, date_parser=parser) print(series_shampoo.head(10)) print("\n") print('Size = %d \n' % series_shampoo.size) print("\n") print("Stats:") print(series_shampoo.describe()) print("Nr of NaNs = %d" % series_shampoo.isnull().sum()) # ### Time series plots # We group the data per year series_shampoo_groups = series_shampoo.groupby(TimeGrouper('A')) years = pd.DataFrame() for name, group in series_shampoo_groups: years[name.year] = group.values years.columns = ["2001", "2002", "2003"] # Prepare the figure where the plots will be placed fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6), (ax7, ax8)) = plt.subplots(4, 2, sharex=False, sharey=False, figsize=(16, 28)) # Line plots
def chunkize_df_years(df, freq='Y'):
    '''
    Slice DataFrame into years.

    Sets the ``Time`` column as index and groups the rows by ``freq``.

    :param df: DataFrame with a datetime-like ``Time`` column
    :param freq: pandas offset alias used for the grouping (yearly by default)
    :return: the pandas GroupBy object, one group per period
    '''
    # Grouper(freq=...) replaces TimeGrouper, which was deprecated in
    # pandas 0.21 and later removed.
    from pandas import Grouper

    df = df.set_index('Time')
    return df.groupby(Grouper(freq=freq))