def load(self, txtfileurl):
    """Load the txt file specified by a URL as a DataFrame"""
    datestr = txtfileurl[-10:-4]
    # check if we have the data saved already
    filepath = os.path.join(self.data_dir, datestr + ".pkl")
    if os.path.isfile(filepath):
        with open(filepath, "rb") as f:
            result = pickle.load(f)
    else:
        # detect whether this file is of the newer, cleaner format
        is_new = datetime.strptime(datestr, "%y%m%d") >= self.new_era
        if is_new:
            result = pd.read_csv(
                txtfileurl, parse_dates=[[6, 7]],
                index_col=["DATE_TIME"]).sort_index()
            result.columns = [
                "C/A", "UNIT", "SCP", "STATION", "LINENAME", "DIVISION",
                "DESC", "ENTRIES", "EXITS"
            ]
        else:
            result = self._load_old(txtfileurl).sort_index()
        result["TURNSTILE_ID"] = result[["C/A", "UNIT", "SCP"]].apply(
            lambda x: " ".join(x), axis=1)
        result.drop(["C/A", "UNIT", "SCP"], axis=1, inplace=True)
        with open(filepath, "wb") as f:
            pickle.dump(result, f)
    return result
def _load_old(self, txtfileurl):
    """Load old format txt file, which needs quite some reformatting"""
    records = []
    txtfile = urllib2.urlopen(txtfileurl)
    for line in txtfile:
        row = line.strip().split(",")
        if len(row) < 8:
            continue
        ca, unit, scp = row[:3]
        i = 3
        while i < len(row):
            date, time, desc, entries, exits = row[i:i + 5]
            date_time = datetime.strptime(date + " " + time,
                                          "%m-%d-%y %H:%M:%S")
            record = dict(
                DATE_TIME=date_time,
                UNIT=unit,
                SCP=scp,
                DESC=desc,
                ENTRIES=int(entries),
                EXITS=int(exits))
            record["C/A"] = ca
            records.append(record)
            i += 5
    old_df = pd.DataFrame.from_records(records)
    return pd.merge(
        old_df, self.station_df, how="left").set_index(["DATE_TIME"])
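# Minimal usage sketch for the loader above (hedged: the enclosing class name
# and constructor are not shown in the original, so `TurnstileLoader` and its
# arguments are assumptions; `load()` only relies on the attributes
# `data_dir`, `new_era` and `station_df` visible in the two methods).
loader = TurnstileLoader(data_dir="./cache")  # hypothetical constructor
url = "http://web.mta.info/developers/data/nyct/turnstile/turnstile_200307.txt"
df = loader.load(url)  # parsed once, then cached as ./cache/200307.pkl
print(df.index.min(), df.index.max())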
def parser(x): return datetime.strptime('190' + x, '%Y-%m')
def parser(x): return datetime.strptime(x, '%Y/%m/%d')
def parser(x, y, z):
    x = x + ':' + y + ':' + z
    return datetime.strptime(x, ' %d/%b/%Y:%H:%M')
def parser(x):
    return datetime.strptime('190' + x, '%Y-%m')

url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/shampoo.csv'
def parser(x): return datetime.strptime(x, '%d/%m/%Y')
        for i in range(len(d)):
            tmp_data = tmp_data + last_data_shift_list[-i - 1]
    else:
        tmp_data = predict_value
        for i in range(len(d)):
            try:
                tmp_data = tmp_data.add(shift_ts_list[-i - 1])
            except:
                raise ValueError('What you input is not pd.Series type!')
    tmp_data.dropna(inplace=True)
    # returning numpy.exp(tmp_data) would also recover the fully original data;
    # tmp_data is the log of the original series
    return tmp_data


dateparse = lambda dates: datetime.strptime(dates, '%Y-%m')
data = read_csv('AirPassengers.csv', parse_dates=[0], index_col=0, date_parser=dateparse)
# data = read_csv('mmt.csv', parse_dates=[0], index_col=0, date_parser=dateparse)
# data = read_csv('AirPassengers.csv')
# print(data.head())
ts = data['#Passengers']
pyplot.plot(data)
pyplot.show()


def rolling_statistics(timeseries):
    # Determine rolling statistics
    rolmean = timeseries.rolling(12).mean()
    rolstd = timeseries.rolling(12).std()
    # Plot rolling statistics:
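    # (Hedged continuation: the original snippet ends at the comment above, so
    # the plotting below is an assumed, standard completion, not the author's code.)
    pyplot.plot(timeseries, color='blue', label='Original')
    pyplot.plot(rolmean, color='red', label='Rolling Mean')
    pyplot.plot(rolstd, color='black', label='Rolling Std')
    pyplot.legend(loc='best')
    pyplot.title('Rolling Mean & Standard Deviation')
    pyplot.show()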
def parser(s): return datetime.strptime(s, '%Y-%m-%d %H:%M:%S.%f')
    # print(X)
    # print(y)
    yhat = forecast_lstm(lstm_model, 1, X)
    # invert scaling
    yhat = invert_scale(scaler, X, yhat)
    # invert differencing
    yhat = inverse_difference(raw_values, yhat, len(test_scaled) + 1 - i)
    # store forecast
    predictions.append(yhat)
    expected = raw_values[len(train) + i + 1]
    # time calculation
    if currentMonth == 13:
        currentYear = currentYear + 1
        currentMonth = 1
    temp = str(currentYear) + '/' + str(currentMonth)
    time.append(temp)
    currentMonth = currentMonth + 1
    print('Month=%s, Predicted=%f, Expected=%f' % (temp, yhat, expected))

# report performance
mse = mean_squared_error(raw_values[-228:], predictions)
print('Test MSE: %.3f' % mse)

# line plot of observed vs predicted
xs = [datetime.strptime(t, '%Y/%m').date() for t in time]
pyplot.plot(xs, raw_values[-228:], color="blue", label="actual")
pyplot.plot(xs, predictions, color="red", linestyle='--', label="predict")
pyplot.legend(loc='upper left')
pyplot.xlabel('time(years)')
pyplot.ylabel('NINO3.4/°C')
pyplot.show()
    ])
    # General pyramid for the country's ZMs (metropolitan zones), 2010
    zm = ama2_grp(
        Region=ZMs[clave][0], data_fnam=data_fnam,
        N=N, out_fnam=out_fnam, init_index=init_index, init=init,
        intervention_day=intervention_day, relax_day=relax_day, trim=trim,
        Pobs_I=Pobs_I, Pobs_D=Pobs_D,
        R_rates=R_rates_V2, exit_probs=exit_probs_copy, workdir=workdir,
        ngrp=ngrp, Int_M=Int_M, age_prop=age_prop)
    zm.age_groups = [0, 25, 50, 65, 100]
    if T > 0:
        zm.RunMCMC(T=T, burnin=burnin, pred=pred, plot_fit=plot_fit)
    return zm


dateparse = lambda x: datetime.strptime(x, '%Y-%m-%d')


# data Reg IRAG
def PlotFigsZMs(zm, pred=99, q=[10, 25, 50, 75, 90], blue=True, workdir='./../'):
    close('all')
    try:
        zm_vmx_Hosp_RI = read_csv(
            workdir + "data/hosp/%s_DinHosp.csv" % (zm.clave, ),
            parse_dates=['fecha'], date_parser=dateparse)
def parser(x): return datetime.strptime(x, '%M-%S')
# # series_time['data_process'] = series_time['chl']
# series_time['data_process'][
#     (series_time['shift_diff'] >= shiftDiff) & (series_time['shift1'] >= shiftDiff / 2) & (
#         series_time['shift-1'] >= shiftDiff / 2)] = numpy.NaN
# series_time.dropna(inplace=True)
# series_time1 = series_time.resample('1H').mean()
# series_time2 = series_time1.interpolate(method='time')
#
# # data after de-duplication, gap-filling and resampling
# del series_time2['shift1']
# del series_time2['shift-1']
# del series_time2['shift_diff']
# del series_time2['data_process']
# series_time2.to_csv('logs/results/NBshuizhi2013040120180316_1H_7Feature.csv')

dateparse1 = lambda dates: datetime.strptime(dates, '%Y-%m-%d %H:%M:%S')
series_new = read_csv(
    'logs/results/NBshuizhi2013040120180316_12H_7Feature_Edited1.csv',
    parse_dates=[0], index_col=0, usecols=[0, 1, 2, 3, 4, 5],
    engine='python', date_parser=dateparse1)
series_time1 = series_new.resample('12H').mean()
series_time2 = series_time1.interpolate(method='time')
# series_new = series_new.resample(resample_res).mean()
# series_time2.to_csv('logs/results/NBshuizhi2013040120180316_12H_7Feature_Edited1.csv')

datavalues = series_time2.values
groups = [0, 1, 2, 3, 4]
i = 1
pyplot.figure()
def parser(x):
    # NOTE: '%l' and '%s' are not valid Python strptime directives
    return datetime.strptime(x, '%Y-%m-%d-%l-%s')
def parser(x): return datetime.strptime(x, '%b %Y')
    model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]),
                   stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()
    return model


# make a one-step forecast
def forecast_lstm(model, batch_size, X):
    X = X.reshape(1, 1, len(X))
    yhat = model.predict(X, batch_size=batch_size)
    return yhat[0, 0]


# load dataset
series = read_csv('shampoo-sales.csv', header=0, parse_dates=[0], index_col=0,
                  squeeze=True,
                  date_parser=lambda x: datetime.strptime('190' + x, '%Y-%m'))

# transform data to be stationary
raw_values = series.values
diff_values = difference(raw_values, 1)

# transform data to be supervised learning
supervised = timeseries_to_supervised(diff_values, 1)
supervised_values = supervised.values

# split data into train and test-sets
train, test = supervised_values[0:-12], supervised_values[-12:]

# transform the scale of the data
scaler, train_scaled, test_scaled = scale(train, test)
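# Hedged sketch of the scale() helper referenced above (its definition is not
# shown in the original); this follows the common MinMaxScaler pattern and is
# an assumption, not the author's code.
from sklearn.preprocessing import MinMaxScaler

def scale(train, test):
    # fit the scaler on the training data only, then transform both sets
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)
    train_scaled = scaler.transform(train)
    test_scaled = scaler.transform(test)
    return scaler, train_scaled, test_scaled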
def parser(x): return datetime.strptime(x, '%m-%d-%H-%M')
def parser(x): return datetime.strptime(x,'%Y-%m-%d %H:%M:%S')
def parser(x): return datetime.strptime(x, '%y-%m')
def parser(x): return datetime.strptime(str(x), '%d-%m-%y')
            if bic < init_bic:
                init_p = p
                init_q = q
                init_properModel = result_ARMA
                init_bic = bic
    return init_bic, init_p, init_q, init_properModel


# restore the differenced (log-transformed) series
def predict_recover(ts):
    ts = np.exp(ts)
    return ts


# read the data
dateparse1 = lambda dates: datetime.strptime(dates, '%Y-%m-%d')
series_new = read_csv('ZS01-溶解氧.csv', parse_dates=[0], index_col=0,
                      usecols=[0, 1], engine='python', date_parser=dateparse1)
train_data = series_new.values[:750]
test_data = series_new.values[750:976]

# order = st.arma_order_select_ic(diff1, max_ar=50, max_ma=50, ic=['aic', 'bic', 'hqic'])
# model = ARMA(diff1, order.bic_min_order)
init_bic, init_p, init_q, init_properModel = proper_model(train_data, 5)
# result_arma = model.fit(disp=-1, method='css')
train_predict = init_properModel.predict()
def parser(x): return datetime.strptime(f"190{x}", "%Y-%m")
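# Usage sketch for the parser above (hedged: it mirrors the shampoo-sales
# read_csv pattern used elsewhere in this collection; the local file name is
# an assumption).
from pandas import read_csv

series = read_csv('shampoo.csv', header=0, parse_dates=[0], index_col=0,
                  squeeze=True, date_parser=parser)
print(series.head())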
from statistics import mean

# type %matplotlib qt into the console
df = pd.ExcelFile("Dataset - 1.xlsx")  # load the Excel file
df = df.parse('4567')  # getting the 2nd sheet
df = df[['fullness_rate (%)', 'record_date']]

values = df['fullness_rate (%)']  # parallel lists
values = list(values)
dates = df['record_date']
dates = list(dates)

# converting string dates
for i in range(len(dates)):
    new_date = datetime.strptime(dates[i], '%Y-%m-%d %H:%M:%S+00:00')
    dates[i] = new_date  # string to datetime

df['record_date'] = pd.DataFrame(dates)
df.set_index('record_date', inplace=True)

# ax = df.plot(title="Fullness Rate and Days", colormap='jet', marker='.')
# ax.set_xlabel("Days")
# ax.set_ylabel("Fullness Rate (%)")

## converting the series to stationary even though it's a small dataset
# AR model
X = df.values
X[88:] = X[88:] + 0.6635
def parser(x): return datetime.strptime(x, '%d-%m-%Y %H:%M')
def parser(x):
    if x.endswith('11') or x.endswith('12') or x.endswith('10'):
        return datetime.strptime(x, '%Y%m')
    else:
        return datetime.strptime(x, '%Y0%m')
def parser(x): return datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
def parser(x):
    # print(x)
    return datetime.strptime('190' + x, '%Y-%m')
def test_arimax(data, testdate, nbjourpred, nbjourtest, p=3, d=0, q=3, P=0, D=1, Q=2, s=24):
    from sklearn.metrics import mean_squared_error
    from math import sqrt
    from datetime import datetime, timedelta

    horizon = nbjourpred
    test_date_time = datetime.strptime(testdate, '%d/%m/%Y')
    end_test = test_date_time + timedelta(days=horizon)
    end_train = test_date_time
    start_train = test_date_time - timedelta(days=nbjourtest)

    train = data[start_train:end_train]
    train.drop(train.tail(1).index, inplace=True)
    test = data[test_date_time:end_test]
    test.drop(test.tail(1).index, inplace=True)

    arima_model = sarimax(train, p, d, q, P, D, Q, s, getexplanatoryvariables(train))
    result = dict()

    # evaluate at 1, 2, 3, 4 and 7-day horizons
    for h in (1, 2, 3, 4, 7):
        end_test = test_date_time + timedelta(days=h)
        test = data[test_date_time:end_test]
        test.drop(test.tail(1).index, inplace=True)
        prevision = arima_model.predict(start=train.shape[0],
                                        end=train.shape[0] - 1 + h * 24,
                                        exog=getexplanatoryvariables(test))
        result["MAE%d" % h] = mean_absolute_error(test, prevision)
        result["rmse%d" % h] = sqrt(mean_squared_error(test, prevision))
    return result
def parser(x): return datetime.strptime(x, '%Y %m %d %H')
def parser(x): return datetime.strptime(x,'%Y-%m-%d')
class Command(BaseCommand):
    help = "Run multiple searches and log the cheapest and best value switches"
    requires_migrations_checks = True

    base_data = {
        'data_mining': True,
        'source_market': 'UK',
        'place_name': '',
        'latitude': '0',
        'longitude': '0',
        'occupants': '2',
        'currency': 'gbp',
        'county': '',
    }
    base_check_in = datetime.strptime('2017-05-05', '%Y-%m-%d')
    check_in_range = date_range(base_check_in, base_check_in + DateOffset(days=60))
    cities = [
        {'city': 'New York', 'state': 'NY', 'country': 'US'},
        {'city': 'Paradise', 'state': 'NV', 'country': 'US'},
        {'city': 'Austin', 'state': 'TX', 'country': 'US'},
        {'city': 'London', 'state': 'England', 'country': 'GB'},
        {'city': 'Barcelona', 'state': 'CT', 'country': 'ES'},
        {'city': 'Milan', 'state': 'Lombardy', 'country': 'IT'},
        {'city': 'Shanghai', 'state': 'Shanghai', 'country': 'CN'},
        {'city': 'Bangkok', 'state': '', 'country': 'TH'},
        {'city': 'Singapore', 'state': '', 'country': 'SG'},
    ]
    stay_durations = [3, 4, 5, 6]

    def handle(self, *args, **options):
        try:
            os.rename('analysis_output.csv', 'check_file_access.csv')
            os.rename('check_file_access.csv', 'analysis_output.csv')
        except OSError:
            raise Exception(
                'Destination file is still open. Please close before running!')
        all_stays = []
        for city in self.cities:
            for check_in in self.check_in_range:
                for duration in self.stay_durations:
                    check_out = check_in + DateOffset(days=duration)
                    check_in_range = date_range(check_in, check_out - DateOffset(days=1))
                    data = self.base_data.copy()
                    data.update({
                        'checkIn': check_in,
                        'checkOut': check_out,
                        'check_in_range': check_in_range,
                        'country': city['country'],
                        'state': city['state'],
                        'city': city['city'],
                    })
                    stays = tasks.execute_search(data, '', None)
                    result_count = len(stays)
                    if result_count == 0:
                        continue
                    stays.query('hotel_2_id != -1', inplace=True)
                    grouping_columns = [
                        'primary_star_rating', 'min_review_tier'
                    ]
                    stays.sort_values('stay_cost', inplace=True)
                    unrestricted_low_cost_stays = stays.groupby(
                        grouping_columns).nth(0)
                    unrestricted_low_cost_stays['restricted'] = False
                    stays.sort_values('cost_per_quality_unit', inplace=True)
                    unrestricted_best_value_stays = stays.groupby(
                        grouping_columns).nth(0)
                    unrestricted_best_value_stays['restricted'] = False
                    # keep only switches where both benchmark costs are present
                    # (a value equals itself only when it is not NaN)
                    switches_with_both_benchmarks = (
                        'entire_stay_cost_1 == entire_stay_cost_1 '
                        'and entire_stay_cost_2 == entire_stay_cost_2')
                    stays.query(switches_with_both_benchmarks, inplace=True)
                    stays.sort_values('stay_cost', inplace=True)
                    restricted_low_cost_stays = stays.groupby(
                        grouping_columns).nth(0)
                    restricted_low_cost_stays['restricted'] = True
                    stays.sort_values('cost_per_quality_unit', inplace=True)
                    restricted_best_value_stays = stays.groupby(
                        grouping_columns).nth(0)
                    restricted_best_value_stays['restricted'] = True
                    scenarios = [
                        unrestricted_low_cost_stays,
                        unrestricted_best_value_stays,
                        restricted_low_cost_stays,
                        restricted_best_value_stays,
                    ]
                    stays = concat(scenarios)
                    stays.reset_index(inplace=True)
                    stays.drop_duplicates(inplace=True)
                    stays['city'] = city['city']
                    stays['check_in'] = check_in
                    stays['duration'] = duration
                    stays['result_count'] = result_count
                    all_stays.append(stays)
                    logger.warn('{}, {:%Y-%m-%d}, {}'.format(
                        city['city'], check_in, duration))
        stays = concat(all_stays).to_csv('analysis_output.csv', index=False)
def main():
    """Main function"""
    # load the data
    data_dir = './jdata/'
    sales = pd.read_csv(os.path.join(data_dir, 'sku_sales.csv'))
    items = pd.read_csv(os.path.join(data_dir, 'sku_info.csv'))
    attr = pd.read_csv(os.path.join(data_dir, 'sku_attr.csv'))
    promo = pd.read_csv(os.path.join(data_dir, 'sku_prom.csv'))
    quantile = pd.read_csv(os.path.join(data_dir, 'sku_quantile.csv'))
    promo_test = pd.read_csv(os.path.join(data_dir, 'sku_prom_testing_2018Jan.csv'))
    sales['datetime'] = sales['date'].map(lambda x: datetime.strptime(x, '%Y-%m-%d'))

    # left join items
    sales_items = pd.merge(sales, items, how='left', on='item_sku_id')
    sales = sales_items

    n_seq = 8
    n_feature = 2
    n_batch = 1
    n_epochs = 50
    n_neurons = 3

    all_off_sets = np.arange(1, 32)
    # all_off_sets = np.arange(1, 2)
    all_item_sku_ids = items['item_sku_id'].unique()
    # all_item_sku_ids = np.arange(1, 2)

    result_train = []
    result_test = []
    print("Preprocessing:")
    for off_set in all_off_sets:
        for item_sku_id in all_item_sku_ids:
            sales_single_sku = sales[sales['item_sku_id'] == item_sku_id].sort_values(['dc_id', 'datetime'])
            supervised_values, train, test = series_to_supervised_1(sales_single_sku, n_seq, off_set)
            if len(result_train) == 0:
                result_train = train
                result_test = test
            else:
                result_train = pd.concat([result_train, train])
                result_test = pd.concat([result_test, test])

    all_train = result_train.copy()
    y_train = result_train.pop('var0')
    x_train = result_train
    all_test = result_test.copy()
    y_test = result_test.pop('var0')
    x_test = result_test

    # training
    clf = RandomForestRegressor(n_estimators=100, criterion='mae', verbose=1)  # 100 decision trees
    # clf.fit(x_train.head(10000), y_train.head(10000))
    clf.fit(x_train, y_train)
    score = clf.score(x_test, y_test)
    result = clf.predict(x_test)

    # calculate MSE
    mse = mean_squared_error(y_test, result)
    print('Test MSE: %.3f' % mse)

    # submit
    columns = ['vendibility', 'quantity']
    features = []
    result = []
    for off_set in all_off_sets:
        for item_sku_id in all_item_sku_ids:
            dcs = sales[sales['item_sku_id'] == item_sku_id]['dc_id'].unique()
            for dc in dcs:
                feature = {}
                sales_single_sku_submit = sales[sales['item_sku_id'] == item_sku_id][sales['dc_id'] == dc].sort_values(
                    ['dc_id', 'datetime']).tail(n_seq)
                sales_single_sku_submit_feature = sales_single_sku_submit[columns].values.reshape(1, n_seq, n_feature)
                item_first_cate_cd = sales.head(1)['item_first_cate_cd'].unique()[0]
                item_second_cate_cd = sales.head(1)['item_second_cate_cd'].unique()[0]
                item_third_cate_cd = sales.head(1)['item_third_cate_cd'].unique()[0]
                brand_code = sales.head(1)['brand_code'].unique()[0]
                item_phase = np.array([brand_code, dc, item_first_cate_cd, item_second_cate_cd, item_third_cate_cd])
                sales_single_sku_submit_feature = np.append(item_phase, sales_single_sku_submit_feature)
                quantity = clf.predict(sales_single_sku_submit_feature)
                feature['date'] = off_set
                feature['dc_id'] = dc
                feature['item_sku_id'] = item_sku_id
                feature['quantity'] = quantity
                features.append(feature)

    result = pd.DataFrame(features)
    print('predict-save: all')
    result.to_csv('./jdata_out/submit_rf_20180724.csv', index=False)
def parser_one(x): return datetime.strptime(x, '%d/%m/%Y %H:%M:%S')
def main():
    """Main function"""
    # load the data
    data_dir = './jdata/'
    sales = pd.read_csv(os.path.join(data_dir, 'sku_sales.csv'))
    items = pd.read_csv(os.path.join(data_dir, 'sku_info.csv'))
    attr = pd.read_csv(os.path.join(data_dir, 'sku_attr.csv'))
    promo = pd.read_csv(os.path.join(data_dir, 'sku_prom.csv'))
    quantile = pd.read_csv(os.path.join(data_dir, 'sku_quantile.csv'))
    promo_test = pd.read_csv(os.path.join(data_dir, 'sku_prom_testing_2018Jan.csv'))
    sales['datetime'] = sales['date'].map(lambda x: datetime.strptime(x, '%Y-%m-%d'))

    # LSTM parameters
    n_seq = 8
    n_feature = 2
    n_batch = 1
    n_epochs = 100
    n_neurons = 3

    # all item_sku_id values
    columns = ['vendibility', 'quantity']
    features = []
    result = []
    all_off_sets = np.arange(1, 32)
    # all_off_sets = np.arange(1, 3)
    all_item_sku_ids = items['item_sku_id'].unique()
    # all_item_sku_ids = np.arange(100, 102)

    for off_set in all_off_sets:
        all_train = []
        all_test = []
        # train and predict
        for item_sku_id in all_item_sku_ids:
            # prepare the time-series data
            sales_single_sku = sales[sales['item_sku_id'] == item_sku_id].sort_values(['dc_id', 'datetime'])
            supervised_values, train, test = series_to_supervised_1(sales_single_sku, n_seq, off_set)
            if len(all_train) == 0:
                all_train = train
                all_test = test
            else:
                all_train = all_train.append(train)
                all_test = all_test.append(test)

        print('Training: off_set={}'.format(off_set))
        model = fit_lstm(train.values, n_seq, n_feature, n_batch, n_epochs, n_neurons)
        # eval_lstm(model, test, n_seq, n_feature, n_batch)

        # train and predict
        for item_sku_id in all_item_sku_ids:
            # for submit
            # get the 6 distinct dc ids
            dcs = sales[sales['item_sku_id'] == item_sku_id]['dc_id'].unique()
            for dc in dcs:
                feature = {}
                sales_single_sku = sales[sales['item_sku_id'] == item_sku_id][sales['dc_id'] == dc].sort_values(
                    ['dc_id', 'datetime'])
                sales_single_sku_submit = sales_single_sku.tail(n_seq)
                feature['date'] = off_set
                feature['dc_id'] = dc
                feature['item_sku_id'] = item_sku_id
                if len(sales_single_sku) == 0:
                    feature['quantity'] = 0
                if len(sales_single_sku_submit) < n_seq and len(sales_single_sku_submit) > 0:
                    feature['quantity'] = sales_single_sku.tail(1)['quantity'].unique()[0]
                if len(sales_single_sku_submit) == n_seq:
                    sales_single_sku_submit_feature = sales_single_sku_submit[columns].values.reshape(1, n_seq, n_feature)
                    quantity = forecast_lstm(model, sales_single_sku_submit_feature, n_batch)[0]
                    feature['quantity'] = quantity
                features.append(feature)
            print('predict-save: item_sku_id={}, off_set={}'.format(item_sku_id, off_set))

        result = pd.DataFrame(features)
        print('predict-save: item_sku_id={}, all offsets'.format(item_sku_id))
        result = pd.DataFrame(features)
        result.to_csv('./jdata_out/submit.csv', index=False)

    result = pd.DataFrame(features)
    print('predict-save: all')
    result.to_csv('./jdata_out/submit.csv', index=False)
def parser(x): return datetime.strptime(x, '%m/%d/%Y')
def parser(date_string): return datetime.strptime(date_string, '%Y-%m-%d')
# ===========================
# Reading configuration files
# ===========================
data_conf = utils.Get_Data_From_JSON(ROOT_DIR + "/dbfs" + cwd + "data.json")
model_conf = utils.Get_Data_From_JSON(ROOT_DIR + "/dbfs" + cwd + "config.json")
print(data_conf)
print(model_conf)

# Time window and number of customers
start_date, end_date = data_conf['start_date'], data_conf['end_date']
N_days_X, N_days_y = int(data_conf['number_of_historical_days']), int(
    data_conf['number_of_predicted_days'])  # 365, 92

end_date_dt = datetime.strptime(end_date, "%Y-%m-%d")
start_date_for_prediction_dt = end_date_dt - relativedelta(days=N_days_X + N_days_y)
start_date_for_prediction = start_date_for_prediction_dt.strftime("%Y-%m-%d")

start_date_dt, end_date_dt, start_date_prediction, end_date_prediction, \
    end_date_plusOneDay, end_date_minus_6month = utils.dates_definitions(
        start_date, end_date, N_days_X, N_days_y)

time_range = pd.date_range(start_date, end_date, freq='D')

# Type of dataset desired
# Case we want a dataset to train a model: use 1e5 and serving_mode=False
# Case we want an unseen dataset to serve the model on: use 2.5e6 and serving_mode=True
N_customers = 1e2  # 2.5e6
serving_mode = False  # True if creating data for serving
def parser(x): return datetime.strptime('190'+x, '%Y-%m')
def complete(request, date=None):
    """
    Ajax view that completes the import and inserts positions into the db
    :param date: str
    :param request: dict
    :rtype : render
    """
    try:
        # get path then open file
        path = OpenDir().get_path(date)
        fname = OpenDir().get_fname_from_path(path)

        # after opening, the date needs to be moved back one business day
        pd_date = datetime.strptime(date, '%Y-%m-%d')
        pd_date = pd_date - BDay(1)
        date = pd_date.strftime('%Y-%m-%d')

        # continues...
        positions, overall = OpenPosCSV(path).read()
        for position in positions:
            # save positions
            pos = pm.Position(
                symbol=position['Symbol'],
                company=position['Company'],
                date=date
            )
            pos.save()

            # save instrument
            instrument = pm.PositionInstrument()
            instrument.set_dict(position['Instrument'])
            instrument.position = pos
            instrument.save()

            # save stock
            stock = pm.PositionStock()
            stock.set_dict(position['Stock'])
            stock.position = pos
            stock.save()

            # save options
            for pos_option in position['Options']:
                option = pm.PositionOption()
                option.set_dict(pos_option)
                option.position = pos
                option.save()

        pos_overall = pm.Overall(**overall)
        pos_overall.date = date
        pos_overall.save()

        # move files into completed folder
        rename(path, FILES['tos_positions_completed'] + fname)

        # set parameters into templates
        parameters = {
            'date': str(date),
            'fname': str(fname)
        }
    except IOError:
        # set parameters into templates
        parameters = {
            'date': '',
            'fname': ''
        }

    return HttpResponse(
        parameters.__str__(),
        content_type='application/json'
    )