Example #1
    def load(self, txtfileurl):
        """Load txt file specified by a url as a DataFrame"""
        datestr = txtfileurl[-10:-4]
        # check if we have the data saved already
        filepath = os.path.join(self.data_dir, datestr + ".pkl")
        if os.path.isfile(filepath):
            with open(filepath, "rb") as f:  # pickle files must be opened in binary mode
                result = pickle.load(f)
        else:
            # detect whether this file is of the newer, cleaner format
            is_new = datetime.strptime(datestr, "%y%m%d") >= self.new_era
            if is_new:
                result = pd.read_csv(
                    txtfileurl, parse_dates=[[6, 7]],
                    index_col=["DATE_TIME"]).sort_index()
                result.columns = [
                    "C/A", "UNIT", "SCP", "STATION", "LINENAME", "DIVISION",
                    "DESC", "ENTRIES", "EXITS"
                ]
            else:
                result = self._load_old(txtfileurl).sort_index()

            result["TURNSTILE_ID"] = result[["C/A", "UNIT", "SCP"]].apply(
                lambda x: " ".join(x), axis=1)
            result.drop(["C/A", "UNIT", "SCP"], axis=1, inplace=True)
            with open(filepath, "wb") as f:
                pickle.dump(result, f)

        return result
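A hedged usage sketch of the caching flow; the enclosing class and its constructor are not shown above, so the TurnstileLoader name and its arguments are assumptions:
loader = TurnstileLoader(data_dir="./cache")  # hypothetical class exposing data_dir / new_era
url = "http://web.mta.info/developers/data/nyct/turnstile/turnstile_180106.txt"
df = loader.load(url)  # datestr "180106" comes from url[-10:-4]; result is cached as 180106.pkl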
Example #2
    def _load_old(self, txtfileurl):
        """Load old-format txt file, which needs quite some reformatting"""
        records = []
        txtfile = urllib2.urlopen(txtfileurl)
        for line in txtfile:
            row = line.strip().split(",")
            if len(row) < 8:
                continue
            ca, unit, scp = row[:3]
            i = 3
            while i < len(row):
                date, time, desc, entries, exits = row[i:i + 5]
                date_time = datetime.strptime(date + " " + time,
                                              "%m-%d-%y %H:%M:%S")
                record = dict(
                    DATE_TIME=date_time,
                    UNIT=unit,
                    SCP=scp,
                    DESC=desc,
                    ENTRIES=int(entries),
                    EXITS=int(exits))
                record["C/A"] = ca
                records.append(record)
                i += 5
        old_df = pd.DataFrame.from_records(records)
        return pd.merge(
            old_df, self.station_df, how="left").set_index(["DATE_TIME"])
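For reference, each old-format row packs the C/A, UNIT, SCP key followed by repeating five-field groups, which is why the inner loop strides by 5. An illustrative line (values invented):
# C/A,UNIT,SCP, then repeating [DATE,TIME,DESC,ENTRIES,EXITS] groups:
# "A002,R051,02-00-00,05-21-11,00:00:00,REGULAR,003169391,001097585,05-21-11,04:00:00,REGULAR,003169415,001097588"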
Example #3
def parser(x):
    return datetime.strptime('190' + x, '%Y-%m')
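This is the two-digit-year parser for the shampoo-sales data: '1-01' becomes '1901-01'. A hedged usage sketch mirroring the read_csv call in Example #16:
from pandas import read_csv

# assumes shampoo-sales.csv with a month column like '1-01', '1-02', ...
series = read_csv('shampoo-sales.csv', header=0, parse_dates=[0],
                  index_col=0, date_parser=parser)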
Example #4
def parser(x):
    return datetime.strptime(x, '%Y/%m/%d')
Example #5
def parser(x, y, z):
    x = x + ':' + y + ':' + z
    return datetime.strptime(x, ' %d/%b/%Y:%H:%M')
Example #6
def parser(x):
    return datetime.strptime('190' + x, '%Y-%m')
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/shampoo.csv'
Example #7
def parser(x):
    return datetime.strptime(x, '%d/%m/%Y')
Example #8
        for i in range(len(d)):
            tmp_data = tmp_data + last_data_shift_list[-i-1]
    else:
        tmp_data = predict_value
        for i in range(len(d)):
            try:
                tmp_data = tmp_data.add(shift_ts_list[-i-1])
            except AttributeError:  # .add only exists on pandas objects
                raise ValueError('What you input is not pd.Series type!')
        tmp_data.dropna(inplace=True)
    return tmp_data  # returning numpy.exp(tmp_data) would recover the original scale, since tmp_data is the log of the raw data




dateparse = lambda dates: datetime.strptime(dates, '%Y-%m')
data = read_csv('AirPassengers.csv', parse_dates=[0], index_col=0, date_parser=dateparse)
#data=read_csv('mmt.csv',parse_dates=[0],index_col=0,date_parser=dateparse);
#data=read_csv('AirPassengers.csv');
#print(data.head())
ts = data['#Passengers']
pyplot.plot(data)
pyplot.show()


def rolling_statistics(timeseries):
    # Determine rolling statistics
    rolmean = timeseries.rolling(12).mean()
    rolstd = timeseries.rolling(12).std()

    #Plot rolling statistics:
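    # (the original is truncated here; a plausible completion of the plotting
    #  section, consistent with the rolling statistics above; styling is assumed)
    orig = pyplot.plot(timeseries, color='blue', label='Original')
    mean = pyplot.plot(rolmean, color='red', label='Rolling Mean')
    std = pyplot.plot(rolstd, color='black', label='Rolling Std')
    pyplot.legend(loc='best')
    pyplot.title('Rolling Mean & Standard Deviation')
    pyplot.show(block=False)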
Example #9
def parser(s):
    return datetime.strptime(s, '%Y-%m-%d %H:%M:%S.%f')
Example #10
# print(X)
    # print(y)
    yhat = forecast_lstm(lstm_model, 1, X)
    # invert scaling
    yhat = invert_scale(scaler, X, yhat)
    # invert differencing
    yhat = inverse_difference(raw_values, yhat, len(test_scaled) + 1 - i)
    # store forecast
    predictions.append(yhat)
    expected = raw_values[len(train) + i + 1]

    # time calculates
    if currentMonth == 13:  # roll over into the next year
        currentYear = currentYear + 1
        currentMonth = 1
    temp = str(currentYear) + '/' + str(currentMonth)
    time.append(temp)
    currentMonth = currentMonth + 1
    print('Month=%s, Predicted=%f, Expected=%f' % (temp, yhat, expected))

# report performance
mse = mean_squared_error(raw_values[-228:], predictions)
print('Test MSE: %.3f' % mse)
# line plot of observed vs predicted
xs = [datetime.strptime(t, '%Y/%m').date() for t in time]
pyplot.plot(xs, raw_values[-228:], color="blue", label="actual")
pyplot.plot(xs, predictions, color="red", linestyle='--', label="predict")
pyplot.legend(loc='upper left')
pyplot.xlabel('time(years)')
pyplot.ylabel('NINO3.4/°C')
pyplot.show()
Example #11
                          ])  # General population pyramid for the country's metropolitan zones (ZMs), 2010

    zm = ama2_grp(Region=ZMs[clave][0], data_fnam=data_fnam,
                  N=N, out_fnam=out_fnam, init_index=init_index, init=init,
                  intervention_day=intervention_day, relax_day=relax_day, trim=trim,
                  Pobs_I=Pobs_I, Pobs_D=Pobs_D,
                  R_rates=R_rates_V2, exit_probs=exit_probs_copy, workdir=workdir,
                  ngrp=ngrp, Int_M=Int_M, age_prop=age_prop)
    zm.age_groups = [0, 25, 50, 65, 100]

    if T > 0:
        zm.RunMCMC(T=T, burnin=burnin, pred=pred, plot_fit=plot_fit)
    return zm


dateparse = lambda x: datetime.strptime(x, '%Y-%m-%d')  # IRAG registry data


def PlotFigsZMs(zm,
                pred=99,
                q=[10, 25, 50, 75, 90],
                blue=True,
                workdir='./../'):

    close('all')

    try:
        zm_vmx_Hosp_RI = read_csv(workdir + "data/hosp/%s_DinHosp.csv" %
                                  (zm.clave, ),
                                  parse_dates=['fecha'],
                                  date_parser=dateparse)
Example #12
def parser(x):
    return datetime.strptime(x, '%M-%S')
Example #13
#
# series_time['data_process'] = series_time['chl']
# series_time['data_process'][
#     (series_time['shift_diff'] >= shiftDiff) & (series_time['shift1'] >= shiftDiff / 2) & (
#     series_time['shift-1'] >= shiftDiff / 2)] = numpy.NaN
# series_time.dropna(inplace=True)
# series_time1 = series_time.resample('1H').mean()
# series_time2 = series_time1.interpolate(method='time')
#
# # the de-duplicated, gap-filled, resampled data
# del series_time2['shift1']
# del series_time2['shift-1']
# del series_time2['shift_diff']
# del series_time2['data_process']
# series_time2.to_csv('logs/results/NBshuizhi2013040120180316_1H_7Feature.csv')
dateparse1 = lambda dates: datetime.strptime(dates, '%Y-%m-%d %H:%M:%S')
series_new = read_csv(
    'logs/results/NBshuizhi2013040120180316_12H_7Feature_Edited1.csv',
    parse_dates=[0],
    index_col=0,
    usecols=[0, 1, 2, 3, 4, 5],
    engine='python',
    date_parser=dateparse1)
series_time1 = series_new.resample('12H').mean()
series_time2 = series_time1.interpolate(method='time')
# series_new = series_new.resample(resample_res).mean()
# series_time2.to_csv('logs/results/NBshuizhi2013040120180316_12H_7Feature_Edited1.csv')
datavalues = series_time2.values
groups = [0, 1, 2, 3, 4]
i = 1
pyplot.figure()
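# (the snippet is truncated after pyplot.figure(); a plausible completion is
#  the usual one-subplot-per-column loop implied by groups and the counter i;
#  an assumption, not the original code)
for group in groups:
    pyplot.subplot(len(groups), 1, i)
    pyplot.plot(datavalues[:, group])
    pyplot.title(series_time2.columns[group], y=0.5, loc='right')
    i += 1
pyplot.show()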
Example #14
def parser(x):
    # note: %l and %s are not standard Python strptime directives
    return datetime.strptime(x, '%Y-%m-%d-%l-%s')
Example #15
def parser(x):
    return datetime.strptime(x, '%b %Y')
Example #16
# fit an LSTM network to training data
def fit_lstm(train, batch_size, nb_epoch, neurons):
    # split into inputs and output, reshape to [samples, timesteps, features]
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()
    return model

# make a one-step forecast
def forecast_lstm(model, batch_size, X):
    X = X.reshape(1, 1, len(X))
    yhat = model.predict(X, batch_size=batch_size)
    return yhat[0,0]

# load dataset
series = read_csv('shampoo-sales.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=lambda x: datetime.strptime('190'+x, '%Y-%m'))

# transform data to be stationary
raw_values = series.values
diff_values = difference(raw_values, 1)

# transform data to be supervised learning
supervised = timeseries_to_supervised(diff_values, 1)
supervised_values = supervised.values

# split data into train and test-sets
train, test = supervised_values[0:-12], supervised_values[-12:]

# transform the scale of the data
scaler, train_scaled, test_scaled = scale(train, test)
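The helpers difference, timeseries_to_supervised, scale, invert_scale, and inverse_difference are called above (and in Example #10) but never shown; minimal sketches consistent with those call sites, offered as assumptions rather than the original definitions:
import numpy as np
from pandas import Series, DataFrame, concat
from sklearn.preprocessing import MinMaxScaler

def difference(dataset, interval=1):
    # d[i] = x[i] - x[i - interval]
    return Series([dataset[i] - dataset[i - interval] for i in range(interval, len(dataset))])

def inverse_difference(history, yhat, interval=1):
    # undo difference(): add back the observation interval steps earlier
    return yhat + history[-interval]

def timeseries_to_supervised(data, lag=1):
    # frame the series as (lagged input, current output) pairs
    df = DataFrame(data)
    columns = [df.shift(i) for i in range(1, lag + 1)]
    columns.append(df)
    return concat(columns, axis=1).fillna(0)

def scale(train, test):
    # fit the scaler on train only, then apply it to both sets
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train)
    return scaler, scaler.transform(train), scaler.transform(test)

def invert_scale(scaler, X, value):
    # rebuild a full row so inverse_transform sees the expected shape
    row = [x for x in X] + [value]
    array = np.array(row).reshape(1, len(row))
    return scaler.inverse_transform(array)[0, -1]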
Example #17
def parser(x):
    return datetime.strptime(x, '%m-%d-%H-%M')
Example #18
def parser(x):
    return datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
Example #19
def parser(x):
    return datetime.strptime(x, '%y-%m')
Example #20
def parser(x):
    return datetime.strptime(str(x), '%d-%m-%y')
Example #21
            if bic < init_bic:
                init_p = p
                init_q = q
                init_properModel = result_ARMA
                init_bic = bic
    return init_bic, init_p, init_q, init_properModel


# Restore the differenced series (invert the log transform)
def predict_recover(ts):
    ts = np.exp(ts)
    return ts


# Read the data
dateparse1 = lambda dates: datetime.strptime(dates, '%Y-%m-%d')
series_new = read_csv('ZS01-溶解氧.csv',
                      parse_dates=[0],
                      index_col=0,
                      usecols=[0, 1],
                      engine='python',
                      date_parser=dateparse1)

train_data = series_new.values[:750]
test_data = series_new.values[750:976]
# order = st.arma_order_select_ic(diff1, max_ar=50, max_ma=50, ic=['aic', 'bic', 'hqic'])
# model = ARMA(diff1, order.bic_min_order)
init_bic, init_p, init_q, init_properModel = proper_model(train_data, 5)
# result_arma = model.fit(disp=-1, method='css')

train_predict = init_properModel.predict()
Example #22
def parser(x):
    return datetime.strptime(f"190{x}", "%Y-%m")
Example #23
from statistics import mean
# type %matplotlib qt  to the console

df = pd.ExcelFile("Dataset - 1.xlsx")  # load the Excel file

df = df.parse('4567')  # read the sheet named '4567'

df = df[['fullness_rate (%)', 'record_date']]
values = df['fullness_rate (%)']  # parallel lists
values = list(values)
dates = df['record_date']
dates = list(dates)

# converting string dates
for i in range(len(dates)):
    new_date = datetime.strptime(dates[i], '%Y-%m-%d %H:%M:%S+00:00')
    dates[i] = new_date

# string to datetime.
df['record_date'] = pd.DataFrame(dates)
df.set_index('record_date', inplace=True)

#ax = df.plot(title="Fullness Rate and Days" ,colormap='jet',marker='.')
#ax.set_xlabel("Days")
#ax.set_ylabel("Fullness Rate (%) ")

## making the series stationary even though it's a small dataset

# AR model
X = df.values
X[88:] = X[88:] + 0.6635
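The example ends right after the level-shift correction at index 88; a hedged sketch of the AR fit the "# AR model" comment points toward, using statsmodels (the train/test split and lag order are assumptions):
from statsmodels.tsa.ar_model import AutoReg

series = X[:, 0]                        # X is the single-column values array from above
train, test = series[:-7], series[-7:]  # hold out the last week
model_fit = AutoReg(train, lags=12).fit()   # lag order is an assumption
predictions = model_fit.predict(start=len(train), end=len(series) - 1)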
Example #24
def parser(x):
    return datetime.strptime(x, '%d-%m-%Y %H:%M')
Example #25
def parser(x):
    if x.endswith(('10', '11', '12')):
        return datetime.strptime(x, '%Y%m')
    else:
        return datetime.strptime(x, '%Y0%m')
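Assuming zero-padded YYYYMM inputs, both branches agree on the result; for example:
parser('201811')  # ends with '11' -> '%Y%m'  -> datetime(2018, 11, 1)
parser('201803')  # else branch    -> '%Y0%m' -> datetime(2018, 3, 1)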
Example #26
def parser(x):
    return datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
Example #27
def parser(x):
    #        print(x)
    return datetime.strptime('190' + x, '%Y-%m')
Example #28
def test_arimax(data,
                testdate,
                nbjourpred,
                nbjourtest,
                p=3,
                d=0,
                q=3,
                P=0,
                D=1,
                Q=2,
                s=24):
    from sklearn.metrics import mean_squared_error
    from math import sqrt
    horizon = nbjourpred
    from datetime import timedelta
    from datetime import datetime
    test_date_time = datetime.strptime(testdate, '%d/%m/%Y')
    end_test = test_date_time + timedelta(days=horizon)
    end_train = test_date_time
    start_train = test_date_time - timedelta(days=nbjourtest)
    train = data[start_train:end_train]
    train.drop(train.tail(1).index, inplace=True)
    test = data[test_date_time:end_test]
    test.drop(test.tail(1).index, inplace=True)

    arima_model = sarimax(train, p, d, q, P, D, Q, s,
                          getexplanatoryvariables(train))

    result = dict()
    ##### 1-day horizon
    end_test = test_date_time + timedelta(days=1)
    test = data[test_date_time:end_test]
    test.drop(test.tail(1).index, inplace=True)
    prevision = arima_model.predict(start=train.shape[0],
                                    end=train.shape[0] - 1 + 1 * 24,
                                    exog=getexplanatoryvariables(test))
    mae = mean_absolute_error(test, prevision)
    rmse = sqrt(mean_squared_error(test, prevision))

    result["MAE1"] = mae
    result["rmse1"] = rmse
    ##### 2-day horizon

    end_test = test_date_time + timedelta(days=2)
    test = data[test_date_time:end_test]
    test.drop(test.tail(1).index, inplace=True)
    prevision = arima_model.predict(start=train.shape[0],
                                    end=train.shape[0] - 1 + 2 * 24,
                                    exog=getexplanatoryvariables(test))
    mae = mean_absolute_error(test, prevision)
    rmse = sqrt(mean_squared_error(test, prevision))

    result["MAE2"] = mae
    result["rmse2"] = rmse

    ##### 3-day horizon

    end_test = test_date_time + timedelta(days=3)
    test = data[test_date_time:end_test]
    test.drop(test.tail(1).index, inplace=True)

    prevision = arima_model.predict(start=train.shape[0],
                                    end=train.shape[0] - 1 + 3 * 24,
                                    exog=getexplanatoryvariables(test))
    mae = mean_absolute_error(test, prevision)
    rmse = sqrt(mean_squared_error(test, prevision))

    result["MAE3"] = mae
    result["rmse3"] = rmse

    ##### 4-day horizon

    end_test = test_date_time + timedelta(days=4)
    test = data[test_date_time:end_test]
    test.drop(test.tail(1).index, inplace=True)

    prevision = arima_model.predict(start=train.shape[0],
                                    end=train.shape[0] - 1 + 4 * 24,
                                    exog=getexplanatoryvariables(test))
    mae = mean_absolute_error(test, prevision)
    rmse = sqrt(mean_squared_error(test, prevision))

    result["MAE4"] = mae
    result["rmse4"] = rmse

    ##### 7-day horizon

    end_test = test_date_time + timedelta(days=7)
    test = data[test_date_time:end_test]
    test.drop(test.tail(1).index, inplace=True)

    prevision = arima_model.predict(start=train.shape[0],
                                    end=train.shape[0] - 1 + 7 * 24,
                                    exog=getexplanatoryvariables(test))
    mae = mean_absolute_error(test, prevision)
    rmse = sqrt(mean_squared_error(test, prevision))

    result["MAE7"] = mae
    result["rmse7"] = rmse
    return result
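The five per-horizon blocks above differ only in the number of days; a hedged refactor sketch that collapses them into one loop (evaluate_horizons is a new name; the sarimax and getexplanatoryvariables helpers are assumed unchanged):
from datetime import timedelta
from math import sqrt
from sklearn.metrics import mean_absolute_error, mean_squared_error

def evaluate_horizons(arima_model, data, train, test_date_time, horizons=(1, 2, 3, 4, 7)):
    # same logic as the repeated blocks in test_arimax, parameterized by horizon
    result = dict()
    for h in horizons:
        end_test = test_date_time + timedelta(days=h)
        test = data[test_date_time:end_test]
        test.drop(test.tail(1).index, inplace=True)
        prevision = arima_model.predict(start=train.shape[0],
                                        end=train.shape[0] - 1 + h * 24,
                                        exog=getexplanatoryvariables(test))
        result["MAE%d" % h] = mean_absolute_error(test, prevision)
        result["rmse%d" % h] = sqrt(mean_squared_error(test, prevision))
    return result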
Example #29
def parser(x):
    return datetime.strptime(x, '%Y %m %d %H')
Example #30
def parser(x):
    return datetime.strptime(x, '%Y-%m-%d')
Example #31
class Command(BaseCommand):
    help = "Run multiple searches and log the cheapest and best value switches"
    requires_migrations_checks = True

    base_data = {
        'data_mining': True,
        'source_market': 'UK',
        'place_name': '',
        'latitude': '0',
        'longitude': '0',
        'occupants': '2',
        'currency': 'gbp',
        'county': '',
    }

    base_check_in = datetime.strptime('2017-05-05', '%Y-%m-%d')
    check_in_range = date_range(base_check_in,
                                base_check_in + DateOffset(days=60))

    cities = [
        {
            'city': 'New York',
            'state': 'NY',
            'country': 'US',
        },
        {
            'city': 'Paradise',
            'state': 'NV',
            'country': 'US',
        },
        {
            'city': 'Austin',
            'state': 'TX',
            'country': 'US',
        },
        {
            'city': 'London',
            'state': 'England',
            'country': 'GB',
        },
        {
            'city': 'Barcelona',
            'state': 'CT',
            'country': 'ES',
        },
        {
            'city': 'Milan',
            'state': 'Lombardy',
            'country': 'IT',
        },
        {
            'city': 'Shanghai',
            'state': 'Shanghai',
            'country': 'CN',
        },
        {
            'city': 'Bangkok',
            'state': '',
            'country': 'TH',
        },
        {
            'city': 'Singapore',
            'state': '',
            'country': 'SG',
        },
    ]

    stay_durations = [3, 4, 5, 6]

    def handle(self, *args, **options):
        try:
            # rename there and back to verify the output file isn't locked open elsewhere
            os.rename('analysis_output.csv', 'check_file_access.csv')
            os.rename('check_file_access.csv', 'analysis_output.csv')
        except OSError:
            raise Exception(
                'Destination file is still open. Please close before running!')

        all_stays = []

        for city in self.cities:
            for check_in in self.check_in_range:
                for duration in self.stay_durations:
                    check_out = check_in + DateOffset(days=duration)
                    check_in_range = date_range(check_in,
                                                check_out - DateOffset(days=1))
                    data = self.base_data.copy()
                    data.update({
                        'checkIn': check_in,
                        'checkOut': check_out,
                        'check_in_range': check_in_range,
                        'country': city['country'],
                        'state': city['state'],
                        'city': city['city'],
                    })

                    stays = tasks.execute_search(data, '', None)
                    result_count = len(stays)
                    if result_count == 0:
                        continue

                    stays.query('hotel_2_id != -1', inplace=True)
                    grouping_columns = [
                        'primary_star_rating', 'min_review_tier'
                    ]

                    stays.sort_values('stay_cost', inplace=True)
                    unrestricted_low_cost_stays = stays.groupby(
                        grouping_columns).nth(0)
                    unrestricted_low_cost_stays['restricted'] = False

                    stays.sort_values('cost_per_quality_unit', inplace=True)
                    unrestricted_best_value_stays = stays.groupby(
                        grouping_columns).nth(0)
                    unrestricted_best_value_stays['restricted'] = False

                    # col == col is False for NaN, so this keeps rows where both benchmark costs exist
                    switches_with_both_benchmarks = \
                        'entire_stay_cost_1 == entire_stay_cost_1 \
                        and entire_stay_cost_2 == entire_stay_cost_2'

                    stays.query(switches_with_both_benchmarks, inplace=True)

                    stays.sort_values('stay_cost', inplace=True)
                    restricted_low_cost_stays = stays.groupby(
                        grouping_columns).nth(0)
                    restricted_low_cost_stays['restricted'] = True

                    stays.sort_values('cost_per_quality_unit', inplace=True)
                    restricted_best_value_stays = stays.groupby(
                        grouping_columns).nth(0)
                    restricted_best_value_stays['restricted'] = True

                    scenarios = [
                        unrestricted_low_cost_stays,
                        unrestricted_best_value_stays,
                        restricted_low_cost_stays,
                        restricted_best_value_stays,
                    ]

                    stays = concat(scenarios)
                    stays.reset_index(inplace=True)
                    stays.drop_duplicates(inplace=True)

                    stays['city'] = city['city']
                    stays['check_in'] = check_in
                    stays['duration'] = duration
                    stays['result_count'] = result_count

                    all_stays.append(stays)
                    logger.warning('{}, {:%Y-%m-%d}, {}'.format(
                        city['city'], check_in, duration))

        concat(all_stays).to_csv('analysis_output.csv', index=False)
Example #32
def main():
    """
        Main function
    """
    # load data
    data_dir = './jdata/'
    sales = pd.read_csv(os.path.join(data_dir, 'sku_sales.csv'))
    items = pd.read_csv(os.path.join(data_dir, 'sku_info.csv'))
    attr = pd.read_csv(os.path.join(data_dir, 'sku_attr.csv'))
    promo = pd.read_csv(os.path.join(data_dir, 'sku_prom.csv'))
    quantile = pd.read_csv(os.path.join(data_dir, 'sku_quantile.csv'))
    promo_test = pd.read_csv(os.path.join(data_dir, 'sku_prom_testing_2018Jan.csv'))

    sales['datetime'] = sales['date'].map(lambda x: datetime.strptime(x, '%Y-%m-%d'))

    # left join items
    sales_items = pd.merge(sales, items, how='left', on='item_sku_id')
    sales = sales_items

    n_seq = 8
    n_feature = 2
    n_batch = 1
    n_epochs = 50
    n_neurons = 3
    all_off_sets = np.arange(1, 32)
    #all_off_sets = np.arange(1, 2)
    all_item_sku_ids = items['item_sku_id'].unique()
    #all_item_sku_ids=np.arange(1, 2)
    result_train = []
    result_test = []
    print("预处理:")
    for off_set in all_off_sets:
        for item_sku_id in all_item_sku_ids:
            sales_single_sku = sales[sales['item_sku_id'] == item_sku_id].sort_values(['dc_id', 'datetime'])
            supervised_values, train, test = series_to_supervised_1(sales_single_sku, n_seq, off_set)
            if (len(result_train)) == 0:
                result_train = train
                result_test = test
            else:
                result_train = pd.concat([result_train, train])
                result_test = pd.concat([result_test, test])

    all_train = result_train.copy()
    y_train = result_train.pop('var0')
    x_train = result_train
    all_test = result_test.copy()
    y_test = result_test.pop('var0')
    x_test = result_test

    # training
    clf = RandomForestRegressor(n_estimators=100, criterion='mae', verbose=1)  # use 100 trees here
    # clf.fit(x_train.head(10000),y_train.head(10000))
    clf.fit(x_train, y_train)
    score = clf.score(x_test, y_test)
    result = clf.predict(x_test)
    # calculate MSE
    mse = mean_squared_error(y_test, result)
    print('Test MSE: %.3f' % mse)

    # submit
    columns = ['vendibility', 'quantity']
    features = []
    result = []
    for off_set in all_off_sets:
        for item_sku_id in all_item_sku_ids:
            dcs = sales[sales['item_sku_id'] == item_sku_id]['dc_id'].unique()
            for dc in dcs:
                feature = {}
                sales_single_sku_submit = sales[(sales['item_sku_id'] == item_sku_id) & (sales['dc_id'] == dc)].sort_values(
                    ['dc_id', 'datetime']).tail(n_seq)
                sales_single_sku_submit_feature = sales_single_sku_submit[columns].values.reshape(1, n_seq, n_feature)
                item_first_cate_cd = sales.head(1)['item_first_cate_cd'].unique()[0]
                item_second_cate_cd = sales.head(1)['item_second_cate_cd'].unique()[0]
                item_third_cate_cd = sales.head(1)['item_third_cate_cd'].unique()[0]
                brand_code = sales.head(1)['brand_code'].unique()[0]
                item_phase = np.array([brand_code, dc, item_first_cate_cd, item_second_cate_cd, item_third_cate_cd])
                sales_single_sku_submit_feature = np.append(item_phase, sales_single_sku_submit_feature)
                quantity = clf.predict(sales_single_sku_submit_feature.reshape(1, -1))  # sklearn expects 2-D input
                feature['date'] = off_set
                feature['dc_id'] = dc
                feature['item_sku_id'] = item_sku_id
                feature['quantity'] = quantity
                features.append(feature)


    result = pd.DataFrame(features)
    print('Prediction saved: all')
    result.to_csv('./jdata_out/submit_rf_20180724.csv', index=False)
Example #33
def parser_one(x):
    return datetime.strptime(x, '%d/%m/%Y %H:%M:%S')
Example #34
def main():
    """
        Main function
    """
    # load data
    data_dir = './jdata/'
    sales = pd.read_csv(os.path.join(data_dir, 'sku_sales.csv'))
    items = pd.read_csv(os.path.join(data_dir, 'sku_info.csv'))
    attr = pd.read_csv(os.path.join(data_dir, 'sku_attr.csv'))
    promo = pd.read_csv(os.path.join(data_dir, 'sku_prom.csv'))
    quantile = pd.read_csv(os.path.join(data_dir, 'sku_quantile.csv'))
    promo_test = pd.read_csv(os.path.join(data_dir, 'sku_prom_testing_2018Jan.csv'))

    sales['datetime'] = sales['date'].map(lambda x: datetime.strptime(x, '%Y-%m-%d'))

    # LSTM parameters
    n_seq = 8
    n_feature = 2
    n_batch = 1
    n_epochs = 100
    n_neurons = 3

    # all item_sku_id values
    columns = ['vendibility', 'quantity']
    features = []
    result = []

    all_off_sets = np.arange(1, 32)
    # all_off_sets = np.arange(1, 3)
    all_item_sku_ids = items['item_sku_id'].unique()
    # all_item_sku_ids=np.arange(100, 102)
    for off_set in all_off_sets:
        all_train = []
        all_test = []
        # train and predict
        for item_sku_id in all_item_sku_ids:
            # prepare the time-series data
            sales_single_sku = sales[sales['item_sku_id'] == item_sku_id].sort_values(['dc_id', 'datetime'])
            supervised_values, train, test = series_to_supervised_1(sales_single_sku, n_seq, off_set)
            if(len(all_train) == 0):
                all_train = train
                all_test = test
            else:
                all_train = all_train.append(train)
                all_test = all_test.append(test)

        print('Training: off_set={}'.format(off_set))
        model = fit_lstm(all_train.values, n_seq, n_feature, n_batch, n_epochs, n_neurons)  # fit on the aggregated set, not just the last sku's train
        #eval_lstm(model, test, n_seq, n_feature, n_batch)


        # train and predict
        for item_sku_id in all_item_sku_ids:
            # for submit
            # get the 6 distinct dc_ids
            dcs = sales[sales['item_sku_id'] == item_sku_id]['dc_id'].unique()
            for dc in dcs:
                feature = {}
                sales_single_sku = sales[(sales['item_sku_id'] == item_sku_id) & (sales['dc_id'] == dc)].sort_values(
                    ['dc_id', 'datetime'])
                sales_single_sku_submit = sales_single_sku.tail(n_seq)
                feature['date'] = off_set
                feature['dc_id'] = dc
                feature['item_sku_id'] = item_sku_id
                if(len(sales_single_sku) == 0):
                    feature['quantity'] = 0

                if (len(sales_single_sku_submit) < n_seq and len(sales_single_sku_submit) > 0):
                    feature['quantity'] = sales_single_sku.tail(1)['quantity'].unique()[0]

                if (len(sales_single_sku_submit) == n_seq ):
                    sales_single_sku_submit_feature = sales_single_sku_submit[columns].values.reshape(1, n_seq,
                                                                                                      n_feature)
                    quantity = forecast_lstm(model, sales_single_sku_submit_feature, n_batch)[0]
                    feature['quantity'] = quantity

                features.append(feature)
            print('Prediction saved: item_sku_id={}, off_set={}'.format(item_sku_id, off_set))
            result = pd.DataFrame(features)

        print('Prediction saved: item_sku_id={}, all offsets'.format(item_sku_id))
        result = pd.DataFrame(features)
        result.to_csv('./jdata_out/submit.csv', index=False)
    result = pd.DataFrame(features)
    print('Prediction saved: all')
    result.to_csv('./jdata_out/submit.csv', index=False)
Example #35
def parser(x):
    return datetime.strptime(x, '%m/%d/%Y')
Example #36
def parser(date_string):
    return datetime.strptime(date_string, '%Y-%m-%d')
Example #37
# ===========================
# Reading configuration files
# ===========================

data_conf = utils.Get_Data_From_JSON(ROOT_DIR + "/dbfs" + cwd + "data.json")
model_conf = utils.Get_Data_From_JSON(ROOT_DIR + "/dbfs" + cwd + "config.json")
print(data_conf)
print(model_conf)

# Time window and number of customers
start_date, end_date = data_conf['start_date'], data_conf['end_date']
N_days_X, N_days_y = int(data_conf['number_of_historical_days']), int(
    data_conf['number_of_predicted_days'])  # 365, 92

end_date_dt = datetime.strptime(end_date, "%Y-%m-%d")
start_date_for_prediction_dt = end_date_dt - relativedelta(days=N_days_X +
                                                           N_days_y)
start_date_for_prediction = start_date_for_prediction_dt.strftime("%Y-%m-%d")
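A quick worked check of the date arithmetic above (values taken from the commented defaults):
# e.g. with end_date = "2018-12-31" and N_days_X, N_days_y = 365, 92,
# the window starts 457 days earlier: start_date_for_prediction == "2017-09-30"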

start_date_dt, end_date_dt, start_date_prediction, end_date_prediction, end_date_plusOneDay, end_date_minus_6month = utils.dates_definitions(
    start_date, end_date, N_days_X, N_days_y)

time_range = pd.date_range(start_date, end_date, freq='D')

# Type of dataset desired
# Case we want a dataset to train a model: use 1e5 and serving_mode=False
# Case we want an unseen dataset to serve the model on: use 2.5e6 and serving_mode=True
N_customers = 1e2  #2.5e6
serving_mode = False  # True if creating data for serving
Example #38
def parser(x):
    return datetime.strptime('190' + x, '%Y-%m')
Example #39
def complete(request, date=None):
    """
    Ajax view for info complete insert positions into db
    :param date: str
    :param request: dict
    :rtype : render
    """
    try:
        # get path then open file
        path = OpenDir().get_path(date)
        fname = OpenDir().get_fname_from_path(path)

        # after opening, move the date back one business day
        pd_date = datetime.strptime(date, '%Y-%m-%d')
        pd_date = pd_date - BDay(1)
        date = pd_date.strftime('%Y-%m-%d')

        # continues...
        positions, overall = OpenPosCSV(path).read()

        for position in positions:
            # save positions
            pos = pm.Position(
                symbol=position['Symbol'],
                company=position['Company'],
                date=date
            )
            pos.save()

            # save instrument
            instrument = pm.PositionInstrument()
            instrument.set_dict(position['Instrument'])
            instrument.position = pos
            instrument.save()

            # save stock
            stock = pm.PositionStock()
            stock.set_dict(position['Stock'])
            stock.position = pos
            stock.save()

            # save options
            for pos_option in position['Options']:
                option = pm.PositionOption()
                option.set_dict(pos_option)
                option.position = pos
                option.save()

        pos_overall = pm.Overall(**overall)
        pos_overall.date = date
        pos_overall.save()

        # move files into completed folder
        rename(path, FILES['tos_positions_completed'] + fname)

        # set parameters into templates
        parameters = {
            'date': str(date),
            'fname': str(fname)
        }
    except IOError:
        # set parameters into templates
        parameters = {
            'date': '',
            'fname': ''
        }

    return HttpResponse(
        str(parameters),
        content_type='application/json'
    )