def train(
    location,
    unique,
    counts,
    model_base_location,
    date_list=None,
    train_new_model=True,
    train_window=7,
    num_forecast=7,
):
    # pad with zeros for time series prediction
    if date_list is not None:
        inter_dates, inter_cases = pp.interpolate_cases(
            unique, counts, zeros=True, end=str(date_list[-1]))
    else:
        inter_dates, inter_cases = pp.interpolate_cases(unique, counts)

    # create a scaler for this location and normalize the data
    scaler = pp.create_scaler()
    normalized = pp.normalize_data(np.array(inter_cases), scaler)
    inout_seq = pp.create_tensors(normalized, train_window)

    # train or load model
    if train_new_model:
        model = train_model(inout_seq)
        torch.save(model, model_base_location + location + ".pt")
    else:
        model = torch.load(model_base_location + location + ".pt")

    # make predictions
    normalized_preds = predict(model, num_forecast, normalized, train_window)
    predictions = pp.denormalize_data(normalized_preds, scaler)

    # create date list for predictions
    prediction_dates = pp.get_date_list(
        start=str(datetime.strptime(inter_dates[-1], "%Y-%m-%d") + timedelta(days=1)),
        end=str(datetime.strptime(inter_dates[-1], "%Y-%m-%d") + timedelta(days=num_forecast)),
    )

    # update case data with predictions
    inter_dates = np.append(inter_dates, prediction_dates)
    inter_cases = np.append(inter_cases, predictions)
    inter_dates = inter_dates.tolist()
    inter_cases = inter_cases.tolist()

    return location, inter_dates, inter_cases, prediction_dates, predictions
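# A minimal, self-contained sketch of the roll-forward forecasting loop that the
# `predict(model, num_forecast, normalized, train_window)` call above is assumed to
# implement: seed with the last `train_window` normalized values, predict one step,
# append it, and slide the window. `DummyModel` and `rollforward_predict` are
# illustrative stand-ins, not the project's actual model or helper.
import torch
import torch.nn as nn


class DummyModel(nn.Module):
    """Placeholder one-step forecaster: predicts the mean of the input window."""

    def forward(self, window):
        return window.mean().unsqueeze(0)


def rollforward_predict(model, num_forecast, normalized, train_window):
    history = list(torch.as_tensor(normalized, dtype=torch.float32))
    model.eval()
    with torch.no_grad():
        for _ in range(num_forecast):
            window = torch.stack(history[-train_window:])
            next_val = model(window)              # one-step-ahead prediction
            history.append(next_val.squeeze(0))   # feed the prediction back in
    # return only the newly forecast values
    return torch.stack(history[-num_forecast:]).numpy()


# usage with toy data
preds = rollforward_predict(DummyModel(), num_forecast=7,
                            normalized=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
                            train_window=7)
print(preds.shape)  # (7,)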
def main():
    # load the pickled request totals and min-max normalize them
    f = open('flavor_sum.txt', 'rb')
    flavor_sum = pickle.load(f)
    flavor_normal, data_max, data_min = normalize_data(flavor_sum)
    # flavor_sum1 = roll_mean(7, flavor_sum)
    train_data, supervise_data = slice_data(flavor_normal, 7)

    # train the two-layer network on 7-day windows and time it
    start = time.perf_counter()
    parameters = two_layer_model(train_data, supervise_data, (7, 6, 7),
                                 learning_rate=0.6, lambd=0.02, iterations=50)
    elapsed = time.perf_counter() - start
    print(elapsed)

    # predict the next 7 values from the last 7 observed values
    test = flavor_normal[len(flavor_normal) - 7:len(flavor_normal)]
    # flavor_predict_normal = []
    # for i in range(7):
    flavor_predict_normal = predict(test, parameters)  # .append(result[0])
    # test.append(result[0])
    # test.pop(0)
    flavor_data = recover_normalized_data(flavor_predict_normal, data_max, data_min)
    flavor_sum_predict = sum(flavor_data)
    print(flavor_sum_predict)

    # estimate each flavor's share of the total from its historical frequency
    f1 = open('flavor_dict_01-05.txt', 'rb')
    flavor = pickle.load(f1)
    flavor_prob = {}
    flavor_total = {}
    total = sum(flavor_sum)
    for key in flavor.keys():
        flavor_total[key] = sum(flavor[key])
        flavor_prob[key] = flavor_total[key] / float(total)
        if flavor_prob[key] < 0.025:
            flavor_prob[key] = 0.0
    print(flavor_prob)

    # distribute the predicted total across flavors by probability
    specific_flavor_num = {}
    for key in flavor_prob.keys():
        specific_flavor_num[key] = flavor_sum_predict * flavor_prob[key]
    print(specific_flavor_num)
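# A minimal sketch, under assumptions, of the min-max scaling that `normalize_data`
# and `recover_normalized_data` above appear to perform (they return and consume
# data_max / data_min). The *_sketch functions are illustrative only, not the
# project's actual implementations.
def normalize_data_sketch(series):
    data_max, data_min = max(series), min(series)
    span = float(data_max - data_min) or 1.0  # guard against a constant series
    normal = [(x - data_min) / span for x in series]
    return normal, data_max, data_min


def recover_normalized_data_sketch(normal, data_max, data_min):
    span = float(data_max - data_min)
    return [x * span + data_min for x in normal]


# round-trip check on toy data
normal, hi_, lo_ = normalize_data_sketch([3, 7, 11, 5])
restored = recover_normalized_data_sketch(normal, hi_, lo_)  # -> [3.0, 7.0, 11.0, 5.0]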
def run_task1(results_dir,
              dataset,
              vocabulary,
              test_data,
              augment=False,
              epochs=15,
              skip_test_prediction=False,
              seed=2021):
    HPARAMS = {}
    val_size = HPARAMS["val_size"] = 0.2
    normalize = HPARAMS["normalize"] = True
    HPARAMS["seed"] = seed
    seed_everything(seed)
    split_videos = HPARAMS["split_videos"] = False

    if normalize:
        dataset = normalize_data(deepcopy(dataset))
        if not skip_test_prediction:
            test_data = normalize_data(deepcopy(test_data))
        else:
            test_data = None

    train_data, val_data, anno_perc_df = split_validation(dataset,
                                                          seed=seed,
                                                          vocabulary=vocabulary,
                                                          test_size=val_size,
                                                          split_videos=split_videos)
    num_classes = len(anno_perc_df.keys())
    feature_dim = HPARAMS["feature_dim"] = (2, 7, 2)

    # Generator parameters
    past_frames = HPARAMS["past_frames"] = 50
    future_frames = HPARAMS["future_frames"] = 50
    frame_gap = HPARAMS["frame_gap"] = 1
    use_conv = HPARAMS["use_conv"] = True
    batch_size = HPARAMS["batch_size"] = 128

    # Model parameters
    dropout_rate = HPARAMS["dropout_rate"] = 0.5
    learning_rate = HPARAMS["learning_rate"] = 5e-4
    layer_channels = HPARAMS["layer_channels"] = (128, 64, 32)
    conv_size = HPARAMS["conv_size"] = 5
    augment = HPARAMS["augment"] = augment
    class_to_number = HPARAMS['class_to_number'] = vocabulary
    epochs = HPARAMS["epochs"] = epochs

    trainer = Trainer(train_data=train_data,
                      val_data=val_data,
                      test_data=test_data,
                      feature_dim=feature_dim,
                      batch_size=batch_size,
                      num_classes=num_classes,
                      augment=augment,
                      class_to_number=class_to_number,
                      past_frames=past_frames,
                      future_frames=future_frames,
                      frame_gap=frame_gap,
                      use_conv=use_conv)

    trainer.initialize_model(layer_channels=layer_channels,
                             dropout_rate=dropout_rate,
                             learning_rate=learning_rate,
                             conv_size=conv_size)

    trainer.train(epochs=epochs)

    augment_str = '_augmented' if augment else ''
    trainer.model.save(f'{results_dir}/task1{augment_str}.h5')
    np.save(f"{results_dir}/task1{augment_str}_hparams", HPARAMS)

    val_metrics = trainer.get_validation_metrics()
    val_metrics.to_csv(f"{results_dir}/task1_metrics_val.csv", index=False)

    if not skip_test_prediction:
        test_results = trainer.get_test_predictions()
        np.save(f"{results_dir}/test_results", test_results)
    else:
        test_results = {}

    del trainer  # clear RAM as the test dataset is large
    gc.collect()
    return test_results
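# A hedged sketch of how the generator parameters above (past_frames, future_frames,
# frame_gap) are typically turned into a per-frame context window: for each labelled
# frame, take `past_frames` frames before and `future_frames` after, sampled every
# `frame_gap` frames, padding at the sequence edges. This is an assumption about what
# the Trainer's data generator does, not its actual code.
import numpy as np


def context_window(features, idx, past_frames=50, future_frames=50, frame_gap=1):
    """features: (num_frames, feature_dim) array; returns the stacked context for frame idx."""
    offsets = np.arange(-past_frames, future_frames + 1) * frame_gap
    indices = np.clip(idx + offsets, 0, len(features) - 1)  # edge padding by clipping
    return features[indices]


# usage on toy data: 200 frames, flattened (2, 7, 2) feature_dim -> 28 values per frame
toy_features = np.random.rand(200, 28)
window = context_window(toy_features, idx=5)
print(window.shape)  # (101, 28): 50 past + current + 50 future frames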
""" The main training script. Written by Tanmay Patil """ import matplotlib.pyplot as plt from model import create_model, train_model from preprocess import object_to_date_time, get_daily_weather_data, normalize_data, read_csv, get_training_data if __name__ == "__main__": df = read_csv("../data/processed/delhi_weather_data_processed.csv") df = object_to_date_time(df) daily_weather = get_daily_weather_data(df) daily_weather = normalize_data(daily_weather) X,y = get_training_data(daily_weather) X_train = X[:7300,::] X_test = X[7300:,::] y_train = y[:7300] y_test = y[7300:] model = create_model() history = train_model(model, X_train, y_train) train_loss = history.history['loss'] x_axis = [*range(1, len(train_loss + 1))] plt.title('Training Loss') plt.plot(x_axis, train_loss) plt.show()
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import pandas as pd
from preprocess import data_split, normalize_data
import matplotlib.pyplot as plt
import numpy as np

device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
DATASET_ROOT = './'

df = pd.read_csv("./STT.csv", index_col=0)
STT = df[df.symbol == 'STT'].copy()
#print(GOOG)
STT.drop(['symbol'], axis=1, inplace=True)  # drop the symbol column
STT_new = normalize_data(STT)
#print(GOOG_new)

window = 30
X_train, y_train, X_test, y_test = data_split(STT_new, window)

INPUT_SIZE = 5
HIDDEN_SIZE = 64
NUM_LAYERS = 1
OUTPUT_SIZE = 1
learning_rate = 0.001
num_epochs = 50

rnn = test_LSTM(input_dim=INPUT_SIZE,
                hidden_dim=HIDDEN_SIZE,
                num_layers=NUM_LAYERS,
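# A minimal sketch, under assumptions, of the sliding-window split that
# `preprocess.data_split(STT_new, window)` above appears to perform: each sample is
# `window` consecutive rows of the normalized price features, and the target is a
# value from the following row. The 80/20 chronological split and the target column
# index are assumptions, not the project's actual logic.
import numpy as np


def data_split_sketch(values, window, train_frac=0.8, target_col=-1):
    X, y = [], []
    for i in range(len(values) - window):
        X.append(values[i:i + window])            # (window, num_features)
        y.append(values[i + window, target_col])  # next-step target
    X, y = np.array(X), np.array(y)
    split = int(len(X) * train_frac)
    return X[:split], y[:split], X[split:], y[split:]


# usage on toy data: 100 days x 5 features, window of 30
toy = np.random.rand(100, 5)
X_tr, y_tr, X_te, y_te = data_split_sketch(toy, window=30)
print(X_tr.shape, y_tr.shape)  # (56, 30, 5) (56,)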
def nn_app_utils(flavor_sum, flavor, predict_date):
    '''
    :param flavor_sum: sequence of total VM request counts per period
    :param flavor: dict mapping each VM flavor to its per-period request counts
    :param predict_date: list of dates covering the forecast horizon
    :return: (total predicted VM count, dict of predicted counts per flavor)
    '''
    flavor_normal, data_mean, data_sigma = normalize_data(flavor_sum)
    # flavor_sum1 = roll_mean(7, flavor_sum)
    day_len = (predict_date[-1] - predict_date[0]).days
    train_data, supervise_data = slice_data(flavor_normal, day_len)
    #train_prob, supervise_prob, week_prob_last = week_prob(flavor, flavor_sum, day_len)
    '''
    start2 = time.clock()
    # train prob of the each vm
    parameters2 = two_layer_model(train_prob, supervise_prob,
                                  (len(flavor.keys()), 9, len(flavor.keys())),
                                  learning_rate=0.25, lambd=0.08, iterations=60)
    elapsed2 = time.clock() - start2
    print 'prob train time:'
    print elapsed2
    flavor_prob = predict(week_prob_last, parameters2)
    for i in range(len(flavor_prob)):
        flavor_prob[i] = flavor_prob[i] / float(sum(flavor_prob))
    '''
    # train the total VM count for each predicted day and time the training
    start1 = time.perf_counter()
    parameters1 = two_layer_model(train_data, supervise_data, (day_len, 6, day_len),
                                  learning_rate=0.18, lambd=0.1, iterations=60)
    elapsed1 = time.perf_counter() - start1
    print('data train time:')
    print(elapsed1)
    '''
    start2 = time.clock()
    # train prob of the each vm
    parameters2 = two_layer_model(train_prob, supervise_prob,
                                  (len(flavor.keys()), 7, len(flavor.keys())),
                                  learning_rate=0.1, lambd=0.1, iterations=60)
    elapsed2 = time.clock() - start2
    print 'prob train time:'
    print elapsed2
    '''

    # predict the total flavor count for each day
    test = flavor_normal[len(flavor_normal) - day_len:len(flavor_normal)]
    flavor_predict_normal = predict(test, parameters1)
    flavor_data = recover_normalized_data(flavor_predict_normal, data_mean, data_sigma)
    flavor_sum_predict = math.floor(sum(flavor_data))

    # estimate the probability of each VM flavor
    #flavor_prob = predict(week_prob_last, parameters2)
    flavor_prob = {}
    flavor_total = {}
    total = sum(flavor_sum)
    for key in flavor.keys():
        flavor_total[key] = sum(flavor[key])
        flavor_prob[key] = flavor_total[key] / float(total)
        #if flavor_prob[key] < 0.025:
        #    flavor_prob[key] = 0.0

    test_total = {}
    test_prob = {}
    #print flavor_prob
    for key in flavor.keys():
        test_total[key] = sum(flavor[key][len(flavor_sum) - day_len:len(flavor_sum)])
        test_prob[key] = test_total[key] / float(
            sum(flavor_sum[len(flavor_sum) - day_len:len(flavor_sum)]))
        #if flavor_prob[key] < 0.025:
        #    flavor_prob[key] = 0.0

    # re-map the historical probabilities onto the flavors ranked by recent frequency
    test_prob_sort = sorted(test_prob.items(), key=lambda item: item[1])
    flavor_prob_sort = sorted(flavor_prob.items(), key=lambda item: item[1])
    flavor_resort_prob = {}
    for k in range(len(flavor_prob)):
        flavor_resort_prob[test_prob_sort[k][0]] = flavor_prob_sort[k][1]

    # distribute the predicted total across flavors
    specific_flavor_num = {}
    flavor_total_indeed = 0
    for i in flavor_prob.keys():
        specific_flavor_num[i] = int(
            round(flavor_sum_predict * flavor_resort_prob[i]))
        flavor_total_indeed += specific_flavor_num[i]
    '''
    specific_flavor_num = {}
    flavor_total_indeed = 0
    key_list = flavor.keys()
    key_list.sort(key=lambda i: int(re.findall(r"\d+", i)[0]), reverse=False)
    for i in range(len(flavor_prob)):
        specific_flavor_num[key_list[i]] = int(round(flavor_sum_predict * flavor_prob[i]))
        flavor_total_indeed += specific_flavor_num[key_list[i]]
    #flavor_test = recover_normalized_data(test, data_max, data_min)
    '''
    return flavor_total_indeed, specific_flavor_num
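# Unlike the min-max variant sketched earlier, `normalize_data` here returns a mean and
# sigma, so it presumably standardizes the series (z-score). A minimal sketch under that
# assumption; the *_sketch functions are illustrative, not the project's implementation.
import math


def zscore_normalize_sketch(series):
    mean = sum(series) / float(len(series))
    sigma = math.sqrt(sum((x - mean) ** 2 for x in series) / float(len(series))) or 1.0
    normal = [(x - mean) / sigma for x in series]
    return normal, mean, sigma


def zscore_recover_sketch(normal, mean, sigma):
    return [x * sigma + mean for x in normal]


# round-trip check on toy data: recovers the originals up to float rounding
normal, mu, sd = zscore_normalize_sketch([4, 8, 6, 10])
restored = zscore_recover_sketch(normal, mu, sd)  # ~ [4.0, 8.0, 6.0, 10.0]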