Example #1
def train(
    location,
    unique,
    counts,
    model_base_location,
    date_list=None,
    train_new_model=True,
    train_window=7,
    num_forecast=7,
):
    # pad with zeros for time series prediction
    if date_list is not None:
        inter_dates, inter_cases = pp.interpolate_cases(unique,
                                                        counts,
                                                        zeros=True,
                                                        end=str(date_list[-1]))
    else:
        inter_dates, inter_cases = pp.interpolate_cases(unique, counts)

    # create a scaler for this location and normalize the data
    scaler = pp.create_scaler()
    normalized = pp.normalize_data(np.array(inter_cases), scaler)
    inout_seq = pp.create_tensors(normalized, train_window)

    # train or load model
    if train_new_model:
        model = train_model(inout_seq)
        torch.save(model, model_base_location + location + ".pt")
    else:
        model = torch.load(model_base_location + location + ".pt")

    # make predictions
    normalized_preds = predict(model, num_forecast, normalized, train_window)
    predictions = pp.denormalize_data(normalized_preds, scaler)

    # create date list for predictions
    prediction_dates = pp.get_date_list(
        start=str(
            datetime.strptime(inter_dates[-1], "%Y-%m-%d") +
            timedelta(days=1)),
        end=str(
            datetime.strptime(inter_dates[-1], "%Y-%m-%d") +
            timedelta(days=num_forecast)),
    )

    # update case data with predictions
    inter_dates = np.append(inter_dates, prediction_dates)
    inter_cases = np.append(inter_cases, predictions)
    inter_dates = inter_dates.tolist()
    inter_cases = inter_cases.tolist()

    return location, inter_dates, inter_cases, prediction_dates, predictions
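The `pp` preprocessing helpers used above are not shown. A minimal sketch of what `create_scaler`, `normalize_data`, and `denormalize_data` might look like, assuming thin wrappers around scikit-learn's MinMaxScaler (an illustration, not the original implementation):

import numpy as np
from sklearn.preprocessing import MinMaxScaler


def create_scaler():
    # scale case counts into [-1, 1], a common range for LSTM inputs
    return MinMaxScaler(feature_range=(-1, 1))


def normalize_data(data, scaler):
    # fit the scaler on the series and return a flat normalized array
    return scaler.fit_transform(np.asarray(data, dtype=float).reshape(-1, 1)).reshape(-1)


def denormalize_data(data, scaler):
    # invert the scaling on the model's predictions
    return scaler.inverse_transform(np.asarray(data, dtype=float).reshape(-1, 1)).reshape(-1)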
Example #2
import pickle
import time


def main():
    # load the pickled per-day flavor totals
    with open('flavor_sum.txt', 'rb') as f:
        flavor_sum = pickle.load(f)
    flavor_normal, data_max, data_min = normalize_data(flavor_sum)
    # flavor_sum1 = roll_mean(7, flavor_sum)
    train_data, supervise_data = slice_data(flavor_normal, 7)

    start = time.perf_counter()
    parameters = two_layer_model(train_data,
                                 supervise_data, (7, 6, 7),
                                 learning_rate=0.6,
                                 lambd=0.02,
                                 iterations=50)
    elapsed = time.perf_counter() - start
    print(elapsed)

    test = flavor_normal[len(flavor_normal) - 7:len(flavor_normal)]
    #flavor_predict_normal = []

    #for i in range(7):
    flavor_predict_normal = predict(test, parameters)
    #.append(result[0])
    #test.append(result[0])
    #test.pop(0)

    flavor_data = recover_normalized_data(flavor_predict_normal, data_max,
                                          data_min)
    flavor_sum_predict = sum(flavor_data)
    print(flavor_sum_predict)

    with open('flavor_dict_01-05.txt', 'rb') as f1:
        flavor = pickle.load(f1)
    flavor_prob = {}
    flavor_total = {}
    total = sum(flavor_sum)

    for key in flavor.keys():
        flavor_total[key] = sum(flavor[key])
        flavor_prob[key] = flavor_total[key] / float(total)
        if flavor_prob[key] < 0.025:
            flavor_prob[key] = 0.0

    print(flavor_prob)

    specific_flavor_num = {}
    for key in flavor_prob.keys():
        specific_flavor_num[key] = flavor_sum_predict * flavor_prob[key]

    print(specific_flavor_num)
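This example assumes `normalize_data` returns the scaled series together with its max and min, and that `recover_normalized_data` inverts the scaling. A minimal sketch consistent with those call signatures (an assumption, not the original helpers):

def normalize_data(data):
    # min-max scale into [0, 1]; return max/min so predictions can be recovered
    data_max, data_min = max(data), min(data)
    span = float(data_max - data_min) or 1.0  # guard against a constant series
    normalized = [(x - data_min) / span for x in data]
    return normalized, data_max, data_min


def recover_normalized_data(data, data_max, data_min):
    # map normalized values back to the original scale
    return [x * (data_max - data_min) + data_min for x in data]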
Example #3
def run_task1(results_dir,
              dataset,
              vocabulary,
              test_data,
              augment=False,
              epochs=15,
              skip_test_prediction=False,
              seed=2021):
    HPARAMS = {}
    val_size = HPARAMS["val_size"] = 0.2
    normalize = HPARAMS["normalize"] = True
    HPARAMS["seed"] = seed
    seed_everything(seed)
    split_videos = HPARAMS["split_videos"] = False

    if normalize:
        dataset = normalize_data(deepcopy(dataset))
        if not skip_test_prediction:
            test_data = normalize_data(deepcopy(test_data))
        else:
            test_data = None

    train_data, val_data, anno_perc_df = split_validation(
        dataset,
        seed=seed,
        vocabulary=vocabulary,
        test_size=val_size,
        split_videos=split_videos)
    num_classes = len(anno_perc_df.keys())
    feature_dim = HPARAMS["feature_dim"] = (2, 7, 2)

    # Generator parameters
    past_frames = HPARAMS["past_frames"] = 50
    future_frames = HPARAMS["future_frames"] = 50
    frame_gap = HPARAMS["frame_gap"] = 1
    use_conv = HPARAMS["use_conv"] = True
    batch_size = HPARAMS["batch_size"] = 128

    # Model parameters
    dropout_rate = HPARAMS["dropout_rate"] = 0.5
    learning_rate = HPARAMS["learning_rate"] = 5e-4
    layer_channels = HPARAMS["layer_channels"] = (128, 64, 32)
    conv_size = HPARAMS["conv_size"] = 5
    augment = HPARAMS["augment"] = augment
    class_to_number = HPARAMS['class_to_number'] = vocabulary
    epochs = HPARAMS["epochs"] = epochs

    trainer = Trainer(train_data=train_data,
                      val_data=val_data,
                      test_data=test_data,
                      feature_dim=feature_dim,
                      batch_size=batch_size,
                      num_classes=num_classes,
                      augment=augment,
                      class_to_number=class_to_number,
                      past_frames=past_frames,
                      future_frames=future_frames,
                      frame_gap=frame_gap,
                      use_conv=use_conv)

    trainer.initialize_model(layer_channels=layer_channels,
                             dropout_rate=dropout_rate,
                             learning_rate=learning_rate,
                             conv_size=conv_size)

    trainer.train(epochs=epochs)
    augment_str = '_augmented' if augment else ''
    trainer.model.save(f'{results_dir}/task1{augment_str}.h5')
    np.save(f"{results_dir}/task1{augment_str}_hparams", HPARAMS)

    val_metrics = trainer.get_validation_metrics()
    val_metrics.to_csv(f"{results_dir}/task1_metrics_val.csv", index=False)

    if not skip_test_prediction:
        test_results = trainer.get_test_predictions()
        np.save(f"{results_dir}/test_results", test_results)
    else:
        test_results = {}

    del trainer  # free RAM, as the test dataset is large
    gc.collect()
    return test_results
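A note on the saved hyperparameters: `np.save` stores the HPARAMS dict as a zero-dimensional object array, so reading it back later needs `allow_pickle=True` plus `.item()`, roughly as follows (the path is illustrative):

import numpy as np

# reload the hyperparameters written by run_task1; ".npy" was appended automatically
hparams = np.load("results/task1_hparams.npy", allow_pickle=True).item()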
Example #4
"""
The main training script.

Written by Tanmay Patil
"""
import matplotlib.pyplot as plt
from model import create_model, train_model
from preprocess import object_to_date_time, get_daily_weather_data, normalize_data, read_csv, get_training_data



if __name__ == "__main__":
    df = read_csv("../data/processed/delhi_weather_data_processed.csv")
    df = object_to_date_time(df)
    daily_weather = get_daily_weather_data(df)
    daily_weather = normalize_data(daily_weather)
    X, y = get_training_data(daily_weather)
    # first 7300 samples for training, the rest held out for testing
    X_train = X[:7300]
    X_test = X[7300:]
    y_train = y[:7300]
    y_test = y[7300:]
    model = create_model()
    history = train_model(model, X_train, y_train)
    train_loss = history.history['loss']
    x_axis = list(range(1, len(train_loss) + 1))
    plt.title('Training Loss')
    plt.plot(x_axis, train_loss)
    plt.show()
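The `preprocess.normalize_data` used here is not shown; a plausible sketch, assuming simple column-wise min-max scaling of the daily weather DataFrame:

def normalize_data(df):
    # scale every column of the daily weather DataFrame into [0, 1]
    return (df - df.min()) / (df.max() - df.min())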
Example #5
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
import pandas as pd
from preprocess import data_split, normalize_data
import matplotlib.pyplot as plt
import numpy as np

device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
DATASET_ROOT = './'

df = pd.read_csv("./STT.csv", index_col=0)

STT = df[df.symbol == 'STT'].copy()
# print(STT)
STT.drop(columns=['symbol'], inplace=True)  # drop the 'symbol' column
STT_new = normalize_data(STT)
# print(STT_new)
window = 30
X_train, y_train, X_test, y_test = data_split(STT_new, window)

INPUT_SIZE = 5
HIDDEN_SIZE = 64
NUM_LAYERS = 1
OUTPUT_SIZE = 1

learning_rate = 0.001
num_epochs = 50

rnn = test_LSTM(input_dim=INPUT_SIZE,
                hidden_dim=HIDDEN_SIZE,
                num_layers=NUM_LAYERS,
Example #6
def nn_app_utils(flavor_sum, flavor, predict_date):
    '''
    Predict the total number of requested VMs over the prediction window and
    split that total across flavors.

    :param flavor_sum: per-day totals of requested VMs (training history)
    :param flavor: dict mapping each flavor name to its per-day request counts
    :param predict_date: list of dates covering the prediction window
    :return: (flavor_total_indeed, specific_flavor_num) - the total predicted
             count and a dict of predicted counts per flavor
    '''
    flavor_normal, data_mean, data_sigma = normalize_data(flavor_sum)
    # flavor_sum1 = roll_mean(7, flavor_sum)
    day_len = (predict_date[-1] - predict_date[0]).days
    train_data, supervise_data = slice_data(flavor_normal, day_len)
    #train_prob, supervise_prob, week_prob_last = week_prob(flavor, flavor_sum, day_len)
    '''
	start2 = time.clock()
	# train prob of the each vm
	parameters2 = two_layer_model(train_prob, supervise_prob, (len(flavor.keys()), 9, len(flavor.keys())), learning_rate=0.25, lambd=0.08, iterations=60)
	elapsed2 = time.clock() - start2
	print 'prob train time:'
	print elapsed2

	flavor_prob = predict(week_prob_last, parameters2)
	for i in range(len(flavor_prob)):
		flavor_prob[i] = flavor_prob[i] / float(sum(flavor_prob))
	'''
    start1 = time.perf_counter()
    # train total vm number in every predicted date
    parameters1 = two_layer_model(train_data,
                                  supervise_data, (day_len, 6, day_len),
                                  learning_rate=0.18,
                                  lambd=0.1,
                                  iterations=60)
    elapsed1 = time.perf_counter() - start1
    print('data train time:')
    print(elapsed1)
    '''
	start2 = time.clock()
	# train prob of the each vm
	parameters2 = two_layer_model(train_prob, supervise_prob, (len(flavor.keys()), 7, len(flavor.keys())), learning_rate=0.1, lambd=0.1, iterations=60)
	elapsed2 = time.clock() - start2
	print 'prob train time:'
	print elapsed2
	'''
    #predict flavor total number in each day
    test = flavor_normal[len(flavor_normal) - day_len:len(flavor_normal)]
    flavor_predict_normal = predict(test, parameters1)
    flavor_data = recover_normalized_data(flavor_predict_normal, data_mean,
                                          data_sigma)
    flavor_sum_predict = math.floor(sum(flavor_data))

    #predict prob in each vm
    #flavor_prob = predict(week_prob_last, parameters2)

    flavor_prob = {}
    flavor_total = {}
    total = sum(flavor_sum)

    for key in flavor.keys():
        flavor_total[key] = sum(flavor[key])
        flavor_prob[key] = flavor_total[key] / float(total)
        #if flavor_prob[key] < 0.025:
        #	flavor_prob[key] = 0.0

    test_total = {}
    test_prob = {}
    #print flavor_prob
    for key in flavor.keys():
        test_total[key] = sum(flavor[key][len(flavor_sum) -
                                          day_len:len(flavor_sum)])
        test_prob[key] = test_total[key] / float(
            sum(flavor_sum[len(flavor_sum) - day_len:len(flavor_sum)]))
        #if flavor_prob[key] < 0.025:
        #	flavor_prob[key] = 0.0

    test_prob_sort = sorted(test_prob.items(), key=lambda item: item[1])
    flavor_prob_sort = sorted(flavor_prob.items(), key=lambda item: item[1])

    flavor_resort_prob = {}
    for k in range(len(flavor_prob)):
        flavor_resort_prob[test_prob_sort[k][0]] = flavor_prob_sort[k][1]

    specific_flavor_num = {}
    flavor_total_indeed = 0

    for i in flavor_prob.keys():
        specific_flavor_num[i] = int(
            round(flavor_sum_predict * flavor_resort_prob[i]))
        flavor_total_indeed += specific_flavor_num[i]
    '''
	specific_flavor_num = {}
	flavor_total_indeed = 0
	key_list = flavor.keys()
	key_list.sort(key=lambda i: int(re.findall(r"\d+", i)[0]), reverse=False)

	for i in range(len(flavor_prob)):
		specific_flavor_num[key_list[i]] = int(round(flavor_sum_predict * flavor_prob[i]))
		flavor_total_indeed += specific_flavor_num[key_list[i]]

	#flavor_test = recover_normalized_data(test, data_max, data_min)

	'''

    return flavor_total_indeed, specific_flavor_num
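Unlike Example #2, this variant assumes z-score normalization, returning the mean and standard deviation so predictions can be mapped back. A minimal sketch matching those signatures (hypothetical, not the original helpers):

def normalize_data(data):
    # standardize the series; return mean and sigma for later recovery
    data_mean = sum(data) / float(len(data))
    variance = sum((x - data_mean) ** 2 for x in data) / float(len(data))
    data_sigma = variance ** 0.5 or 1.0  # guard against a constant series
    normalized = [(x - data_mean) / data_sigma for x in data]
    return normalized, data_mean, data_sigma


def recover_normalized_data(data, data_mean, data_sigma):
    # undo the standardization
    return [x * data_sigma + data_mean for x in data]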