def experiment3():
    # Parse training data to build word frequencies
    spam_counter, ham_counter, spam_prob, ham_prob = build_model.get_word_frequency()

    # Get the vocabulary of words present in the training set
    vocab = sorted(set(spam_counter).union(set(ham_counter)))

    # Remove words that are too short (<= 2 characters) or too long (>= 9)
    for word in vocab:
        if len(word) <= 2 or len(word) >= 9:
            if word in ham_counter:
                del ham_counter[word]
            if word in spam_counter:
                del spam_counter[word]

    # Create model of conditional probabilities
    vocab, ham_cond_prob, spam_cond_prob = build_model.create_model(
        spam_counter, ham_counter, model_filename="wordlength-model.txt")

    # Evaluate the effectiveness of the model over the test set
    build_model.evaluate_model(ham_cond_prob, spam_cond_prob, spam_prob, ham_prob,
                               results_filename='wordlength-result.txt')
def build_program(is_train, main_prog, startup_prog, args):
    """Build the program and add the gradient ops according to the mode.

    Args:
        is_train: mode, train or test
        main_prog: main program
        startup_prog: startup program
        args: arguments

    Returns:
        train mode: [loss, global_lr, py_reader]
        test mode: [loss, py_reader]
    """
    model = models.__dict__[args.model]()
    with fluid.program_guard(main_prog, startup_prog):
        if args.random_seed:
            main_prog.random_seed = args.random_seed
            startup_prog.random_seed = args.random_seed
        with fluid.unique_name.guard():
            py_reader, loss_out = create_model(model, args, is_train)
            # Add backward ops when training
            if is_train:
                optimizer = create_optimizer(args)
                avg_cost = loss_out[0]
                optimizer.minimize(avg_cost)
                # XXX: fetch the learning rate for logging; a better
                # implementation is required here.
                global_lr = optimizer._global_learning_rate()
                global_lr.persistable = True
                loss_out.append(global_lr)
            loss_out.append(py_reader)
    return loss_out
def build_program(is_train, main_prog, startup_prog, args):
    """Build the program and add the backward ops according to the mode.

    Parameters:
        is_train: indicates train mode or test mode
        main_prog: main program
        startup_prog: startup program
        args: arguments

    Returns:
        train mode: [loss, global_lr, data_loader]
        test mode: [loss, data_loader]
    """
    if args.model.startswith('EfficientNet'):
        override_params = {"drop_connect_rate": args.drop_connect_rate}
        padding_type = args.padding_type
        use_se = args.use_se
        model = models.__dict__[args.model](is_test=not is_train,
                                            override_params=override_params,
                                            padding_type=padding_type,
                                            use_se=use_se)
    else:
        model = models.__dict__[args.model]()

    optimizer = None
    with fluid.program_guard(main_prog, startup_prog):
        if args.random_seed or args.enable_ce:
            main_prog.random_seed = args.random_seed
            startup_prog.random_seed = args.random_seed
        with fluid.unique_name.guard():
            data_loader, loss_out = create_model(model, args, is_train)
            # Add backward ops when training
            if is_train:
                optimizer = create_optimizer(args)
                avg_cost = loss_out[0]
                # XXX: fetch the learning rate for logging; a better
                # implementation is required here.
                global_lr = optimizer._global_learning_rate()
                global_lr.persistable = True
                loss_out.append(global_lr)
                if args.use_amp:
                    optimizer = paddle.static.amp.decorate(
                        optimizer,
                        init_loss_scaling=args.scale_loss,
                        use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                        use_pure_fp16=args.use_pure_fp16,
                        use_fp16_guard=True)
                optimizer.minimize(avg_cost)
                if args.use_ema:
                    global_steps = fluid.layers.learning_rate_scheduler._decay_step_counter()
                    ema = ExponentialMovingAverage(args.ema_decay,
                                                   thres_steps=global_steps)
                    ema.update()
                    loss_out.append(ema)
            loss_out.append(data_loader)
    return loss_out, optimizer
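# For context, a minimal sketch of how build_program is typically driven from a
# training entry point. This is an assumption for illustration: the args fields
# used here (e.g. use_gpu) and the overall wiring follow PaddlePaddle
# static-graph conventions, not necessarily the original repository's script.
def run_training_sketch(args):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()

    # Build separate train/test programs; fluid.unique_name.guard() inside
    # build_program keeps parameter names aligned so the weights are shared.
    train_out, optimizer = build_program(
        is_train=True, main_prog=train_prog,
        startup_prog=startup_prog, args=args)
    test_out, _ = build_program(
        is_train=False, main_prog=test_prog,
        startup_prog=startup_prog, args=args)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)  # initialize all parameters once
    return exe, train_out, test_out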
def experiment1():
    # Parse training data to build word frequencies
    spam_counter, ham_counter, spam_prob, ham_prob = build_model.get_word_frequency()

    # Create model of conditional probabilities
    vocab, ham_cond_prob, spam_cond_prob = build_model.create_model(
        spam_counter, ham_counter, model_filename="model.txt")

    # Evaluate the effectiveness of the model over the test set
    build_model.evaluate_model(ham_cond_prob, spam_cond_prob, spam_prob, ham_prob,
                               results_filename='baseline-result.txt')
def load_model(checkpoint):
    # Rebuild the architecture, then restore the trained weights
    model = build_model.create_model(checkpoint['arch'],
                                     checkpoint['hidden_units'],
                                     checkpoint['output_size'],
                                     checkpoint['dropout'],
                                     pytorch_models)
    # Freeze the feature extractor; only the classifier was trained
    for param in model.parameters():
        param.requires_grad = False
    model.load_state_dict(checkpoint['model_state_dict'])
    model.class_to_idx = checkpoint["class_to_idx"]

    # Restore the optimizer with the saved learning rate and state
    lr = checkpoint["learning_rate"]
    op = optim.Adam(model.classifier.parameters(), lr=lr)
    op.load_state_dict(checkpoint['optimizer_state_dict'])
    return model, op
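# A brief usage sketch for load_model, assuming the checkpoint was saved with
# torch.save and contains the keys read above; the file path is hypothetical.
import torch

checkpoint = torch.load('checkpoint.pth', map_location='cpu')
model, optimizer = load_model(checkpoint)
model.eval()  # switch to inference mode before predicting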
def experiment2():
    # Parse training data to build word frequencies
    spam_counter, ham_counter, spam_prob, ham_prob = build_model.get_word_frequency()

    # Remove stopwords from the counters
    with open('Data/English-Stop-Words.txt', 'r', encoding='latin-1') as file:
        for stopword in file:
            stopword = stopword.strip()  # drop the trailing newline
            if stopword in ham_counter:
                del ham_counter[stopword]
            if stopword in spam_counter:
                del spam_counter[stopword]

    # Create model of conditional probabilities
    vocab, ham_cond_prob, spam_cond_prob = build_model.create_model(
        spam_counter, ham_counter, model_filename="stopword-model.txt")

    # Evaluate the effectiveness of the model over the test set
    build_model.evaluate_model(ham_cond_prob, spam_cond_prob, spam_prob, ham_prob,
                               results_filename='stopword-result.txt')
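# All three experiments delegate scoring to build_model.evaluate_model. For
# illustration, a minimal sketch of the underlying Naive Bayes decision rule,
# assuming smoothed conditional probabilities keyed by word; the function and
# variable names below are assumptions, not the module's actual API.
import math

def classify_sketch(words, ham_cond_prob, spam_cond_prob, spam_prob, ham_prob):
    # Sum log-probabilities to avoid floating-point underflow, then pick the
    # class with the higher posterior score.
    ham_score = math.log(ham_prob)
    spam_score = math.log(spam_prob)
    for word in words:
        if word in ham_cond_prob:
            ham_score += math.log(ham_cond_prob[word])
        if word in spam_cond_prob:
            spam_score += math.log(spam_cond_prob[word])
    return 'spam' if spam_score > ham_score else 'ham'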
"resnet152": models.resnet18, "densenet121": models.densenet121, "densenet169": models.densenet169, "densenet201": models.densenet201, "densenet161": models.densenet161, "inception_v3": models.inception_v3 } ### Load mapping with open('cat_to_name.json', 'r') as f: cat_to_name = json.load(f) ### Create our model output_size = len(cat_to_name) dropout = 0.4 model = build_model.create_model(args.arch, args.hidden_units, output_size, dropout, pytorch_models) criterion = nn.NLLLoss() model.class_to_idx = train_data.class_to_idx ### Choose device and transfer over model device = torch.device( "cuda" if args.device == "gpu" and torch.cuda.is_available() else "cpu") model.to(device) print("Using {}".format(device)) print( "GPU is {}available".format(" " if torch.cuda.is_available() else "not ")) ### Train and validate the final model print("Training model...") optimizer = optim.Adam(model.classifier.parameters(), lr=args.learning_rate)
spdr_data = pd.read_csv('csv_files/testSPY.csv')

# Scale the SPY close prices into the [0, 1] range
y_scaler = MinMaxScaler(feature_range=(0, 1))
y_data = spdr_data['Close'].values
y_data = y_data.reshape(-1, 1)
y_scaled_data = y_scaler.fit_transform(y_data)
y_scaled_data = y_scaled_data.flatten()

# Combine the historical data from the S&P 500 components with the SPDR close prices
sp500_y_combined = np.stack((sp500_1D, y_scaled_data), axis=-1)

# Build and train the LSTM
training_data, testing_data = build_model.create_testing_training_data(
    sp500_y_combined)
X_training_timestep, y_training_timestep = build_model.timestep(training_data)
X_testing_timestep, y_testing_timestep = build_model.timestep(testing_data)
input_shape = (X_training_timestep.shape[1], 1)
model = build_model.create_model(input_shape)
trained_model = build_model.train_the_model(model, X_training_timestep,
                                            y_training_timestep)
trained_model.save('trained_models/trained_model.h5')

# Predict over the test set and undo the scaling
predicted_value = trained_model.predict(X_testing_timestep)
actual_value = y_testing_timestep
predicted_value = y_scaler.inverse_transform(predicted_value.reshape(-1, 1))
actual_value = y_scaler.inverse_transform(actual_value.reshape(-1, 1))

# Report how long the program took to execute
print("--- %s seconds ---" % (time.time() - start_time))

build_model.plot_results(actual_value, predicted_value, 'Actual SPDR Price')
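# build_model.timestep is not shown in this script. A plausible sketch of such
# a sliding-window helper follows; the 60-step lookback and the column layout
# are assumptions, not the module's actual implementation.
import numpy as np

def timestep_sketch(data, lookback=60):
    # Each sample is `lookback` consecutive rows of the combined array; the
    # target is the next scaled close value (assumed to be the last column).
    X, y = [], []
    for i in range(lookback, len(data)):
        X.append(data[i - lookback:i])
        y.append(data[i, -1])
    return np.array(X), np.array(y)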