def start_app():
    global historicalData, sizeOfModelData, marketData, symbolData, periodData, model, n_per_in, n_per_out, n_features, modelWeights
    global percentTestData, epochs, batch_size, earlyStop, saveWeights, displayResults, df, close_scaler, testDf, numberToPredict
    global percentToPredict, predictions, shortDf, actual, normalizedDf, trainDf, normalizedTrainDf, startAmount, tradingFee, train_close_scaler, calcPredictionsNumberPredicts, binaryModel

    # functions.disable_console()
    df, testDf, trainDf, n_features = functions.get_data(
        periodData, marketData, symbolData,
        percentTestData=percentTestData, saveData=True,
        historical=historicalData, size=sizeOfModelData)
    model = functions.create_model(n_per_in, n_per_out, n_features)
    binaryModel = functions.create_model_binary(n_per_in, n_per_out, n_features)
    normalizedTrainDf, train_close_scaler = functions.normalize_df(trainDf)
    normalizedDf, close_scaler = functions.normalize_df(df)
    normalizedDf.to_csv("normalizedDf.csv")
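# A hedged sketch, not part of start_app(): assuming functions.normalize_df
# returns an sklearn-style scaler fitted on the "Close" column, model outputs
# in normalized space could be mapped back to price units like this. The
# helper name, the column name and the 2D shape of `predictions` are assumptions.
import pandas as pd

def unscale_close(predictions, close_scaler):
    unscaled = close_scaler.inverse_transform(predictions)  # back to price units
    return pd.DataFrame(unscaled, columns=["Close"])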
def train():
    filename = './data.csv'
    intent, unique_intent, sentences = load_dataset(filename)
    cleaned_words = cleaning(sentences)
    word_tokenizer = create_tokenizer(cleaned_words)
    vocab_size = len(word_tokenizer.word_index) + 1
    max_len = max_length(cleaned_words)

    encoded_doc = encoding_doc(word_tokenizer, cleaned_words)
    padded_doc = padding_doc(encoded_doc, max_len)

    output_tokenizer = create_tokenizer(
        unique_intent, filters='!"#$%&()*+,-/:;<=>?@[\\]^`{|}~')
    encoded_output = encoding_doc(output_tokenizer, intent)
    encoded_output = np.array(encoded_output).reshape(len(encoded_output), 1)
    output_one_hot = one_hot(encoded_output)

    train_X, val_X, train_Y, val_Y = train_test_split(
        padded_doc, output_one_hot, shuffle=True, test_size=0.2)

    model = create_model(vocab_size, max_len)
    model.compile(loss="categorical_crossentropy", optimizer="adam",
                  metrics=["accuracy"])

    filename = 'model.h5'
    checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    hist = model.fit(train_X, train_Y, epochs=100, batch_size=32,
                     validation_data=(val_X, val_Y), callbacks=[checkpoint])
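# A hedged inference sketch, added for illustration (not part of the original
# train()): it assumes the fitted word_tokenizer, max_len and the preprocessing
# helpers above are available at call time (train() would need to return or
# save them) and that 'model.h5' is the checkpoint written by ModelCheckpoint.
import numpy as np
from keras.models import load_model

def predict_intent(text, word_tokenizer, max_len):
    model = load_model('model.h5')
    cleaned = cleaning([text])                       # same preprocessing as training
    encoded = encoding_doc(word_tokenizer, cleaned)  # integer-encode with the fitted tokenizer
    padded = padding_doc(encoded, max_len)
    probs = model.predict(padded)[0]
    # the winning index maps back to an intent through output_tokenizer,
    # mirroring how the labels were encoded above
    return int(np.argmax(probs)), probs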
def load_own_model(weights_path):
    # return load_model(weights_path)
    model = create_model(shape=SHAPE)
    model.load_weights(weights_path)
    return model
# These lines take in the command line arguments
parser = argparse.ArgumentParser(description='Retrain the model.')
parser.add_argument('--graph', default=False, action='store_true')
parser.add_argument('--filename', nargs=1, help='Data to train on (csv)')
parser.add_argument('--verbose', default=False, action='store_true',
                    help='Show the metrics for the training (how well it did)')
args = parser.parse_args()

# Make sure the input file is a CSV
if args.filename[0][-3:] != 'csv':
    print('The file needs to be in csv format. Please see the example data')
    exit(1)

# Get the data from the training file
data = get_data(args.filename[0])

# Fit the model to the data
results = create_model(data, args.verbose)

# Output the new parameters
print('For the equation: (flowrate * a) / (tmp - flowrate * b) + flowrate * c')
print('a = {}'.format(results[0]))
print('b = {}'.format(results[1]))
print('c = {}'.format(results[2]))

# Graph the data, if requested
if args.graph:
    generate_graph(data, results)
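# A minimal sketch, added for illustration: evaluate_fit is a hypothetical
# helper, and the only assumption is that `results` holds (a, b, c) as printed
# above by create_model.
def evaluate_fit(flowrate, tmp, a, b, c):
    """Evaluate (flowrate * a) / (tmp - flowrate * b) + flowrate * c."""
    return (flowrate * a) / (tmp - flowrate * b) + flowrate * c

# e.g. prediction = evaluate_fit(flowrate, tmp, results[0], results[1], results[2])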
def load_checkpoint(checkpoint_directory):
    # torch.load returns whatever object was saved; a plain dict is the usual
    # convention, so the fields are read with subscripting
    checkpoint = torch.load(checkpoint_directory)
    model = create_model(checkpoint['architecture'], checkpoint['hidden_units'])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.class_to_idx = checkpoint['class_to_idx']
    return model
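# A hedged sketch of the save-side counterpart that would produce the keys
# read in load_checkpoint above; the function name, the default file path and
# the presence of model.class_to_idx are assumptions, not code from this project.
import torch

def save_checkpoint_dict(model, architecture, hidden_units, path='checkpoint.pth'):
    checkpoint = {
        'architecture': architecture,
        'hidden_units': hidden_units,
        'model_state_dict': model.state_dict(),
        'class_to_idx': model.class_to_idx,
    }
    torch.save(checkpoint, path)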
    seed=42  # set seed for reproducibility
)

# Test image generator
test_generator = ImageDataGenerator().flow_from_directory(
    test_dir,
    target_size=(image_width, image_height),
    batch_size=batch_size,
    seed=42  # set seed for reproducibility
)

# Include the epoch in the file name (uses `str.format`)
checkpoint_path = "checkpoints/q1_V5/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

classifier = create_model(input_size)

# Print the model structure summary
classifier.summary()

# Compiling the CNN
classifier.compile(adam(lr=.0001), loss='categorical_crossentropy',
                   metrics=['accuracy'])

cp_callback = create_callback(checkpoint_path, 1)

hist = classifier.fit_generator(
    train_generator,
    epochs=num_epoch,
    steps_per_epoch=num_train_samples // batch_size,
"early_stop": True, "transfer_learning": True, "batch_size": 32, "activation": "elu", "optimizer": "adam"}) config = wandb.config # * CREATE MODEL train_set, val_set = train_test_split(y_labels[0:200], test_size = 0.2) train_generator, valid_generator = generate_generators(train_set, val_set, config, UNIQUE_LABELS, transfer_learning = config.transfer_learning, augmentation = False) m, base_model, F2Score = create_model(config, UNIQUE_LABELS, transfer_learning = config.transfer_learning) early_stopping, checkpoint, reduce_lr = create_callbacks(model_name = wandb.run.name, patience = config.patience) if config.class_weight = True: class_weight = weight_dict else: class_weight = None # * RUN MODEL STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size history = m.fit(train_generator,
parser.add_argument('--gpu', dest='gpu', action='store_true', default=False,
                    help='Turn on the use of GPU')
arguments = parser.parse_args()

data_dir = arguments.data_dir
save_dir = arguments.save_dir
arch = arguments.arch
learning_rate = arguments.learning_rate
hidden_units = arguments.hidden_units
epochs = arguments.epochs
gpu = arguments.gpu

print("Data Info :\n")
train_dataloader, test_dataloader, validation_dataloader = load_data(data_dir)

print("\nModel Info :\n")
model = create_model(arch, hidden_units)

print("\nTrain starts:\n")
if gpu:
    print("GPU activated")
else:
    print("GPU is not activated")
print("Training epochs : {}".format(str(epochs)))
print("Learning rate : {:.4f}".format(learning_rate))
model, optimizer, criterion = train(model, train_dataloader, validation_dataloader,
                                    learning_rate, gpu, epochs, 40)

print("\nSave checkpoint:\n")
save_checkpoint(model, save_dir, optimizer, criterion, epochs)
# Setting variables used for training
# Splitting into 80% for training and 20% for validation
train_X, val_X, train_Y, val_Y = train_test_split(
    padded_doc, output_one_hot, shuffle=True, test_size=0.2)

print("Shape of train_X = %s and train_Y = %s" % (train_X.shape, train_Y.shape))
print("Shape of val_X = %s and val_Y = %s" % (val_X.shape, val_Y.shape))

# Defining the model
# Using a Bidirectional GRU (Gated Recurrent Unit),
# a gated recurrent architecture related to the LSTM
model = functions.create_model(vocab_size, max_length)

# Train the model with the adam optimizer,
# batch size 16 and 100 epochs
model.compile(loss="categorical_crossentropy", optimizer="adam",
              metrics=["accuracy"])
model.summary()

filename = 'model.h5'
checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1,
                             save_best_only=True, mode='min')
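# A hedged sketch of what functions.create_model(vocab_size, max_length) might
# look like, based only on the comments above (a Bidirectional GRU); the
# embedding size, unit count and function name are assumptions, not the
# author's actual implementation.
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, GRU, Dense

def create_model_sketch(vocab_size, max_length, num_intents):
    model = Sequential([
        Embedding(vocab_size, 128, input_length=max_length, mask_zero=True),
        Bidirectional(GRU(128)),                   # read the sequence in both directions
        Dense(num_intents, activation="softmax"),  # one probability per intent
    ])
    return model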
# A couple of ways to create the pandas dataframe are included:
# one from sqlite, one via a local path, and one via GitHub

# Sqlite option
# songs_df = pd.read_sql_table('songs', 'sqlite:///db.sqlite3')

# Path option
# songs_df = pd.read_csv('../Data/SpotifyAudioFeaturesApril2019_duplicates_removed.csv')

# GitHub option
infile = "https://raw.githubusercontent.com/spotify-recommendation-engine-3/data_science/master/Data/SpotifyAudioFeaturesApril2019_duplicates_removed.csv"
songs_df = pd.read_csv(infile)

y = songs_df[songs_df.columns[:3]]
X = songs_df[songs_df.columns[3:]]

my_model = create_model(preprocess(X))


@app.route('/', methods=['GET', 'POST'])
def plot_png():
    fig = create_figure()
    output = io.BytesIO()
    FigureCanvas(fig).print_png(output)
    return Response(output.getvalue(), mimetype='image/png')


def create_figure():
    song_df = songs_df.sample()
    song_df = song_df.iloc[:, 3:]
    songs_to_plot = suggest_songs(song_df, songs_df, y, my_model)
    fig = Figure(figsize=(9, 9), edgecolor='gray')