def main(params):
    filepath = params['filepath']
    date = params['date']
    forecastcol = params['forecastcolumn']
    epochs = params['epochs']
    bs = params['batch_size']
    ws = params['sequence_length']

    # read the file at filepath
    df = pd.read_csv(filepath, sep=",")
    dataobj = Dataset(df, date, forecastcol)

    # normal train and eval split
    # this split doesn't belong to model data preparation
    train_split, eval_split = dataobj.normal_split()
    model_train, model_labels = model_data_prep(train_split, ws)
    logging.info("Train Data Shape:{}".format(model_train.shape))
    logging.info("Train Label Shape:{}".format(model_labels.shape))

    # call the model file
    logging.info("============= MODEL TRAINING STARTED =============")
    network, modeldump = model.lstm(model_train, model_labels,
                                    epochs=epochs, batch_size=bs)
    # model.plot_loss(modeldump)

    logging.info("============= MODEL PREDICTION STARTED =============")
    predictions = model.forecast_fn(network, model_train[-1], len(eval_split), ws)
    print(predictions)
    assert len(eval_split) == len(predictions), \
        "Length Mismatch between Actuals and Predictions"
    plt.plot(eval_split)
    plt.plot(predictions)
    plt.show()
    logging.info("Model Score on Test Data:{}".format(
        model.evaluate_model_performance(eval_split, predictions)))
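# The driver above relies on a model_data_prep(train_split, ws) helper that is
# not shown here. The function below is only a minimal sketch of what such a
# sliding-window routine could look like, assuming the training split is a 1-D
# series and model.lstm expects inputs shaped (samples, window, 1).
import numpy as np

def model_data_prep_sketch(series, window_size):
    """Hypothetical helper: build sliding-window inputs and next-step labels."""
    values = np.asarray(series, dtype=np.float32).reshape(-1)
    X, y = [], []
    for i in range(len(values) - window_size):
        X.append(values[i:i + window_size])   # one window of past observations
        y.append(values[i + window_size])     # the value to forecast
    return np.array(X).reshape(-1, window_size, 1), np.array(y)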
def __init__(self):
    self.mu = 10
    self.lmbda = 6
    self.num_gen = 10
    self.n = 44
    self.lstm = lstm()
    self.population = self.init_population()
    self.fitnesses = []
    self.best_genotype = None
    for i in range(self.num_gen):
        print('generation {}'.format(i))
        self.evolutionary_cycle()
        self.fitnesses.append(self.population[0].fitness)
    print(self.best_genotype)
    plt.plot(self.fitnesses)
    plt.show()
def main():
    # args from config
    args = parse_args()

    # pre-check
    saving_path = pre_check(args)

    # load and pre-process data
    data_pack = data_packed()
    data_pack.data_process(args)

    # print running information
    print_info(saving_path, args, data_pack)

    # model
    model = lstm(data_pack, args)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    if args.load_model:
        model.load_state_dict(torch.load(args.model_path))
    # if args.GPU_avaiable:
    #     model.cuda()
    model.cuda()

    # train
    trainer = Trainer(data_pack, saving_path, args, model, optimizer)
    if not args.load_model:
        trainer.train()

    # test
    trainer.test()

    # scores
    scores(saving_path, args, data_pack)

    # eer
    compute_EER(saving_path, args)
df = generate_df(add_selectbox)
df  # Streamlit "magic": a bare expression is rendered in the app
st.write("The data retrieved above gives us the following curve")
st.set_option('deprecation.showPyplotGlobalUse', False)
plot_figs(df)
st.pyplot()

scaled_close_data, training_data_len, close_dataset, close_data, df = prepro(df)
x_train, y_train, df = generate_x_train(scaled_close_data, training_data_len,
                                        close_dataset, close_data, df)
st.write(
    "Based on this data, and after training, we can make a prediction for our dataset"
)
valid, predictions, rmse, train, df = lstm(x_train, training_data_len,
                                           close_dataset, close_data, df, y_train)
valid  # rendered in the app
st.write(
    "It is easy to see that our prediction algorithm follows the trends fairly well"
)
plot_pred(valid, predictions, rmse, train, df)
st.pyplot()
train_data = Dataset(partition['train'], data_path)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                          collate_fn=collate_fn)
val_data = Dataset(partition['validation'], data_path)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True,
                        collate_fn=collate_fn)

ratio = 50  # TODO: manually find ratio of sepsis occurrence

model = lstm(embedding, hidden_size, num_layers, batch_size, args.device, embed)

'''for loading a previous model'''
# TODO: put train/test into functions
# TODO: make this controlled by an arg when calling
# TODO: also load losses and accuracy for graphing and add ability to continue them
if load_model:
    model.load_state_dict(torch.load(load_model))
    train_losses = np.concatenate(
        (np.load(load_folder + '/train_losses.npy'), np.zeros(epochs)))
    train_pos_acc = np.concatenate(
        (np.load(load_folder + '/train_pos_acc.npy'), np.zeros(epochs)))
    train_neg_acc = np.concatenate(
        (np.load(load_folder + '/train_neg_acc.npy'), np.zeros(epochs)))
    val_losses = np.concatenate(
        (np.load(load_folder + '/val_losses.npy'), np.zeros(epochs)))
    val_pos_acc = np.concatenate(
        (np.load(load_folder + '/val_pos_acc.npy'), np.zeros(epochs)))
y_train = []
y_train.append(label)

data, label = data_process(test_path)
x_test = []
x_test.append(data)
# y_test = []
# y_test.append(label)

# pad the test sequence with zero vectors so it matches the training length
for i in range(len(x_test[0]), len(x_train[0])):
    x_test[0].append([0.0] * 81)

print(x_test)
print(np.array(x_train).shape)
print(np.array(x_test).shape)

Model = lstm()
Model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
Model.fit(np.array(x_train), np.array(y_train), epochs=40, batch_size=64)
result = Model.predict(np.array(x_test))
print(result)
np.savetxt('result/result.csv', result[0], delimiter=',')

# Feature Selection
'''
importances = feature_selection.RF(data, label)
for index, value in importances:
    print(f'feature {index}: {value}')
'''
import pickle

from model.lstm import *
from preprocess import *

if __name__ == "__main__":
    pre_training = True  # set to True to load weights
    Syn_aug = True       # if False, training is faster but does slightly worse on the test dataset

    sls = lstm("lstm", training=True)

    if pre_training:
        print("Pre-training")
        train = pickle.load(open("stsallrmf.p", "rb"))
        sls.train_lstm(train, 66)
        print("Pre-training done")

    # Train step
    train = pickle.load(open("semtrain.p", 'rb'))
    test = pickle.load(open("semtest.p", 'rb'))
    if Syn_aug:
        train = expand(train)
        sls.train_lstm(train, 100, test)
    else:
        sls.train_lstm(train, 100, test)

    # Test step
    test = pickle.load(open("semtest.p", 'rb'))
    print(sls.chkterr2(test))
d = one_hot_decode(oh)
print(d)

X, y = get_pair(5, 2, 50)
print(X.shape, y.shape)
print('X=%s, y=%s' % (one_hot_decode(X[0]), one_hot_decode(y[0])))

# Baseline without attention
# configure problem
n_features = 50
n_timesteps_in = 5
n_timesteps_out = 2

# Create different models & compare
simple_lstm = lstm(lstm_cells=150, n_timesteps_in=n_timesteps_in,
                   n_features=n_features)
seq2seq_model = seq2seq(lstm_cells=150, n_timesteps_in=n_timesteps_in,
                        n_features=n_features)
attention_model = attention(lstm_cells=150, n_timesteps_in=n_timesteps_in,
                            n_features=n_features)

for model in (simple_lstm, seq2seq_model, attention_model):
    # train
    for epoch in range(5000):
        # generate new random sequence
        X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
        # fit model for one epoch on this sequence
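# The loop above is cut off at the per-sequence fit. Below is only a hedged
# sketch of one way the training and evaluation could look, assuming each Keras
# model is fitted on a single random sequence per iteration and that get_pair
# returns arrays already shaped for all three models.
import numpy as np

def train_and_score_sketch(model, n_iterations=5000, n_eval=100):
    """Hypothetical per-sequence training followed by exact-match accuracy."""
    for _ in range(n_iterations):
        X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
        model.fit(X, y, epochs=1, verbose=0)
    correct = 0
    for _ in range(n_eval):
        X, y = get_pair(n_timesteps_in, n_timesteps_out, n_features)
        yhat = model.predict(X, verbose=0)
        if np.array_equal(one_hot_decode(y[0]), one_hot_decode(yhat[0])):
            correct += 1
    print('Accuracy: %.2f%%' % (100.0 * correct / n_eval))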
    num_workers=args.numworkers)

vocabSize = data.vocabularySize()
embeddingSize = 300
hiddenSize = 100
momentum = 0.9

if args.type == 'rnn':
    print('RNN model')
    model = rnn(vocabSize, embeddingSize, hiddenSize).to(device)
elif args.type == 'gru':
    print('GRU model')
    model = gru(vocabSize, embeddingSize, hiddenSize).to(device)
elif args.type == 'lstm':
    print('LSTM model')
    model = lstm(vocabSize, embeddingSize, hiddenSize).to(device)
else:
    print('Invalid entry for model type. Should be one of rnn, lstm or gru.')
    assert False

criterion = nn.BCEWithLogitsLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=momentum, nesterov=True)
# optimizer = optim.Adam(model.parameters(), lr=args.lr)

totalLoss = 0
print('training started')
for epoch in range(1, args.epoch + 1):
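# The snippet above ends at the top of the epoch loop. The function below is a
# minimal sketch of one possible epoch body, assuming the (elided) DataLoader
# yields (text, label) batches and the model returns raw logits of shape
# (batch, 1); it is an illustration, not the original author's training code.
def run_epoch_sketch(model, loader, criterion, optimizer, device):
    model.train()
    total = 0.0
    for text, labels in loader:
        text, labels = text.to(device), labels.float().to(device)
        optimizer.zero_grad()
        logits = model(text).squeeze(1)   # (batch,)
        loss = criterion(logits, labels)  # BCEWithLogitsLoss on raw logits
        loss.backward()
        optimizer.step()
        total += loss.item()
    return total / len(loader)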
def train_model(epochs=1, load=False):
    tokenizer = Tokenizer.from_file("bpe-fi.tokenizer.json")
    vocab = tokenizer.get_vocab()
    vocab_size = max(vocab.values()) + 1

    input_len = config["input_len"]
    dim = config["dim"]
    lstm_layers = config["lstm_layers"]
    dense_layers = config["dense_layers"]
    BATCH_SIZE = 2**12

    print("Vocab size", vocab_size)

    if not load:
        model = lstm(vocab_size=vocab_size,
                     input_len=input_len,
                     dim=dim,
                     lstm_layers=lstm_layers,
                     dense_layers=dense_layers)
    else:
        model = tf.keras.models.load_model('./saved_models/' + model_str(config))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

    # Create training data set
    print("Load data...")
    data = np.load("out.npy")  # [:1000000]
    dlen = len(data)
    print("Done.")

    steps_per_epoch = dlen // BATCH_SIZE
    perm = np.random.permutation(steps_per_epoch) * BATCH_SIZE
    print("steps_per_epoch", steps_per_epoch)
    print("perm", perm[:10])

    def data_generator(dataset):
        # dataset = tf.constant(dataset)
        batch = 0
        while True:
            j = perm[batch]
            data_x = []
            data_y = []
            for b in range(BATCH_SIZE):
                i = j % (dlen - input_len - 1)
                train_x = dataset[i:i + input_len]
                train_y = dataset[i + input_len]
                # train_y = tf.keras.utils.to_categorical(train_y, num_classes=vocab_size)
                data_x.append(train_x)
                data_y.append(train_y)
                j += 1
            data_x = np.array(data_x)
            data_y = np.array(data_y)
            # print(data_x.shape)
            # print(data_y.shape)
            batch += 1
            batch = batch % steps_per_epoch
            yield (data_x, data_y)

    print("BATCHES IN TOTAL", steps_per_epoch)
    generator = data_generator(data)
    print("NEXT", next(generator))
    # print(next(generator))

    print("Fit the model...")
    # Fit the model
    tf.profiler.experimental.start("logdir")
    model.fit(x=generator,
              epochs=epochs,
              batch_size=BATCH_SIZE,
              use_multiprocessing=True,
              steps_per_epoch=steps_per_epoch)
    print("Done.")
    tf.profiler.experimental.stop()

    # Save the model
    model.save('./saved_models/' + model_str(config))
    print("Model saved.")
    return model
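# A possible alternative to the hand-rolled Python generator above is to wrap
# it in tf.data so prefetching overlaps batch assembly with training. The
# shapes and dtypes below are assumptions (token ids from out.npy yielded as
# (BATCH_SIZE, input_len) inputs and (BATCH_SIZE,) targets); this is a sketch,
# not the author's pipeline.
import tensorflow as tf

def make_dataset_sketch(generator, batch_size, input_len):
    ds = tf.data.Dataset.from_generator(
        lambda: generator,
        output_signature=(
            tf.TensorSpec(shape=(batch_size, input_len), dtype=tf.int64),
            tf.TensorSpec(shape=(batch_size,), dtype=tf.int64),
        ),
    )
    return ds.prefetch(tf.data.AUTOTUNE)

# usage sketch:
# model.fit(make_dataset_sketch(generator, BATCH_SIZE, input_len),
#           epochs=epochs, steps_per_epoch=steps_per_epoch)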
import pickle
import tensorflow as tf

tf.debugging.set_log_device_placement(True)

try:
    with tf.device('/device:GPU:0'):
        a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
        b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
        c = tf.matmul(a, b)
except RuntimeError as e:
    print(e)

with open('/data/phucnq/data/video_classification/x_train.pkl', 'rb') as f:
    x_train = pickle.load(f)
with open('/data/phucnq/data/video_classification/y_train.pkl', 'rb') as f:
    y_train = pickle.load(f)

x_train = x_train.reshape(61607, 1, 2048)

import model as m

model = m.lstm((1, 2048), 101)
model.fit(x_train, y_train, epochs=100, batch_size=32)
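# model.py is not included in this snippet. Below is only a minimal sketch of
# what a builder like m.lstm(input_shape, n_classes) might contain, assuming a
# Keras Sequential over 2048-d frame features with a softmax over 101 classes
# and one-hot labels (use sparse_categorical_crossentropy for integer labels).
def lstm_builder_sketch(input_shape, n_classes):
    model = tf.keras.Sequential([
        tf.keras.layers.LSTM(256, input_shape=input_shape),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(n_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model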
n = 40336
split = 0.8
ind = np.random.permutation(n)
div = int(n * split)
partition = dict([])
partition['train'] = list(ind[:div])
partition['validation'] = list(ind[div:n])

'''TCN'''
# model = TCN(40, 1, [64, 48], fcl=32, kernel_size=2, dropout=0.4).to(args.device)
# criterion = nn.BCEWithLogitsLoss(pos_weight=torch.DoubleTensor([1.8224]).to(args.device), reduction='none')
# optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.75, 0.99))
# optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

model = lstm(embedding=64, hidden_size=64, fcl=32, num_layers=2,
             batch_size=batch_size, fcl_out=False, embed=True,
             droprate=0.25).to(args.device)
# model = LSTM_attn(embedding=32, hidden_size=64, num_layers=2, batch_size=batch_size, embed=True, droprate=0.25).to(args.device)

criterion = nn.BCEWithLogitsLoss(pos_weight=torch.DoubleTensor([1.8224]).to(args.device),
                                 reduction='none')  # 1.8224
# criterion = nn.MSELoss(reduction='none')  # 1.8224

# with base lr=1, LambdaLR makes the effective learning rate 0.9 ** epoch
optimizer = optim.SGD(model.parameters(), lr=1, momentum=0.5)
lr_lambda = lambda epoch: 0.9 ** epoch
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
# scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, factor=0.1, patience=5)

if load_model:
    model.load_state_dict(torch.load(load_model))
    train_losses = np.concatenate((np.load(load_folder + '/train_losses.npy'), np.zeros(epochs)))
    train_pos_acc = np.concatenate((np.load(load_folder + '/train_pos_acc.npy'), np.zeros(epochs)))
bot_x_train, bot_x_test, bot_y_train, bot_y_test = train_test_split(
    bots, bots_label, test_size=0.01, shuffle=True)
user_x_train, user_x_test, user_y_train, user_y_test = train_test_split(
    users, users_label, test_size=0.01, shuffle=True)

train_x = np.concatenate((bot_x_train, user_x_train))
train_label = np.concatenate((bot_y_train, user_y_train))
test_x = np.concatenate((bot_x_test, user_x_test))
test_label = np.concatenate((bot_y_test, user_y_test))
train_num = len(train_x)

print("Dataset Information" + '\n')
print(train_x.shape)      # (20146, 48, 11)
print(train_label.shape)  # (20146, 2)
print(test_x.shape)       # (5038, 48, 11)
print(test_label.shape)   # (5038, 2)

# Start training and testing
model.lstm(train_x, train_label, test_x, test_label, seq_length, data_dim,
           hidden_dim, batch_size, n_class, learning_rate, total_epochs)

'''
Cross-validation setup
# 10-fold configuration
KF = KFold(n_splits=10, random_state=None, shuffle=False)
train_bot, test_bot = train_test_split(bots, test_size=0.1, shuffle=False)
train_user, test_user = train_test_split(users, test_size=0.1, shuffle=False)
'''
def main(_):
    # Make checkpoint directory
    if not os.path.exists(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    # Create a dataset object
    if label_type == 'one_hot':
        data = utils.DataOneHot(
            debug=args.debug, patch_overlap=args.patch_overlap, im_size=args.im_size,
            band_n=args.band_n, t_len=args.t_len, path=args.path, class_n=args.class_n,
            pc_mode=args.pc_mode, test_n_limit=args.test_n_limit,
            memory_mode=args.memory_mode,
            balance_samples_per_class=args.balance_samples_per_class,
            test_get_stride=args.test_get_stride, n_apriori=args.n_apriori,
            patch_length=args.patch_len, squeeze_classes=args.squeeze_classes,
            im_h=args.im_h, im_w=args.im_w, id_first=args.id_first,
            train_test_mask_name=args.train_test_mask_name,
            test_overlap_full=args.test_overlap_full, ram_store=args.ram_store,
            patches_save=args.patches_save)
    elif label_type == 'semantic':
        data = utils.DataSemantic(
            debug=args.debug, patch_overlap=args.patch_overlap, im_size=args.im_size,
            band_n=args.band_n, t_len=args.t_len, path=args.path, class_n=args.class_n,
            pc_mode=args.pc_mode, test_n_limit=args.test_n_limit,
            memory_mode=args.memory_mode,
            balance_samples_per_class=args.balance_samples_per_class,
            test_get_stride=args.test_get_stride, n_apriori=args.n_apriori,
            patch_length=args.patch_len, squeeze_classes=args.squeeze_classes,
            im_h=args.im_h, im_w=args.im_w, id_first=args.id_first,
            train_test_mask_name=args.train_test_mask_name,
            test_overlap_full=args.test_overlap_full, ram_store=args.ram_store,
            patches_save=args.patches_save)

    # Load images and create dataset (extract patches)
    if args.memory_mode == "ram":
        data.create()
        deb.prints(data.ram_data["train"]["ims"].shape)

    # Run tensorflow session
    with tf.Session() as sess:
        # Create a neural network object (define model graph).
        # Every model class takes the same constructor arguments, so they are
        # collected once and unpacked into whichever class is selected.
        model_kwargs = dict(
            batch_size=args.batch_size, epoch=args.epoch, train_size=args.train_size,
            timesteps=args.timesteps, patch_len=args.patch_len, kernel=args.kernel,
            channels=args.channels, filters=args.filters, n_classes=args.n_classes,
            checkpoint_dir=args.checkpoint_dir, log_dir=args.log_dir,
            data=data.ram_data, conf=data.conf, debug=args.debug)

        if args.model == 'convlstm':
            model = conv_lstm(sess, **model_kwargs)
        elif args.model == 'conv3d':
            model = Conv3DMultitemp(sess, **model_kwargs)
        elif args.model == 'unet':
            model = UNet(sess, **model_kwargs)
        elif args.model == 'smcnn':
            model = SMCNN(sess, **model_kwargs)
        elif args.model == 'smcnnlstm':
            model = SMCNNlstm(sess, **model_kwargs)
        elif args.model == 'smcnn_unet':
            model = SMCNN_UNet(sess, **model_kwargs)
        elif args.model == 'smcnn_conv3d':
            model = SMCNN_conv3d(sess, **model_kwargs)
        elif args.model == 'lstm':
            model = lstm(sess, **model_kwargs)
        elif args.model == 'convlstm_semantic':
            model = conv_lstm_semantic(sess, **model_kwargs)
        elif args.model == 'smcnn_semantic':
            model = SMCNN_semantic(sess, **model_kwargs)

        if args.phase == 'train':
            # Train only once
            model.train(args)
        elif args.phase == 'repeat':
            # Train for a specific number of repetitions
            model.train_repeat(args)
        elif args.phase == 'test':
            # Test best model from experiment repetitions
            model.test(args)
dataset = [
    midiread(f, r, dt).piano_roll.astype(numpy.float32) for f in train_files
]
num_samples = len(dataset)
print('total samples: %d' % num_samples)

x = tf.placeholder(tf.float32, [None, n_steps, n_input])
state = tf.placeholder(tf.float32, [None, n_hidden])
output = tf.placeholder(tf.float32, [None, n_hidden])

learning_rate_variable = tf.Variable(float(learning_rate), trainable=False)
learning_rate_decay_op = learning_rate_variable.assign(
    learning_rate_variable * learning_rate_decay)

cost, train_op, generator = model.lstm(x, state, output, n_steps, n_input,
                                       n_hidden, n_output, learning_rate_variable)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    losses = collections.deque(maxlen=3)
    loss_per_check = 0.0
    for epoch in range(num_epochs):
        numpy.random.shuffle(dataset)
        # dataset: n_samples * [n_frames, n_input]
        for b in range(0, len(dataset) - batch_size + 1, batch_size):
            data_batch = dataset[b:b + batch_size]
            min_len = min([len(sequence) for sequence in data_batch])
            total_cost = list()
            for i in range(0, min_len - n_steps + 1, n_steps):
                x_batch = numpy.concatenate([
if df.iloc[i, 6] > '2018':
    pre_x[df.iloc[i, 2]].append(data_scaled[i - 3:i, 0])
    pre_y.append(data_scaled[i, 0])

train = myDataset(train_x, train_y)
test = myDataset(test_x, test_y)
pre_d = dict()
for r in pre_x:
    pre_d[r] = DataLoader(myDataset(pre_x[r], pre_y), batch_size=1,
                          num_workers=4, shuffle=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = lstm(train_x[0].shape[0])
model = model.to(device)
criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 4, gamma=0.7, last_epoch=-1)

train_loader = DataLoader(train, batch_size=1, num_workers=4, shuffle=False)
epoch = 0
prev = float('inf')
model.load_state_dict(torch.load('best_model.pkl'))
if month == 13:
    month = 1
cur = np.zeros([1, 15], dtype=np.double)
cur[0][0] = data_scaled[i - 1, 0]
cur[0][1] = data_scaled[i - 2, 0]
cur[0][2] = data_scaled[i - 3, 0]
cur[0][month + 2] = 1
train_x.append(cur)
train_y.append(data_scaled[i, 0])

train = myDataset(train_x, train_y)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = lstm(1)
model = model.to(device)
criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 4, gamma=0.7, last_epoch=-1)

train_loader = DataLoader(train, batch_size=1, num_workers=4, shuffle=False)
epoch = 0
prev = float('inf')
while True:
from model.lstm import *
import gensim
from gensim.models import word2vec

sls = lstm("bestsem.p", load=True, training=False)

sa = "A truly wise man"
sb = "He is smart"
# rescale the predicted similarity: * 4.0 + 1.0 maps a [0, 1] score onto the 1-5 range
print(sls.predict_similarity(sa, sb) * 4.0 + 1.0)