def train(args):
    assert args.num_classes
    common.make_dir(args.checkout_dir)
    nnet = DNN((args.left_context + args.right_context + 1) * args.feat_dim,
               hidden_layer, hidden_size, args.num_classes, dropout=dropout)
    print(nnet)
    nnet.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = th.optim.Adam(nnet.parameters(), lr=args.learning_rate)
    train_dataset = THCHS30(root=args.data_dir, data_type='train',
                            left_context=left_context, right_context=right_context,
                            model_type='dnn')
    train_loader = data.DataLoader(dataset=train_dataset, batch_size=args.min_batch,
                                   shuffle=True, num_workers=6)
    test_dataset = THCHS30(root=args.data_dir, data_type='test',
                           left_context=left_context, right_context=right_context,
                           model_type='dnn')
    test_loader = data.DataLoader(dataset=test_dataset, batch_size=args.min_batch,
                                  shuffle=True, num_workers=6)
    cross_validate(-1, nnet, test_loader, test_dataset.num_frames)
    for epoch in range(args.num_epochs):
        common.train_one_epoch(nnet, criterion, optimizer, train_loader)
        cross_validate(epoch, nnet, test_loader, test_dataset.num_frames)
        th.save(nnet, common.join_path(args.checkout_dir, 'dnn.{}.pkl'.format(epoch + 1)))
def Generator(source, target, pixelda=False, is_resize=True, dataset='NW', sensor_num=0):
    if is_resize:
        return source2target_square.Feature(dataset=dataset, sensor_num=sensor_num)
    else:
        return DNN.Feature()
def inference(args, cnn_features):
    tf.reset_default_graph()
    with tf.Session() as sess:
        net = DNN(sess)
        net.build_model()
        net.inference(cnn_features=cnn_features,
                      label_file=args.label_file,
                      gen_from=args.gen_from,
                      out_path=args.output_folder,
                      bsize=args.bsize)
def train_dnn():
    params = {
        "offline_model_dir": "../weights",
        # deep part score fn
        "fc_type": "fc",
        "fc_dim": 32,
        "fc_dropout": 0.,
    }
    params.update(params_common)

    X_train, X_valid = load_data("train"), load_data("vali")
    X_test = load_data("test")

    model = DNN("ranking", params, logger)
    model.fit(X_train, validation_data=X_valid)
    model.save_session()
    model.predict(X_test, 'pred.txt')
def predict():
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    df = pd.read_csv(config.TEST_PATH, header=None)
    dataset = PicDataset(df.loc[:, 1:])
    preds = np.zeros((len(dataset), 256))
    # average the predictions of the five fold models
    for i in range(5):
        temp = np.zeros((len(dataset), 256))
        model = DNN()
        model.load_state_dict(torch.load(f'./models/model_{i}.bin'))
        model.to(device)
        model.eval()
        for j in range(len(dataset)):
            x, _ = dataset[j]
            x = x.to(device)
            y = model(x)
            temp[j, :] = y.detach().cpu().numpy()
        preds += temp
    preds /= 5
    df = pd.DataFrame(np.concatenate([np.arange(1, 921).reshape(-1, 1), preds], axis=1),
                      columns=np.arange(257))
    df[0] = df[0].astype('int')
    df.to_csv('./predictions.csv', index=False, header=False)
def run():
    df = pd.read_csv(config.TRAIN_PATH)
    kfold = KFold(n_splits=5, random_state=config.SEED, shuffle=True)
    fold_losses = []
    for i, (train_idx, val_idx) in enumerate(kfold.split(df)):
        print("-------------------------------------------------------")
        print(f"Training fold {i}")
        print("-------------------------------------------------------")
        train = df.iloc[train_idx]
        validation = df.iloc[val_idx]
        train_dataset = PicDataset(train)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=config.BATCH_SIZE
        )
        val_dataset = PicDataset(validation)
        val_data_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=config.BATCH_SIZE
        )
        device = 'cuda:0' if torch.cuda.is_available() else "cpu"
        model = DNN()
        model.to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=config.LR)
        loss = 0
        for _ in range(config.EPOCHS):
            engine.train_fn(train_data_loader, model, optimizer, device)
            loss = engine.eval_fn(val_data_loader, model, device)
        print(f"Loss on fold {i} is {loss}")
        fold_losses.append(loss)
        torch.save(model.state_dict(), f'./models/model_{i}.bin')
    print(f"Average loss on cross validation is {sum(fold_losses) / 5}")
X_train, y_train, k = fetch_data()
print('Done')

# split the data into train and test randomly
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2)

scaler = MinMaxScaler(feature_range=(1, 10))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
y_train = scaler.fit_transform(y_train.reshape([y_train.shape[0], 1]))
y_test = scaler.transform(y_test.reshape([y_test.shape[0], 1]))

# scaling input features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# making target array a column vector
y_train = y_train.reshape((y_train.shape[0], 1))
y_test = y_test.reshape((y_test.shape[0], 1))
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

print('Starting')
print('Training the model')
# call our model
regr = DNN()
prediction = regr.fit(X_train, y_train, k, X_test)
error = np.mean(abs(1 - (y_test / (prediction + 1e-8))))
print('Test error is: ' + str(error * 100.00) + '%')
try:
    os.makedirs(args.output)
except OSError:
    pass
try:
    os.makedirs(args.log)
except OSError:
    pass

# load the training and test data
train_loader, test_loader = get_data(args)

# use cuda if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

input_size = 28 * 28
output_size = args.num_classes
model = DNN(input_size=input_size, output_size=output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                             weight_decay=args.weight_decay)

# compute loss and accuracy after every mini-batch
model.train()
for epoch in range(args.epochs):
    correct = 0
    total = 0
    for idx, (x, y) in enumerate(train_loader):
        x, y = x.to(device), y.to(device)
        y_pred = model(x)
        _, y_pred_t = torch.max(y_pred.data, 1)
        total += y.size(0)
                    required=True,
                    help='Path to load the trained DML model')
parser.add_argument('-pl', '--positive_labels', default='ESLMV',
                    help='Labels in CC_WEB_VIDEO datasets that are '
                         'considered positive. default=\'ESLMV\'')
args = vars(parser.parse_args())

print('loading data...')
cc_dataset = pk.load(open('datasets/cc_web_video.pickle', 'rb'))
cc_features = np.load(args['evaluation_set'])

model = DNN(cc_features.shape[1],
            None,
            args['model_path'],
            load_model=True,
            trainable=False)
cc_embeddings = model.embeddings(cc_features)

print('Evaluation set file:', args['evaluation_set'])
print('Path to DML model:', args['model_path'])
print('Positive labels:', args['positive_labels'])

print('\nEvaluation Results')
print('==================')
similarities = calculate_similarities(cc_dataset['queries'], cc_embeddings)
mAP, pr_curve = evaluate(cc_dataset['ground_truth'], similarities,
                         positive_labels=args['positive_labels'],
                         all_videos=False)
print('CC_WEB_VIDEO mAP:', mAP)
#! -*- coding: utf-8 -*-
import os
import torch
import time
import torch.nn.functional as F

from args import args
from model import DNN
from utils import cos_sim
from processing import BuildExamples

# load the trained model
MODEL_PATH = os.path.join(args.get('data_path'), 'model/embedded_adam_0.001_19.model')
print(MODEL_PATH)
model = DNN(vocab_size=5265, embedding_size=200, hidden_size=512)
model.load_state_dict(torch.load(MODEL_PATH, map_location=torch.device('cpu')))

# load the vocabulary
corpus = BuildExamples()
corpus.load_vocabulary(path=os.path.join(args.get('data_path'), 'vocabulary.txt'))

MAX_LEN = args.get('max_len', 20)


def get_hidden_state(sentence):
    line = sentence.strip().split()
    line = ['bos'] + line + ['eos']
    sentence2id = torch.LongTensor([corpus.words2id.get(item, 0) for item in line]).view(1, -1)
    with torch.no_grad():
def optimize(X_train, y_train, X_val, y_val, n_hidden1=300, n_hidden2=100,
             dropout_rate=0.5, epochs=10, batch_size=64, learning_rate=1e-3):
    # Training
    # ==================================================
    # Generate batches
    def fetch_batch(batch_size, iteration, epoch):
        np.random.seed(epoch)
        shuffled_indices = np.random.permutation(X_train.shape[0])
        indices = shuffled_indices[batch_size * iteration:batch_size * (iteration + 1)]
        return X_train[indices, :], y_train[indices]

    dnn = DNN(height=HEIGHT, width=WIDTH, n_outputs=10, n_hidden1=n_hidden1,
              n_hidden2=n_hidden2, dropout_rate=dropout_rate, seed=RANDOM_STATE)
    n_train = X_train.shape[0]

    # Define training procedure
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(dnn.loss)
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Output directory for models and summaries
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", get_time()))
    info("Writing to {}\n".format(out_dir))

    # Summaries for loss
    loss_summary = tf.summary.scalar("loss", dnn.loss)

    # Train summaries
    train_summary_op = tf.summary.merge([loss_summary])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, tf.get_default_graph())

    # Val summaries
    val_summary_op = tf.summary.merge([loss_summary])
    val_summary_dir = os.path.join(out_dir, "summaries", "val")
    val_summary_writer = tf.summary.FileWriter(val_summary_dir, tf.get_default_graph())

    # Checkpoint directory. TensorFlow assumes this directory already exists,
    # so we need to create it.
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(max_to_keep=10)

    session_conf = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=session_conf) as sess:
        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        for epoch in range(epochs):
            # Training loop. For each batch...
            for iteration in range(n_train // batch_size):
                X_batch, y_batch = fetch_batch(batch_size, iteration, epoch)
                sess.run([train_op, extra_update_ops],
                         feed_dict={
                             dnn.training: True,
                             dnn.X: X_batch,
                             dnn.y: y_batch
                         })

            # Evaluate the model on a training batch
            summary_train, acc_train = sess.run(
                [train_summary_op, dnn.accuracy],
                feed_dict={
                    dnn.X: X_batch,
                    dnn.y: y_batch
                })
            train_summary_writer.add_summary(summary_train, global_step=epoch)

            # Evaluate the model on the validation set
            summary_val, acc_val = sess.run([val_summary_op, dnn.accuracy],
                                            feed_dict={
                                                dnn.X: X_val,
                                                dnn.y: y_val
                                            })
            val_summary_writer.add_summary(summary_val, global_step=epoch)

            info("Epoch: {0:3d}, Train accuracy: {1:.4f}, Val accuracy: {2:.4f}"
                 .format(epoch, acc_train, acc_val))

            # Save model
            path = saver.save(sess, checkpoint_prefix, global_step=epoch)
            debug("Saved model checkpoint to {}".format(path))

    cmd_text = 'python evaluate.py --checkpoint %s ...' % checkpoint_prefix
    info("Training complete. For evaluation: {}".format(cmd_text))
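# The loop above only hints at evaluation via `cmd_text`; evaluate.py itself
# is elided. A minimal sketch (an assumption, not this project's code) of
# restoring the latest checkpoint written by `saver.save` with the standard
# TF1 checkpoint API:
import tensorflow as tf

def restore_latest(checkpoint_dir, checkpoint_prefix):
    # The .meta file written for the first epoch holds the graph definition.
    saver = tf.train.import_meta_graph(checkpoint_prefix + "-0.meta")
    sess = tf.Session()
    saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))
    return sess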
class Agent:
    """
    Class representing a learning agent acting in an environment.
    """

    def __init__(self, p, lr, game="CartPole-v1", mean_bound=5,
                 reward_bound=495.0, save_model=10):
        """
        Constructor of the agent class.
        - game="CartPole-v1" : Name of the game environment
        - mean_bound=5       : Number of last acquired rewards considered for mean reward
        - reward_bound=495.0 : Reward acquired for completing an episode properly
        - save_model=10      : Interval for saving the model
        - p                  : Percentile for selecting training data
        - lr                 : Learning rate for the CE model
        """
        # Environment variables
        self.game = game
        self.env = gym.make(self.game)
        self.num_states = self.env.observation_space.shape[0]
        self.num_actions = self.env.action_space.n

        # Agent variables
        self.p = p * 100
        self.mean_bound = mean_bound
        self.reward_bound = reward_bound

        # DQN variables
        self.lr = lr
        self.model = DNN(self.num_states, self.num_actions, self.lr)
        self.save_model = save_model

        # File paths
        directory = os.path.dirname(__file__)
        self.path_model = os.path.join(directory, "../models/dnn.h5")
        self.path_plot = os.path.join(directory, "../plots/dnn.png")

        # Load model, if it already exists
        try:
            self.model.load(self.path_model)
        except Exception:
            print("Model does not exist! Creating a new model...")

    def get_action(self, state):
        """
        Returns an action for a given state, based on the current policy.
        - state : Current state of the agent
        """
        state = state.reshape(1, -1)
        policy = self.model.predict(state)[0]
        action = np.random.choice(self.num_actions, p=policy)

        return action

    def sample(self, num_episodes):
        """
        Returns samples of state/action tuples for a given number of episodes.
        - num_episodes : Number of episodes to sample
        """
        episodes = [[] for _ in range(num_episodes)]
        rewards = [0.0 for _ in range(num_episodes)]

        for episode in range(num_episodes):
            state = self.env.reset()
            total_reward = 0.0

            while True:
                action = self.get_action(state)
                next_state, reward, done, _ = self.env.step(action)
                episodes[episode].append((state, action))
                state = next_state

                # Penalize agent if pole could not be balanced until end of episode.
                if done and reward < 499.0:
                    reward = -100.0
                total_reward += reward

                if done:
                    total_reward += 100.0
                    rewards[episode] = total_reward
                    break

        return rewards, episodes

    def get_training_data(self, episodes, rewards):
        """
        Returns training data for the CE model.
        - episodes : List of state/action tuples
        - rewards  : List of gained rewards
        """
        x_train, y_train = [], []
        reward_bound = np.percentile(rewards, self.p)

        for episode, reward in zip(episodes, rewards):
            if reward >= reward_bound:
                states = [step[0] for step in episode]
                actions = [step[1] for step in episode]
                x_train.extend(states)
                y_train.extend(actions)

        x_train = np.asarray(x_train)
        y_train = to_categorical(y_train, num_classes=self.num_actions)

        return x_train, y_train, reward_bound

    def train(self, num_epochs, num_episodes, report_interval):
        """
        Trains the CE model for a given number of epochs and episodes.
        Outputting report information is controlled by a given time interval.
        - num_epochs      : Number of epochs to train
        - num_episodes    : Number of episodes to train
        - report_interval : Interval for outputting report information of training
        """
        total_rewards = []

        for epoch in range(1, num_epochs + 1):
            if epoch % self.save_model == 0:
                self.model.save(self.path_model)

            rewards, episodes = self.sample(num_episodes)
            x_train, y_train, reward_bound = self.get_training_data(
                episodes, rewards)

            mean_reward = np.mean(rewards)
            total_rewards.extend(rewards)
            mean_total_reward = np.mean(total_rewards[-self.mean_bound:])

            if epoch % report_interval == 0:
                # epoch already starts at 1, so report it directly
                print(f"Epoch: {epoch}/{num_epochs}"
                      f"\tMean Reward: {mean_reward:.2f}"
                      f"\tReward Bound: {reward_bound:.2f}")
                self.plot_rewards(total_rewards)

            if mean_total_reward > self.reward_bound:
                self.model.save(self.path_model)

            self.model.fit(x_train, y_train)

        self.model.save(self.path_model)

    def play(self, num_episodes):
        """
        Plays the game for a given number of episodes with the current model.
        - num_episodes : Number of episodes to play
        """
        for episode in range(1, num_episodes + 1):
            state = self.env.reset()
            total_reward = 0.0

            while True:
                self.env.render()
                action = self.get_action(state)
                state, reward, done, _ = self.env.step(action)
                total_reward += reward

                if done:
                    print(f"Episode: {episode}/{num_episodes}"
                          f"\tReward: {total_reward:.2f}")
                    break

    def plot_rewards(self, total_rewards):
        # use np.arange instead of range so the trend line can be computed
        x = np.arange(len(total_rewards))
        y = total_rewards
        slope, intercept, _, _, _ = linregress(x, y)

        plt.plot(x, y, linewidth=0.8)
        plt.plot(x, slope * x + intercept, color="red", linestyle="-.")
        plt.xlabel("Episode")
        plt.ylabel("Reward")
        plt.title("CE-Learning")
        plt.savefig(self.path_plot)
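# A minimal driver for the Agent class above -- a sketch under assumptions:
# it is appended to the same file, and p is given as a fraction (the
# constructor multiplies it by 100 to obtain a percentile).
if __name__ == "__main__":
    agent = Agent(p=0.7, lr=1e-3)
    agent.train(num_epochs=50, num_episodes=100, report_interval=10)
    agent.play(num_episodes=5)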
Reg = None      # regularization: L1, L2, or None
alpha = 0.1
dropout = 0.5   # (0, 1.0]; 1.0 means no dropout
iteration = 2000
step = 50

fig, ax = plt.subplots(num_layers + 1, 2)
# fig.suptitle('Weights/Bias Distribution')
loss_all = np.zeros([int(iteration / step), 2])
fig.tight_layout()
fig2 = plt.figure()
fig2.suptitle('Learning curve')
learning_curve = fig2.add_subplot(111)

##############################
mymodel = DNN.model(num_layers, num_neurons, input_size, output_size, Reg, alpha)

with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=10)
    writer = tf.summary.FileWriter(save_dir, sess.graph)
    sess.run(tf.global_variables_initializer())
    for i in range(iteration):
        # batch_xs, batch_ys = mnist.train.next_batch(200)
        now = i % 5
        batch_xs = mnist.train.images[i * 200:(i + 1) * 200, :]
        batch_ys = mnist.train.labels[i * 200:(i + 1) * 200, :]
        _, accu, loss, merge = sess.run([
            mymodel.optimize, mymodel.accuracy, mymodel.cross_entropy,
            mymodel.sum_train
        ],
def main():
    # do things before training
    args = get_args()
    save_dir = os.path.join(args.sd, args.tm, args.ft)
    print(args)
    input("*****Please check the params also --> {} <--, Enter to continue*****".format(save_dir))
    os.system('mkdir -p {}'.format(save_dir))

    mode = args.mode
    batch_size = args.bs
    feature_type = args.ft
    num_epochs = args.ne

    # loading train data
    if mode == "train":
        train_data, train_label = load_data("train", train_protocol, mode=mode,
                                            feature_type=feature_type)
        # for i in range(len(train_data)):
        #     mean = np.mean(train_data[i], axis=0)
        #     std = np.std(train_data[i], axis=0)
        #     train_data[i] = (train_data[i] - mean) / std
        train_dataset = ASVDataSet(train_data, train_label, mode=mode)
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                      num_workers=2, shuffle=True)

        dev_data, dev_label = load_data("dev", dev_protocol, mode=mode,
                                        feature_type=feature_type)
        # for i in range(len(dev_data)):
        #     mean = np.mean(dev_data[i], axis=0)
        #     std = np.std(dev_data[i], axis=0)
        #     dev_data[i] = (dev_data[i] - mean) / std
        dev_dataset = ASVDataSet(dev_data, dev_label, mode=mode)
        dev_dataloader = DataLoader(dev_dataset, batch_size=batch_size,
                                    num_workers=2, shuffle=False)
    elif mode == "final":
        train_data, train_label = load_data(["train", "dev"], final_protocol,
                                            mode=mode, feature_type=feature_type)
        train_data = np.array(train_data)
        mean = np.mean(train_data, axis=0)
        std = np.std(train_data, axis=0)
        train_data = (train_data - mean) / std
        train_dataset = ASVDataSet(train_data, train_label, mode="train")
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                      num_workers=2, shuffle=True)

    if "lcnn" in args.tm:
        model = LCNN(input_dim=77, num_classes=2)
    elif "dnn" in args.tm:
        model = DNN(990, 512, 2)  # mfcc/imfcc/cqt: 429, cqcc: 990
    elif "vgg" in args.tm:
        model = VGG(77, "VGG11")
    elif "cnn" in args.tm:
        model = CNN(77, 2, 0)

    if use_cuda():
        model = model.cuda()
    print(model)

    cross_entropy = nn.CrossEntropyLoss()
    optimizer = optim.ASGD(params=model.parameters(), lr=args.lr, weight_decay=1e-4)
    scheduler = ReduceLROnPlateau(optimizer, patience=0, verbose=True,
                                  factor=0.1, min_lr=1e-7)

    best_dev_accuracy = 0
    best_train_accuracy = 0
    for epoch in range(num_epochs):
        correct = 0
        total = 0
        train_loss = 0
        model.train()
        for tmp in tqdm(train_dataloader, desc="Epoch {}".format(epoch + 1)):
            data = Variable(tmp['data'])
            label = Variable(tmp['label']).view(-1)
            if use_cuda():
                data, label = data.cuda(), label.cuda()

            optimizer.zero_grad()
            predict = model(data)
            _, predict_label = torch.max(predict.data, 1)
            correct += (predict_label.cpu() == label.cpu().data).sum().item()
            total += label.size(0)
            loss = cross_entropy(predict, label.long())
            loss.backward()
            optimizer.step()
            train_loss += loss.item()  # loss.data[0] is deprecated

        train_accuracy = correct / total

        if mode == "final":
            if train_accuracy > best_train_accuracy:
                best_train_accuracy = train_accuracy
                save_checkpoint(
                    {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': train_accuracy},
                    save_path=os.path.join(save_dir, "best_eval.pkl")
                )
            save_checkpoint(
                {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': train_accuracy},
                save_path=os.path.join(save_dir, "final_eval.pkl")
            )
            print("Epoch [%d/%d], Loss: %.4fe-4, Train Acc %.2f%%" % (
                epoch + 1, num_epochs, 1e4 * train_loss / total, train_accuracy * 100))
            print(print_str.format("Best Acc: {}".format(best_train_accuracy)))
            scheduler.step(train_loss / total)
            if use_cuda():
                model.cuda()

        if mode == "train":
            dev_accuracy, dev_loss = get_test_accuracy(dev_dataloader, model, cross_entropy)
            save_checkpoint(
                {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': dev_accuracy},
                save_path=os.path.join(save_dir, 'final_dev.pkl')
            )
            if dev_accuracy > best_dev_accuracy:
                best_dev_accuracy = dev_accuracy
                save_checkpoint(
                    {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': dev_accuracy},
                    save_path=os.path.join(save_dir, 'best_dev.pkl')
                )
            if use_cuda():
                model.cuda()
            print("Epoch [%d/%d], Train Loss: %.4fe-4, Train Acc %.2f%% Dev Loss: %.4fe-4 Dev Acc %.2f%%" % (
                epoch + 1, num_epochs, 1e4 * train_loss / total, train_accuracy * 100,
                dev_loss, dev_accuracy * 100
            ))
            print(print_str.format("Best Acc: {}".format(best_dev_accuracy)))
            scheduler.step(dev_loss)
# evaluate the FM model
fm_pre = model(X_test)
fm_pre = [1 if x > 0.5 else 0 for x in fm_pre]

# **************** Statement 2 of Training *****************#
# take the latent vectors v learned by FM
v = model.variables[2]  # [onehot_dim, k]
X_train = tf.cast(tf.expand_dims(X_train, -1), tf.float32)  # [None, onehot_dim, 1]
X_train = tf.reshape(tf.multiply(X_train, v),
                     shape=(-1, v.shape[0] * v.shape[1]))  # [None, onehot_dim*k]

hidden_units = [256, 128, 64]
model = DNN(hidden_units, 1, 'relu')
optimizer = optimizers.SGD(0.0001)
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = train_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
model.fit(train_dataset, epochs=50)

# evaluate the FNN model
X_test = tf.cast(tf.expand_dims(X_test, -1), tf.float32)
X_test = tf.reshape(tf.multiply(X_test, v), shape=(-1, v.shape[0] * v.shape[1]))
fnn_pre = model(X_test)
for st in train_x1:
    for w in st:
        tmp.add(w)
word_index = {w: i for i, w in enumerate(tmp)}
# import pickle
# with open("bow.pkl", "wb") as f:
#     pickle.dump(word_index, f)

# build bag-of-words count vectors
x = torch.zeros(len(train_x1), len(word_index))
for i in range(len(train_x1)):
    for w in train_x1[i]:
        x[i][word_index[w]] += 1
print(x.size())

print("\nConstructing model...", flush=True)
model = DNN(x.size(1)).to(device)
total_param = sum(p.numel() for p in model.parameters())
trainable_param = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("{} parameters with {} trainable".format(total_param, trainable_param), flush=True)

print("\nStart training...", flush=True)
train_dataset1 = TwitterDataset(x, train_y1)
train_loader1 = torch.utils.data.DataLoader(dataset=train_dataset1, batch_size=BATCH,
                                            shuffle=True, num_workers=4)
train_model(train_loader1, model, device, LR)
def train_plot(training_data):
    # plot the training data
    plt.plot(training_data['loss'], linewidth=2, label='Train')
    plt.plot(training_data['val_loss'], linewidth=2, label='Valid')
    plt.legend(loc='upper right')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.show()


if __name__ == '__main__':
    # import data
    df = pd.read_csv('Book1.csv', index_col=0)
    x_train, x_valid, y_train, y_valid, X, y = preprocessing(df)
    input_dim = x_train.shape[1]

    # create an instance of the model class
    model = DNN(input_dim)
    model.summary()

    # set save path and train
    model.set_save_path('dnn_v1.h5')
    output = model.fit(x_train, y_train, x_valid, y_valid)
    train_plot(output)

    # load model and predict
    model.load_model('dnn_v1.h5')
    result = model.predict(X)
# create folder for generated data
gen_data_path = os.path.join(out_path, gen_data_fdr)
if not os.path.exists(gen_data_path):
    os.makedirs(gen_data_path)
if not os.path.exists(scaler_dir):
    os.makedirs(scaler_dir)
print('here')

# create folder for model checkpoints
checkpoint_path = os.path.join(out_path, checkpoint_fdr)
if not os.path.exists(checkpoint_path):
    os.makedirs(checkpoint_path)

model = DNN(input_size, hidden_size, out_size)
model = torch.nn.DataParallel(model.to(device), device_ids=use_devices)
# print(model)

# save_dir = "FPGA"
checkpoint = torch.load('./segan_data_out/20200422_0713/checkpoints/state-20.pkl')
# from collections import OrderedDict
state_dict = checkpoint['DNN']
# for k, v in state_dict.items():
#     name = k[7:]  # remove `module.`
#     v = v.cpu().numpy()
#     np.savetxt(os.path.join(save_dir, name), v, newline="\n")
#     print(name, v.shape)

scaler_path_input = os.path.join(scaler_dir, "scaler_input.p")
scaler_input = pickle.load(open(scaler_path_input, 'rb'))
scaler_path_label = os.path.join(scaler_dir, "scaler_label.p")
print('loading data...')
train_set = np.load(args['train_set'])
triplets = np.load(args['triplets'])

if args.get('evaluation_set'):
    args['injection'] = np.min([args['injection'], 10000])
    print('Evaluation set file:', args['evaluation_set'])
    print('Evaluation triplet file:', args['evaluation_triplets'])
    print('Injected triplets:', args['injection'])
    print('loading data...')
    evaluation_set = np.load(args['evaluation_set'])
    eval_triplets = np.load(args['evaluation_triplets']) + len(train_set)
    np.random.shuffle(eval_triplets)
    train_set = np.concatenate([train_set, evaluation_set], axis=0)
    triplets = np.concatenate([triplets, eval_triplets[:args['injection']]], axis=0)

try:
    layers = [int(l) for l in args['layers'].split(',') if l]
except Exception:
    raise Exception('--layers argument is in wrong format. Specify the number '
                    'of neurons in each layer separated by a comma \',\'')

model = DNN(train_set.shape[1],
            args['model_path'],
            hidden_layer_sizes=layers,
            learning_rate=args['learning_rate'],
            weight_decay=args['weight_decay'],
            gamma=args['gamma'])
train_dml_network(model, train_set, triplets, args['epochs'], args['batch_sz'])
def DomainClassifier(source, target, is_resize=True, dataset='NW'):
    if is_resize:
        return source2target_square.DomainPredictor(dataset=dataset)
    else:
        return DNN.DomainPredictor()
def train(args, config, io):
    train_loader, validation_loader = get_loader(args, config)
    device = torch.device("cuda" if args.cuda else "cpu")
    # print(len(train_loader), len(validation_loader))

    # Try to load models
    model = DNN(args).to(device)
    """if device == torch.device("cuda"):
        model = nn.DataParallel(model)"""
    if args.model_path != "":
        model.load_state_dict(torch.load(args.model_path))
    # for para in list(model.parameters())[:-5]:
    #     para.requires_grad = False
    # print(model)

    if args.use_sgd:
        # print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr * 100,
                        momentum=args.momentum, weight_decay=1e-4)
    else:
        # print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
    """opt = optim.Adam([
        {'params': list(model.parameters())[:-1], 'lr': args.lr / 50, 'weight_decay': 1e-4},
        {'params': list(model.parameters())[-1], 'lr': args.lr, 'weight_decay': 1e-4}
    ])
    """

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
    criterion = nn.MSELoss()

    best_test_loss = 9999999.
    for epoch in range(args.epochs):
        startTime = time.time()

        ####################
        # Train
        ####################
        train_loss = 0.0
        train_dis = 0.0
        count = 0.0
        model.train()
        for data, label in train_loader:
            data, label = data.to(device), label.to(device)
            data = drop(jitter(data, device), device)
            # data = jitter(data, device, delta=0.05)
            batch_size = data.shape[0]
            logits = model(data)
            loss = criterion(logits, label)
            opt.zero_grad()
            loss.backward()
            opt.step()
            dis = distance(logits, label)
            count += batch_size
            train_loss += loss.item() * batch_size
            train_dis += dis.item() * batch_size
        scheduler.step()
        outstr = 'Train %d, loss: %.6f, distance: %.6f' % (
            epoch, train_loss * 1.0 / count, train_dis * 1.0 / count)
        io.cprint(outstr)

        ####################
        # Evaluation
        ####################
        test_loss = 0.0
        test_dis = 0.0
        count = 0.0
        model.eval()
        with torch.no_grad():
            for data, label in validation_loader:
                data, label = data.to(device), label.to(device)
                batch_size = data.shape[0]
                logits = model(data)
                loss = criterion(logits, label)
                dis = distance(logits, label)
                count += batch_size
                test_loss += loss.item() * batch_size
                test_dis += dis.item() * batch_size
        outstr = 'Test %d, loss: %.6f, distance: %.6f' % (
            epoch, test_loss * 1.0 / count, test_dis * 1.0 / count)
        io.cprint(outstr)

        if test_loss <= best_test_loss:
            best_test_loss = test_loss
            torch.save(model.state_dict(), 'checkpoints/%s/models/model.t7' % args.exp_name)
            torch.save(model, (config.root + config.model_path))
        io.cprint('Time: %.3f sec' % (time.time() - startTime))
def RMSE(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))


X_data_name_1 = '../../.npz'
y_data_name_1 = '../../.npz'
X_data_name_2 = '../../.npz'
y_data_name_2 = '../../.npz'

X_train, y_train = load_from_npz(X_data_name_1), load_from_npz(y_data_name_1)
X_test, y_test = load_from_npz(X_data_name_2), load_from_npz(y_data_name_2)
X_train, X_test = normalize(X_train, X_test)

# %% DNN (only fully-connected layers)
dnn = DNN()
# flatten each H x W x 3 image into a single feature vector; X_test must use
# its own (pre-reshape) dimensions, not those of the already-flattened X_train
X_test = np.reshape(X_test, (len(X_test), len(X_test[1]) * len(X_test[2]) * 3))
X_train = np.reshape(X_train, (len(X_train), len(X_train[1]) * len(X_train[2]) * 3))

# optimization details
adam = Adam(lr=lrf, decay=lr_decay)
dnn.compile(loss='mean_squared_error', optimizer=adam, metrics=[RMSE])

for epoch in range(1, maxepoches):
    if epoch % 25 == 0 and epoch > 0:
        lrf /= 2
        adam = Adam(lr=lrf, decay=lr_decay)
def main():
    print('> Starting execution...')
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--fit', action='store_true',
                       help='fit the tuned model on digits 0-4')
    group.add_argument('--transfer', action='store_true',
                       help='train a pretrained model on digits 5-9')
    parser.add_argument('--batch-size', type=int, default=256, metavar='N',
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs', type=int, default=50, metavar='E',
                        help='number of epochs to train (default: 50)')
    parser.add_argument('--lr', type=float, default=1e-3, metavar='L',
                        help='learning rate (default: 1e-3)')
    parser.add_argument('--early-stopping', type=int, default=7, metavar='E',
                        help='early stopping (default: 7 epochs)')
    parser.add_argument('--size', type=int, default=100, metavar='S',
                        help='size of the training data for transfer learning (default: 100)')
    parser.add_argument('--seed', type=int, default=23, metavar='S',
                        help='random seed (default: 23)')
    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()  # use cuda if available
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.manual_seed(args.seed)  # random seed

    print('> Loading MNIST data')
    train_set = datasets.MNIST(MNIST_DATA_DIR, train=True, download=True,
                               transform=transforms.Compose([
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.1307, ), (0.3081, ))
                               ]))
    test_set = datasets.MNIST(MNIST_DATA_DIR, train=False, download=True,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.1307, ), (0.3081, ))
                              ]))

    train_digits_04 = np.where(train_set.train_labels < 5)[0]
    train_digits_59 = np.where(train_set.train_labels > 4)[0]
    test_digits_04 = np.where(test_set.test_labels < 5)[0]
    test_digits_59 = np.where(test_set.test_labels > 4)[0]

    if args.fit:
        # Training the tuned model on digits 0-4
        print('> Training a new model on MNIST digits 0-4')
        X_train_04, y_train_04, X_valid_04, y_valid_04 = data_to_numpy(
            train_set, test_set, INPUT_DIM, train_digits_04, test_digits_04)

        torch.manual_seed(args.seed)
        print('> Initializing the model')
        model = DNN(INPUT_DIM, OUTPUT_DIM, HIDDEN_DIM, batch_norm=True)
        model.apply(init_he_normal)  # He initialization
        model = model.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        print('> Training the model')
        model, _, _ = train_model(model, device, X_train_04, y_train_04,
                                  criterion, optimizer,
                                  X_valid=X_valid_04, y_valid=y_valid_04,
                                  batch_size=args.batch_size,
                                  n_epochs=args.epochs,
                                  early_stopping=args.early_stopping)

        print(f'> Saving the model state at {MODEL_04_PATH}')
        torch.save(model.state_dict(), MODEL_04_PATH)
    elif args.transfer:
        # Transfer learning
        print('> Training a model on MNIST digits 5-9 from a pretrained model for digits 0-4')
        if os.path.isfile(MODEL_04_PATH):
            print('> Loading the pretrained model')
            model = DNN(INPUT_DIM, OUTPUT_DIM, HIDDEN_DIM, batch_norm=True).to(device)
            model.load_state_dict(torch.load(MODEL_04_PATH))
            for param in model.parameters():
                param.requires_grad = False
            # Parameters of newly constructed modules have requires_grad=True by default
            model.fc4 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
            model.fc5 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
            model.out = nn.Linear(HIDDEN_DIM, OUTPUT_DIM)
            # move the freshly constructed layers (created on CPU) to the device
            model = model.to(device)
            print('> Using saved model state')
        else:
            print('> Model state file is not found, fit a model before the transfer learning')
            print('> Stopping execution')
            return

        X_train_59, y_train_59, X_valid_59, y_valid_59 = data_to_numpy(
            train_set, test_set, INPUT_DIM, train_digits_59[:args.size], test_digits_59)
        # shift the labels 5-9 into the range 0-4 expected by the output layer
        y_train_59 = y_train_59 - 5
        y_valid_59 = y_valid_59 - 5

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        print('> Training the model')
        model, _, _ = train_model(model, device, X_train_59, y_train_59,
                                  criterion, optimizer,
                                  X_valid=X_valid_59, y_valid=y_valid_59,
                                  batch_size=args.batch_size,
                                  n_epochs=args.epochs,
                                  early_stopping=args.early_stopping)

        print(f'> Saving the model state at {MODEL_59_PATH}')
        torch.save(model.state_dict(), MODEL_59_PATH)
    else:
        print('> Incorrect mode, try either `--fit` or `--transfer`')
        print('> Stopping execution')
                                 model_path=args.word_model, dict_path=args.dict,
                                 seq_len=seq_len)
    test_data = DataLoader(test_words, batch_size=args.batch_size, shuffle=False)
    scores += model_manager.get_all_predictions(test_data)

    file = open(args.predict, 'w')
    file.write('id,label\n')
    for i, score in enumerate(scores):
        pred = 1 if score > threshold else 0
        file.write('{},{}\n'.format(i, pred))
    file.close()

elif args.mode == 'bow':
    model = DNN()
    train_words = BOW(mode='train', x_path=args.train_x, y_path=args.train_y)
    valid_words = BOW(mode='valid', x_path=args.train_x, y_path=args.train_y)
    train_data = DataLoader(train_words, batch_size=args.batch_size, shuffle=True)
    valid_data = DataLoader(valid_words, batch_size=args.batch_size, shuffle=False)
    manager = Manager(model, args)
    manager.train(train_data, valid_data)
if args.valid_ratio != 0:
    valid_size = int(len(train_x) * args.valid_ratio)
    print('Split %d/%d validation data...' % (valid_size, len(train_x)))
    train_x, train_y = shuffle(train_x, train_y)
    valid_x, valid_y = train_x[-valid_size:], train_y[-valid_size:]
    train_x, train_y = train_x[:-valid_size], train_y[:-valid_size]

num_users = max(train_x[:, 0])
num_movies = max(train_x[:, 1])

print('Select %s Model' % args.model)
if args.model == 'MF':
    model = Matrix_Factorization(num_users, num_movies, args.vector_dim, verbose=1)
elif args.model == 'DNN':
    model = DNN(num_users, num_movies, args.vector_dim, verbose=1)

adam = Adam(lr=1e-4)
csvlogger = CSVLogger(logger)
earlystopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')
checkpoint = ModelCheckpoint(params, monitor='val_loss', save_best_only=True,
                             save_weights_only=True, verbose=0, mode='min')
model.compile(loss='mse', optimizer=adam)
print('Start Training...')
def organize_features(sample):
    y = [sample[Act_inx], ]
    features = list(sample[Act_inx + 1:])
    return (features, y)


build_batch = (BuildBatch(BATCH_SIZE)
               .by(0, 'vector', float)
               .by(1, 'number', float))

if NET_ARCH == 'deep_net':
    model = deep_net(input_shape=(feature_dim, ))
    opti = Adam(lr=0.0001, beta_1=0.5)
elif NET_ARCH == 'DNN':
    model = DNN(input_shape=(feature_dim, ))
    opti = sgd(lr=0.01, momentum=0.9, clipnorm=1.0)
else:
    sys.exit("Network not defined correctly, check NET_ARCH.")

model.compile(optimizer=opti, loss='mean_squared_error', metrics=[Rsqured])


def train_network_batch(sample):
    tloss = model.train_on_batch(sample[0], sample[1])
    return (tloss[0], tloss[1])


def test_network_batch(sample):
    tloss = model.test_on_batch(sample[0], sample[1])
    return (tloss[0], )
def train(args, config, io):
    train_loader, validation_loader, unlabelled_loader = get_loader(args, config)
    device = torch.device("cuda" if args.cuda else "cpu")

    # Try to load models
    model = DNN(args).to(device)
    ema_model = DNN(args).to(device)
    for param in ema_model.parameters():
        param.detach_()

    if device == torch.device("cuda"):
        model = nn.DataParallel(model)
        ema_model = nn.DataParallel(ema_model)
    if args.model_path != "":
        model.load_state_dict(torch.load(args.model_path))
        ema_model.load_state_dict(torch.load(args.model_path))

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr * 100,
                        momentum=args.momentum, weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
    criterion = nn.MSELoss()
    consistency_criterion = nn.MSELoss()
    best_test_loss = 9999999.
    global_step = 0

    for epoch in range(args.epochs):
        startTime = time.time()

        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        ema_model.train()
        i = -1
        for (data, label), (u, _) in zip(cycle(train_loader), unlabelled_loader):
            i = i + 1
            # keep the labelled and unlabelled batches the same size
            if data.shape[0] != u.shape[0]:
                bt_size = np.minimum(data.shape[0], u.shape[0])
                data = data[0:bt_size]
                label = label[0:bt_size]
                u = u[0:bt_size]
            data, label, u = data.to(device), label.to(device), u.to(device)
            batch_size = data.shape[0]

            logits = model(data)
            class_loss = criterion(logits, label)

            u_student = jitter(u, device)
            u_teacher = jitter(u, device)
            logits_unlabeled = model(u_student)
            ema_logits_unlabeled = ema_model(u_teacher)
            ema_logits_unlabeled = Variable(ema_logits_unlabeled.detach().data,
                                            requires_grad=False)
            consistency_loss = consistency_criterion(logits_unlabeled,
                                                     ema_logits_unlabeled)
            if epoch < args.consistency_rampup_starts:
                consistency_weight = 0.0
            else:
                consistency_weight = get_current_consistency_weight(
                    args, args.final_consistency, epoch, i, len(unlabelled_loader))
            consistency_loss = consistency_weight * consistency_loss

            loss = class_loss + consistency_loss
            opt.zero_grad()
            loss.backward()
            opt.step()
            global_step += 1
            # print(global_step)
            update_ema_variables(model, ema_model, args.ema_decay, global_step)
            count += batch_size
            train_loss += loss.item() * batch_size
        scheduler.step()
        outstr = 'Train %d, loss: %.6f' % (epoch, train_loss * 1.0 / count)
        io.cprint(outstr)

        ####################
        # Evaluation
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        ema_model.eval()
        for data, label in validation_loader:
            data, label = data.to(device), label.to(device)
            batch_size = data.shape[0]
            logits = ema_model(data)
            loss = criterion(logits, label)
            count += batch_size
            test_loss += loss.item() * batch_size
        outstr = 'Test %d, loss: %.6f' % (epoch, test_loss * 1.0 / count)
        io.cprint(outstr)

        if test_loss <= best_test_loss:
            best_test_loss = test_loss
            torch.save(ema_model.state_dict(), 'checkpoints/%s/models/model.t7' % args.exp_name)
            torch.save(ema_model, (config.root + config.model_path))
        io.cprint('Time: %.3f sec' % (time.time() - startTime))
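# `update_ema_variables` is referenced above but not defined in this excerpt.
# A minimal sketch of the usual mean-teacher update (an assumption, not this
# project's own helper): the teacher's weights track an exponential moving
# average of the student's weights.
def update_ema_variables(model, ema_model, alpha, global_step):
    # Ramp the decay up from 0 towards `alpha` over the first steps.
    alpha = min(1 - 1 / (global_step + 1), alpha)
    for ema_param, param in zip(ema_model.parameters(), model.parameters()):
        ema_param.data.mul_(alpha).add_(param.data, alpha=1 - alpha)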
embedding = corpus.load_embedding(
    path=os.path.join(args.get('data_path'), 'embedding.json'))
embedding = torch.from_numpy(embedding).float()

if args['weighted']:
    weight = corpus.load_json(
        os.path.join(source_path, 'cross_entropy_loss_weight.json'))
    weight = torch.FloatTensor(list(weight.values())).to(device)
else:
    weight = None  # CrossEntropyLoss accepts weight=None

data = sorted(corpus.examples.get('seq'), key=lambda x: len(x), reverse=True)
vocab_size = len(corpus.words2id)
logging.info('vocabulary size: {}'.format(vocab_size))

model = DNN(vocab_size=vocab_size, embedding_size=200, hidden_size=512,
            embedding=embedding)
model.to(device)
loss_function = nn.CrossEntropyLoss(weight=weight)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model.train()
total_data = len(data)
batch_size = args['batch_size']
total_step = math.ceil(total_data / batch_size)
last_training_loss = 1000000000000
def train():
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    data_dict, topic_dict = dh.load_data()  # data_dict, [group2topic, mem2topic]
    train_data, train_label, dev_data, dev_label, test_data, test_label = dh.data_split(
        data_dict, topic_dict)
    train_dataset = dh.Dataset(train_data, train_label)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    dev_dataset = dh.Dataset(dev_data, dev_label)
    dev_loader = DataLoader(dev_dataset, batch_size=128, shuffle=True)

    # linear warm-up followed by cosine decay (defined here but not passed to a
    # scheduler; CosineAnnealingLR is used below)
    lambda1 = lambda epoch: (
        (epoch / args.warm_up_step) if epoch < args.warm_up_step
        else 0.5 * (math.cos(
            (epoch - args.warm_up_step) /
            (args.n_epoch * len(train_dataset) - args.warm_up_step) * math.pi) + 1))

    model = DNN(args).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.init_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, len(train_loader) * args.n_epoch)

    global_step = 0
    best_f1 = 0.
    loss_deq = collections.deque([], args.report_step)
    for epoch in range(args.n_epoch):
        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            inputs = batch['input'].to(device)
            group_topic = batch['group_topic'].to(device)
            mem_topic = batch['mem_topic'].to(device)
            labels = batch['label'].to(device)
            output = model(inputs, mem_topic, group_topic, label=labels)
            loss = output[0]
            loss.backward()
            loss_deq.append(loss.item())
            optimizer.step()
            scheduler.step()
            global_step += 1
            if global_step % args.report_step == 0:
                logger.info('loss: {}, lr: {}, epoch: {}'.format(
                    np.average(loss_deq).item(),
                    optimizer.param_groups[0]['lr'],
                    global_step / len(train_dataset)))
            if global_step % args.eval_step == 0:
                model.eval()
                eval_result = evaluation(model, data_loader=dev_loader, device=device)
                logger.info(eval_result)
                if eval_result['f1'] > best_f1:
                    torch.save(model, './model/{}/torch.pt'.format(args.task_name))
                    best_f1 = eval_result['f1']
                model.train()
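# `evaluation` is not shown in this excerpt. A plausible sketch (an assumption
# about the model's interface: when labels are passed, output[0] is the loss
# and output[1] holds the logits) returning the F1 the loop above checkpoints on:
import torch
from sklearn.metrics import f1_score, precision_score, recall_score

def evaluation(model, data_loader, device):
    preds, golds = [], []
    with torch.no_grad():
        for batch in data_loader:
            inputs = batch['input'].to(device)
            group_topic = batch['group_topic'].to(device)
            mem_topic = batch['mem_topic'].to(device)
            labels = batch['label'].to(device)
            output = model(inputs, mem_topic, group_topic, label=labels)
            preds.extend(output[1].argmax(dim=-1).cpu().tolist())
            golds.extend(labels.cpu().tolist())
    return {'f1': f1_score(golds, preds),
            'precision': precision_score(golds, preds),
            'recall': recall_score(golds, preds)}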