def predict(self, x, batch_size=512):
    '''Model prediction

    Args:
        x (ndarray): input image of shape (m, hi, wi, ci). Where
            m: number of records.
            hi, wi: height and width of input image.
            ci: channels of input image.
        batch_size (int): number of records to predict per batch.
    Returns:
        yhat (ndarray): prediction with shape (m, k). k is the number of
            output units, m is the number of records.
    '''
    n = len(x)
    x = force4D(x)
    # do prediction in batches as the vectorized convolution inflates memory
    # usage considerably.
    yhat = []
    batches = getBatch(n, batch_size)
    for idxii in batches:
        xii = np.take(x, idxii, axis=0)
        _, activations = self.feedForward(xii)
        yhatii = activations[self.n_layers]
        yhat.append(yhatii)

    return np.concatenate(yhat, axis=0)
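# getBatch is not defined in these CNN snippets. Judging from how it is called
# above and in batchTrain/evaluateCost below -- getBatch(n, batch_size[,
# randomize=True]), with each returned item fed to np.take as an index list --
# a minimal sketch of such a helper might look like the following. This is an
# assumption for illustration, not the project's actual implementation.
import numpy as np

def getBatch(n, batch_size, randomize=False):
    '''Yield index arrays, one per mini-batch of at most batch_size records (sketch).'''
    idx = np.arange(n)
    if randomize:
        np.random.shuffle(idx)
    for start in range(0, n, batch_size):
        yield idx[start:start + batch_size]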
def evaluate(model, loss_func, dictionary, data):
    """Evaluate the model while training."""
    model.eval()  # turn on the eval() switch to disable dropout
    total_loss = 0
    total_correct = 0
    total_prediction = []
    total_labels = []
    for texts, labels, masks, bsz in utils.getBatch(data=data,
                                                    dictionary=dictionary,
                                                    maxlen=MAX_LEN,
                                                    batch_size=BATCH_SIZE):
        hidden = model.init_hidden(texts.size(0))
        fc, outh, pred, attention = model.forward(texts, masks, hidden)
        output_flat = pred.view(texts.size(0), -1)
        total_loss += loss_func(output_flat, labels).item()
        prediction = torch.max(output_flat, 1)[1]
        total_correct += (prediction == labels).float().sum().item()
        total_prediction += prediction.tolist()
        total_labels += labels.tolist()

    res = classification_report(total_labels, total_prediction)
    return (res,
            total_loss / (len(data) // BATCH_SIZE),
            total_correct / len(data))
def batchTrain(self, x, y, epochs, batch_size):
    '''Training using mini-batches

    Args:
        x (ndarray): input image of shape (m, hi, wi, ci). Where
            m: number of records.
            hi, wi: height and width of input image.
            ci: channels of input image.
        y (ndarray): labels with shape (m, k). k is the number of output
            units, m is the number of records.
        epochs (int): number of epochs to train.
        batch_size (int): mini-batch size.
    Returns:
        costs (ndarray): overall cost at each epoch.
    '''
    costs = []
    m = len(x)
    x = force4D(x)
    for ee in range(epochs):
        batches = getBatch(m, batch_size, randomize=True)
        for idxii in batches:
            xii = np.take(x, idxii, axis=0)
            yii = np.take(y, idxii, axis=0)
            weight_sums, activations = self.feedForward(xii)
            gradsii, grads_biasii = self.feedBackward(weight_sums,
                                                      activations, yii)
            self.gradientDescent(gradsii, grads_biasii, batch_size)

        je = self.evaluateCost(x, y)
        print('# <batchTrain>: cost at epoch %d, j = %f' % (ee, je))
        costs.append(je)

    return np.array(costs)
def train(model, loss_func, dictionary, epoch, train_data, dev_data,
          identity_mat, stop_counter):
    global best_dev_loss, best_acc

    model.train()
    total_loss = 0
    for texts, labels, masks, bsz in utils.getBatch(data=train_data,
                                                    dictionary=dictionary,
                                                    maxlen=MAX_LEN,
                                                    batch_size=BATCH_SIZE):
        init_state = model.init_hidden(bsz)
        fc, outh, pred, attention = model.forward(sents=texts, mask=masks,
                                                  init_hc=init_state)
        loss = loss_func(pred.view(texts.size(0), -1), labels)
        if USE_ATTENTION:
            # penalize redundant attention hops: || A A^T - I ||
            attentionT = torch.transpose(attention, 1, 2).contiguous()
            extra_loss = Frobenius(
                torch.bmm(attention, attentionT) - identity_mat[:attention.size(0)])
            loss += PENALIZATION_COEFF * extra_loss
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), CLIP)
        optimizer.step()
        total_loss += loss.item()

    res, dev_loss, acc = evaluate(model, loss_func, dictionary, dev_data)
    print(res)
    utils.saveLog(LOG_PATH, res)
    total_res = 'epoch: %d, dev loss: %f, acc: %f' % (epoch + 1, dev_loss, acc)
    print(total_res)
    utils.saveLog(LOG_PATH, total_res)
    utils.div('-')

    if not best_dev_loss or dev_loss < best_dev_loss:
        with open(MODEL_PATH % (dev_loss, acc), 'wb') as f:
            torch.save(model, f)
        best_dev_loss = dev_loss
        stop_counter = 0
    else:
        # no improvement: decay the learning rate and count towards early stopping
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] * 0.2
        if EARLY_STOP != 0:
            stop_counter += 1

    return stop_counter
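# Frobenius is not defined in this snippet. It is called on a batch of square
# matrices (A A^T - I) and must return a scalar penalty. A minimal sketch
# consistent with that usage -- per-sample Frobenius norm averaged over the
# batch -- could be the following; it is an assumption, not the project's
# actual helper.
import torch

def Frobenius(mat):
    '''Mean Frobenius norm over a batch of matrices of shape (batch, r, r) (sketch).'''
    return torch.sqrt((mat ** 2).sum(dim=(1, 2))).mean()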
def batchTrain(self, x, y, epochs, batch_size):
    '''Training using mini-batches

    Args:
        x (ndarray): input with shape (n, h, w) or (n, h, w, c). n is the
            number of records, h the image height, w the image width and
            c the number of channels.
        y (ndarray): labels with shape (n, m). m is the number of output
            units, n is the number of records.
        epochs (int): number of epochs to train.
        batch_size (int): mini-batch size.
    Returns:
        costs (ndarray): overall cost at each epoch.
    '''
    costs = []
    m = len(x)
    for ee in range(epochs):
        batches = getBatch(m, batch_size, randomize=True)
        for idxjj in batches:
            # accumulate gradients over the samples of this mini-batch
            for idxii in idxjj:
                xii = np.atleast_3d(x[idxii])
                yii = y[idxii]
                weight_sums, activations = self.feedForward(xii)
                gradsii, grads_biasii = self.feedBackward(
                    weight_sums, activations, yii)
                if idxii == idxjj[0]:
                    gradsjj = gradsii
                    grads_biasjj = grads_biasii
                else:
                    gradsjj = self.sumGradients(gradsjj, gradsii)
                    grads_biasjj = self.sumGradients(
                        grads_biasjj, grads_biasii)
            self.gradientDescent(gradsjj, grads_biasjj, batch_size)

        je = self.evaluateCost(x, y)
        print('# <batchTrain>: cost at epoch %d, j = %f' % (ee, je))
        costs.append(je)

    return np.array(costs)
def evaluateCost(self, x, y, batch_size=512):
    '''Compute mean cost on a dataset

    Args:
        x (ndarray): input image of shape (m, hi, wi, ci). Where
            m: number of records.
            hi, wi: height and width of input image.
            ci: channels of input image.
        y (ndarray): labels with shape (m, k). k is the number of output
            units, m is the number of records.
        batch_size (int): batch size used for prediction.
    Returns:
        j (float): mean cost over dataset <x, y>.
    '''
    j = 0
    n = len(x)
    batches = getBatch(n, batch_size)
    for idxii in batches:
        xii = np.take(x, idxii, axis=0)
        yii = np.take(y, idxii, axis=0)
        yhatii = self.predict(xii, batch_size)
        jii = self.sampleCost(yhatii, yii)
        j += jii

    # add the regularization term once for the whole dataset
    j2 = self.regCost()
    j += j2

    return j / n
if use_gpu:
    model.cuda()

parameters = model.parameters()
optimizer = torch.optim.Adamax(parameters)
loss_function = torch.nn.MSELoss()
total_len = len(train_dataset)

print("start training...")
for epoch in range(epochs):
    start_time = time.time()
    total_loss = 0
    for i in tqdm(range(0, len(train_dataset) - batch_size, batch_size)):
        features, labels = getBatch(train_dataset, i, batch_size)
        if use_gpu:
            features, labels = features.cuda(), labels.cuda()
        model.zero_grad()
        output = model(features)
        loss = loss_function(output, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * len(features)
    logger.info(f'epoch:{epoch}, train_loss:{total_loss / total_len}')
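# The getBatch used in the loop above is not shown. It is called as
# getBatch(dataset, start_row, batch_size) and must return a (features,
# labels) pair of tensors that .cuda() can be called on. A rough sketch under
# the assumption that the pickled DataFrame holds 'feature' and 'label'
# columns (those column names are guesses, not from the original project):
import torch

def getBatch(dataset, start, batch_size):
    '''Return one (features, labels) tensor pair for rows [start, start + batch_size) (sketch).'''
    rows = dataset.iloc[start:start + batch_size]
    features = torch.tensor(list(rows['feature']), dtype=torch.float32)
    labels = torch.tensor(list(rows['label']), dtype=torch.float32)
    return features, labels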
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=labels))
optimizer = tf.train.AdamOptimizer().minimize(loss)

sess = tf.InteractiveSession()
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())

tf.summary.scalar('Loss', loss)
tf.summary.scalar('Accuracy', accuracy)
merged = tf.summary.merge_all()
logdir = "tensorboard/" + datetime.datetime.now().strftime(
    "%Y%m%d-%H%M%S") + "/"
writer = tf.summary.FileWriter(logdir, sess.graph)

for i in range(NUMBER_OF_ITERATIONS):
    nextBatch, nextBatchLabels = getBatch(train, BATCH_SIZE,
                                          MAX_SEQUENCE_LENGTH)
    sess.run(optimizer, {input_data: nextBatch, labels: nextBatchLabels})

    # Write summary to Tensorboard
    if i % 50 == 0:
        summary = sess.run(merged, {
            input_data: nextBatch,
            labels: nextBatchLabels
        })
        writer.add_summary(summary, i)

    # Save the network every 10,000 training iterations
    if i % 10000 == 0 and i != 0:
        save_path = saver.save(sess, "models/pretrained_lstm.ckpt",
                               global_step=i)
import pandas as pd
import torch
from tqdm import tqdm

from utils import getBatch, calMetric, drawPicture

if __name__ == "__main__":
    test_dataset = pd.read_pickle('dataset/test.pkl')
    model_path = 'log/birnn.pt'
    model = rnnModel(hidden_dim, feature_dim, batch_size)
    model.load_state_dict(torch.load(model_path))
    if use_gpu:
        model.cuda()
    model.eval()

    predict = []
    ground_truth = []
    with torch.no_grad():
        for i in tqdm(range(0, len(test_dataset) - batch_size, batch_size)):
            features, labels = getBatch(test_dataset, i, batch_size)
            if use_gpu:
                features, labels = features.cuda(), labels.cuda()
            output = model(features)
            predict.extend(output.data.cpu().numpy())
            ground_truth.extend(labels.data.cpu().numpy())

    df = pd.DataFrame()
    df['predict'] = predict
    df['ground_truth'] = ground_truth
    df.to_csv("log/result.csv")
def train(model, x_train, y_train, x_validation, y_validation, loss_object,
          optimizer, ckpt, manager, batch_size=32, n_epochs=10):
    # try to restore a previous checkpoint; if none exists, start from scratch
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restored from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    iterations = x_train.shape[0] // batch_size
    if x_train.shape[0] % batch_size != 0:
        iterations += 1

    best_step = -1
    best_epoch_val_loss = 100.0
    for e in range(n_epochs):
        loss_iteration = 0
        x_train, y_train = shuffle(x_train, y_train, random_state=0)
        for ibatch in range(iterations):
            batch_x = getBatch(x_train, ibatch, batch_size)
            batch_y = getBatch(y_train, ibatch, batch_size)
            with tf.GradientTape() as tape:
                predictions = model(batch_x, training=True)
                loss = loss_object(batch_y, predictions)
            loss_iteration += loss.numpy()
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, model.trainable_variables))
        total_loss = loss_iteration / iterations

        pred = model(x_train, training=False)
        y_argmax = np.argmax(y_train, axis=1)
        pred_argmax = np.argmax(pred, axis=1)
        ac = accuracy_score(y_argmax, pred_argmax)

        # increment the checkpoint step
        ckpt.step.assign_add(1)
        if total_loss <= best_epoch_val_loss:
            # new best model found, so save the checkpoint to a file
            best_epoch_val_loss = total_loss
            best_step = int(ckpt.step)
            save_path = manager.save()
            print("Saved checkpoint for step {}: {}".format(
                best_step, save_path))
            print("loss {:1.2f}".format(best_epoch_val_loss))

        print("epoch %d loss %f Train Accuracy %f" % (e, total_loss,
                                                      np.round(ac, 4)))
        pred = model(x_validation, training=False)
        print("vs. Validation Accuracy %f" % accuracy_score(
            np.argmax(y_validation, axis=1), np.argmax(pred, axis=1)))
        print("===============")
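# In the training loop above, getBatch is indexed by batch number
# (getBatch(x_train, ibatch, batch_size)) rather than by row offset. A minimal
# numpy-style sketch under that assumption (not the project's actual helper):
def getBatch(array, ibatch, batch_size):
    '''Return the ibatch-th chunk of batch_size rows; the last chunk may be shorter (sketch).'''
    start = ibatch * batch_size
    return array[start:start + batch_size]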
HIDDEN_SIZE = 300
EPOCH = 3
LEARNING_RATE = 0.001

model = WindowClassifier(len(word2index), EMBEDDING_SIZE, WINDOW_SIZE,
                         HIDDEN_SIZE, len(tag2index))
print(model)
if cuda:
    model = model.cuda()

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

for ep in range(EPOCH):
    losses = []
    acc = []
    for bid, batch in enumerate(getBatch(BATCH_SIZE, train_data)):
        x, y = list(zip(*batch))
        inputs = torch.cat(
            [prepare_sequence(sent, word2index).view(1, -1) for sent in x])
        targets = torch.cat([prepare_tag(tag, tag2index) for tag in y])

        model.zero_grad()
        # print inputs.size()
        pred = model(inputs, istraining=True)
        # acc.append(np.where(pred==targets).shape[0])
        # print pred
        loss = loss_function(pred, targets)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()

        if bid % 1000 == 0:
base = "data/" readList = open("remaining.txt").read().split()[:50] ########## batch_size = 64 NUM_EPOCH = 80 # 1200 learning_rate = 0.0003 enc = fontEncoder().cuda() cla = classifier().cuda() loss_function = nn.CrossEntropyLoss().cuda() optimizer = torch.optim.Adam(list(enc.parameters()) + list(cla.parameters()), lr=learning_rate) train_set = getBatch(base, readList, BATCH_SIZE=batch_size) for epoch in range(NUM_EPOCH): print("current epoch: ", epoch) for index, (image, label) in enumerate(train_set): optimizer.zero_grad() image = image.cuda() label = label.cuda() embedding = enc(image) output = cla(embedding) loss = loss_function(output, label)
model.add(Dense(input_dim=INPUT_SHAPE, output_dim=150))
model.add(Activation("relu"))
model.add(Dropout(0.50))
model.add(Dense(output_dim=130))
model.add(Activation("relu"))
model.add(Dropout(0.50))
model.add(Dense(output_dim=128))
model.compile("nadam", "mae")

print('Training...')
i = 0
for epoch in range(EPOCHS):
    random.seed(42)
    random.shuffle(text)
    print(' EPOCH:', epoch)
    for text_descriptors, img_descriptors in utils.getBatch(
            text, images, BATCH_SIZE):
        print(vstack(text_descriptors).shape)
        # alternative: shuffle the descriptor pairs together
        # tmp = list(zip(text_descriptors, img_descriptors))
        # random.seed(42)
        # random.shuffle(tmp)
        # text_descriptors, img_descriptors = zip(*tmp)
        t0 = time.time()
        mlp.partial_fit(vstack(text_descriptors), img_descriptors)
        print(' Partial fit {} took: {} min, Score {}'.format(
            i, round((time.time() - t0) / 60, 2), mlp.loss_))
        # alternative: full fit instead of partial_fit
        # mlp.fit(vstack(text_descriptors), img_descriptors)
        # i = i+1
        # if i == 2:
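# utils.getBatch in the loop above appears to yield aligned chunks of the two
# parallel sequences (text descriptors and image descriptors). A minimal
# sketch under that assumption (not the project's actual implementation):
def getBatch(texts, images, batch_size):
    '''Yield (texts_chunk, images_chunk) pairs of length at most batch_size (sketch).'''
    for start in range(0, len(texts), batch_size):
        yield texts[start:start + batch_size], images[start:start + batch_size]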