def train(self, data_loader, valid_loader, epochs, learning_rate, dropout_prob=None):
    losses_train = []
    losses_valid = []
    for epoch in range(epochs):
        print("epoch", epoch)
        # Training phase
        epoch_loss_train = 0
        for step, (x, y) in enumerate(data_loader):
            # x: [b, 28, 28] -> [b, 784], y: [b, 1] -> [b, 10]
            x = x.reshape(-1, 28 * 28)
            y = onehot(y, 10)
            nets, pred = self.forward(x, dropout_prob)
            loss = cross_entropy(y, pred)
            epoch_loss_train += loss
            grads = self.backward(nets, y, pred, dropout_prob)
            # SGD parameter update
            self.params = self.optimizer.optimize(self.weight_num, self.params, grads, y.shape[0])
            if step % 100 == 0:
                print("epoch {} training step {} loss {:.4f}".format(epoch, step, loss))
        losses_train.append(epoch_loss_train)
        print(epoch_loss_train)
        data_loader.restart()
        # Validation phase: forward pass only, with dropout disabled
        epoch_loss_valid = 0
        for step, (x, y) in enumerate(valid_loader):
            x = x.reshape(-1, 28 * 28)
            y = onehot(y, 10)
            nets, pred = self.forward(x)
            loss = cross_entropy(y, pred)
            epoch_loss_valid += loss
            if step % 100 == 0:
                print("epoch {} validation step {} loss {:.4f}".format(epoch, step, loss))
        losses_valid.append(epoch_loss_valid)
        valid_loader.restart()
    his = {'train_loss': losses_train, 'valid_loss': losses_valid}
    return his
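# The loops above lean on small NumPy helpers named `onehot` and `cross_entropy`.
# A minimal sketch of what they might look like (the bodies below are
# illustrative assumptions, not the original implementations):
import numpy as np

def onehot(y, num_classes):
    # Integer labels of shape [b] or [b, 1] -> one-hot matrix of shape [b, num_classes].
    y = np.asarray(y).reshape(-1).astype(int)
    out = np.zeros((y.shape[0], num_classes))
    out[np.arange(y.shape[0]), y] = 1.0
    return out

def cross_entropy(y_true, y_pred, eps=1e-12):
    # Mean cross-entropy between one-hot targets and predicted probabilities;
    # `eps` guards against log(0).
    return -np.mean(np.sum(y_true * np.log(y_pred + eps), axis=-1))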
def train_bn(self, data_loader, valid_loader, epochs, learning_rate):
    losses_train = []
    losses_valid = []
    for epoch in range(epochs):
        print("epoch", epoch)
        epoch_loss_train = 0
        # Reset the running mean and variance
        # Mini-batch training
        for step, (x, y) in enumerate(data_loader):
            # x: [b, 28, 28] -> [b, 784], y: [b, 1] -> [b, 10]
            x = x.reshape(-1, 28 * 28)
            y = onehot(y, 10)
            nets, pred = self.forward_bn(x, bn_mode='train')
            grads = self.backward_bn(nets, y, pred)
            self.optimizer.optimize(self.weight_num, self.params, grads, y.shape[0])
            loss = cross_entropy(y, pred)
            epoch_loss_train += loss
            if step % 100 == 0:
                print("epoch {} step {} loss {:.4f}".format(epoch, step, loss))
        losses_train.append(epoch_loss_train)
        data_loader.restart()
        print(epoch_loss_train)
        # Validation: forward pass only, using running statistics
        epoch_loss_valid = 0
        for step, (x, y) in enumerate(valid_loader):
            x = x.reshape(-1, 28 * 28)
            y = onehot(y, 10)
            nets, pred = self.forward_bn(x, bn_mode='test')
            loss = cross_entropy(y, pred)
            epoch_loss_valid += loss
            if step % 100 == 0:
                print("epoch {} step {} loss {:.4f}".format(epoch, step, loss))
        losses_valid.append(epoch_loss_valid)
        valid_loader.restart()
    his = {'train_loss': losses_train, 'valid_loss': losses_valid}
    return his
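# `forward_bn` switches between batch statistics in 'train' mode and running
# statistics in 'test' mode. A minimal sketch of that per-layer behaviour; the
# parameter names (gamma, beta, running_mean, running_var) are illustrative
# assumptions, not the original code:
import numpy as np

def batchnorm_forward(x, gamma, beta, running_mean, running_var,
                      bn_mode='train', momentum=0.9, eps=1e-5):
    if bn_mode == 'train':
        mean = x.mean(axis=0)
        var = x.var(axis=0)
        # Exponential moving average of the statistics, consumed at test time.
        running_mean[:] = momentum * running_mean + (1 - momentum) * mean
        running_var[:] = momentum * running_var + (1 - momentum) * var
    else:  # 'test'
        mean, var = running_mean, running_var
    x_hat = (x - mean) / np.sqrt(var + eps)
    return gamma * x_hat + beta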
def speed_test_10():
    dataset_dir = os.path.join(root_of_datasets, 'ImageNet')
    img_extension, classnames = tools.process_dataset_config(
        os.path.join(dataset_dir, 'dataset_info.xml'))
    nclasses = len(classnames)
    labels_file = os.path.join(dataset_dir, 'train_labels.txt')
    filenames, labels = read_paths_and_labels(labels_file, dataset_dir,
                                              percent_of_data, shuffle_data)
    batched_dataset = build_dataset(filenames, labels)
    iterator = tf.data.Iterator.from_structure(batched_dataset.output_types,
                                               batched_dataset.output_shapes)
    x, y = iterator.get_next(name='iterator-output')
    train_init_op = iterator.make_initializer(batched_dataset, name='train_init_op')
    resnet_v1 = tf.contrib.slim.nets.resnet_v1
    # This arg scope is mandatory; otherwise the checkpoint file will fail to load.
    with slim.arg_scope(tf.contrib.slim.python.slim.nets.resnet_utils.resnet_arg_scope()):
        logits, _ = resnet_v1.resnet_v1_50(x, num_classes=nclasses,
                                           is_training=True, scope='resnet_v1_50')
    logits = tf.squeeze(logits, axis=[1, 2])
    loss = losses.cross_entropy(y, logits)
    tf.summary.scalar("loss", loss)
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
    # Make sure batch-norm statistics are updated on every training step.
    update_bn_stats_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_bn_stats_ops):
        train_op = optimizer.minimize(loss, name='train_op')
    init_op = tf.global_variables_initializer()
    # Recreate a clean tensorboard output directory.
    outdir = os.path.join(tools.get_base_dir(), 'tensorboard')
    if os.path.exists(outdir):
        shutil.rmtree(outdir)
    os.makedirs(outdir)
    with tf.Session() as sess:
        merged, summary_writer, tensorboard_url = prepare_tensorboard(sess, outdir)
        sess.run(init_op)
        sess.run(train_init_op)
        for i in range(n_steps):
            ini = time.time()
            _, summaryOut = sess.run(fetches=[train_op, merged])
            summary_writer.add_summary(summaryOut, i)
            fin = time.time()
            print('Step ' + str(i) + ' done in ' + str(fin - ini) + ' s.')
def train_autoencoder(Autoencoder, Superclass, Old_superclass):
    # ================== used to retrain encoders whose superclass changed ==================
    print('\n=========== refresh the autoencoders ===========')
    for name in Superclass:  # `name` avoids shadowing the builtin `dict`
        refresh = (name not in Old_superclass.keys()
                   or Superclass[name] != Old_superclass[name])
        if refresh:
            print('\nrefreshing the autoencoder: ' + name)
            Autoencoder[name] = autoencoder(args)
            if cf.use_cuda:
                Autoencoder[name].cuda()
                cudnn.benchmark = True
            for epoch in range(args.num_epochs_train):
                Autoencoder[name].train()
                required_train_loader = get_dataLoder(args, classes=Superclass[name],
                                                      mode='Train', encoded=True,
                                                      one_hot=False)
                param = list(Autoencoder[name].parameters())
                optimizer, lr = get_optim(param, args, mode='preTrain', epoch=epoch)
                for batch_idx, (inputs, targets) in enumerate(required_train_loader):
                    if batch_idx >= args.num_test:
                        break
                    if cf.use_cuda:
                        inputs = inputs.cuda()  # GPU settings
                    optimizer.zero_grad()
                    inputs = Variable(inputs)
                    reconstructions, _ = Autoencoder[name](inputs)
                    loss = cross_entropy(reconstructions, inputs)
                    loss.backward()   # backward propagation
                    optimizer.step()  # optimizer update
                    sys.stdout.write('\r')
                    sys.stdout.write('Refreshing autoencoder: ' + name +
                                     ' with Epoch [%3d/%3d] Iter [%3d]\t\t Loss: %.4f'
                                     % (epoch + 1, args.num_epochs_train,
                                        batch_idx + 1, loss.item()))
                    sys.stdout.flush()
            print('\nautoencoder model: ' + str(name) +
                  ' is constructed with final loss: ' + str(loss.item()))
    return Autoencoder
def build_loss(self, args):
    self.loss = losses.cross_entropy(self.labels, self.net_output)
    if args.l2_regularization > 0:
        self.loss += L2RegularizationLoss(args)
    # Workaround to expose the loss tensor under the name 'loss':
    self.loss = tf.identity(self.loss, name='loss')
    # Tensorboard:
    tf.summary.scalar("loss", self.loss)
    return
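# `L2RegularizationLoss` is a project-specific helper; a plausible TF1-style
# sketch of what it might compute (an assumption, not the actual implementation):
import tensorflow as tf

def L2RegularizationLoss(args):
    # Sum of L2 norms of all non-bias trainable variables, scaled by the
    # regularization coefficient.
    weights = [v for v in tf.trainable_variables() if 'bias' not in v.name]
    return args.l2_regularization * tf.add_n([tf.nn.l2_loss(v) for v in weights])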
def training_step(self, batch, batch_idx, part='train'):
    """Define a single training step."""
    x, y = batch
    logits = self(x)
    loss = cross_entropy(logits, y, smooth=self.args.label_smoothing)
    acc = accuracy(logits, y)
    # Log
    self.log(f'{part}_loss', loss)
    self.log(f'{part}_acc', acc, prog_bar=True)
    if part == 'train':
        self.log('lr', self.optimizer.param_groups[0]['lr'])
    return loss
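# The `smooth` keyword above suggests a label-smoothing variant of
# cross-entropy. A minimal PyTorch sketch of such a helper (an assumption
# about its behaviour, not the project's actual `cross_entropy`):
import torch.nn.functional as F

def cross_entropy_smoothed(logits, target, smooth=0.0):
    # With smoothing s, the target distribution puts (1 - s) on the true class
    # and spreads s uniformly over all classes.
    log_probs = F.log_softmax(logits, dim=-1)
    nll = -log_probs.gather(dim=-1, index=target.unsqueeze(-1)).squeeze(-1)
    uniform = -log_probs.mean(dim=-1)
    return ((1.0 - smooth) * nll + smooth * uniform).mean()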
def predict(self, data_loader, bn=False):
    labels = []
    pred = []
    losses = 0
    for (x, y) in data_loader:
        x = x.reshape(-1, 28 * 28)
        y = onehot(y, 10)
        if bn:
            _, out = self.forward_bn(x, 'test')
        else:
            _, out = self.forward(x)
        loss = cross_entropy(y, out)
        losses += loss
        out = list(np.argmax(out, axis=-1).flatten())
        y = list(np.argmax(y, axis=1).flatten())
        labels += y
        pred += out
    return np.array(pred).astype('int'), np.array(labels).astype('int')
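# Example use of `predict` (the names `model` and `test_loader` below are
# placeholders, not from the original code):
#   preds, labels = model.predict(test_loader, bn=True)
#   acc = np.mean(preds == labels)
#   print("test accuracy: {:.4f}".format(acc))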
def speed_test_8():
    dataset_dir = os.path.join(os.path.dirname(tools.get_base_dir()),
                               'datasets', 'coco-animals')
    img_extension, classnames = tools.process_dataset_config(
        os.path.join(dataset_dir, 'dataset_info.xml'))
    nclasses = len(classnames)
    labels_file = os.path.join(dataset_dir, 'train_labels.txt')
    filenames, labels = read_paths_and_labels(labels_file, dataset_dir)
    batched_dataset = build_dataset(filenames, labels)
    iterator = tf.data.Iterator.from_structure(batched_dataset.output_types,
                                               batched_dataset.output_shapes)
    x, y = iterator.get_next(name='iterator-output')
    train_init_op = iterator.make_initializer(batched_dataset, name='train_init_op')
    resnet_v1 = tf.contrib.slim.nets.resnet_v1
    # This arg scope is mandatory; otherwise the checkpoint file will fail to load.
    with slim.arg_scope(tf.contrib.slim.python.slim.nets.resnet_utils.resnet_arg_scope()):
        logits, _ = resnet_v1.resnet_v1_50(x, num_classes=nclasses,
                                           is_training=True, scope='resnet_v1_50')
    logits = tf.squeeze(logits, axis=[1, 2])
    loss = losses.cross_entropy(y, logits)
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
    # Make sure batch-norm statistics are updated on every training step.
    update_bn_stats_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_bn_stats_ops):
        train_op = optimizer.minimize(loss, name='train_op')
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        sess.run(train_init_op)
        for i in range(n_steps):
            ini = time.time()
            sess.run(fetches=[train_op])
            fin = time.time()
            print('Step ' + str(i) + ' done in ' + str(fin - ini) + ' s.')
def train_test_autoencoder(newclasses, Autoencoder):
    # ================== used to train the new encoder ==================
    Autoencoder[str(newclasses)] = autoencoder(args)
    if cf.use_cuda:
        Autoencoder[str(newclasses)].cuda()
        cudnn.benchmark = True
    for epoch in range(args.num_epochs_train):
        Autoencoder[str(newclasses)].train()
        required_train_loader = get_dataLoder(args, classes=[newclasses], mode='Train',
                                              encoded=True, one_hot=True)
        param = list(Autoencoder[str(newclasses)].parameters())
        optimizer, lr = get_optim(param, args, mode='preTrain', epoch=epoch)
        print('\n==> Epoch #%d, LR=%.4f' % (epoch + 1, lr))
        for batch_idx, (inputs, targets) in enumerate(required_train_loader):
            if batch_idx >= args.num_test:
                break
            if cf.use_cuda:
                inputs = inputs.cuda()  # GPU settings
            optimizer.zero_grad()
            inputs = Variable(inputs)
            reconstructions, _ = Autoencoder[str(newclasses)](inputs)
            loss = cross_entropy(reconstructions, inputs)
            loss.backward()   # backward propagation
            optimizer.step()  # optimizer update
            sys.stdout.write('\r')
            sys.stdout.write('Train autoencoder: ' + str(newclasses) +
                             ' with Epoch [%3d/%3d] Iter [%3d]\t\t Loss: %.4f'
                             % (epoch + 1, args.num_epochs_train,
                                batch_idx + 1, loss.item()))
            sys.stdout.flush()
    # =============== used to classify it and put it in a proper superclass ==============
    if Autoencoder:
        Loss = {}
        Rel = {}
        print('\ntesting the new data in previous autoencoders')
        for name in Autoencoder:  # `name` avoids shadowing the builtin `dict`
            Loss[name] = 0
            required_valid_loader = get_dataLoder(args, classes=[int(name)], mode='Valid',
                                                  encoded=True, one_hot=True)
            for batch_idx, (inputs, targets) in enumerate(required_valid_loader):
                if batch_idx >= args.num_test:
                    break
                if cf.use_cuda:
                    inputs = inputs.cuda()  # GPU settings
                inputs = Variable(inputs)
                reconstructions, _ = Autoencoder[name](inputs)
                loss = cross_entropy(reconstructions, inputs)
                Loss[name] += (loss.data.cpu().numpy()
                               if cf.use_cuda else loss.data.numpy())
        print('\nAutoencoder: ' + str(newclasses) +
              ' is deleted and waits for the update performed every ten classes')
        Autoencoder.pop(str(newclasses),
                        '\nthe class: ' + str(newclasses) +
                        ' was not deleted because the key does not exist')
        highest = 0
        test_result = ''
        # Relative similarity of each autoencoder's reconstruction loss to the
        # new class's loss; the closest match above the threshold wins.
        for name in Loss:
            Rel[name] = 1 - abs((Loss[name] - Loss[str(newclasses)]) / Loss[str(newclasses)])
            if Rel[name] >= highest and Rel[name] >= args.rel_th and name != str(newclasses):
                highest = Rel[name]
                test_result = name
                print('\nnewclass: ' + str(newclasses) +
                      ' is added to the superclass with class: ' + name)
        print('\nClass rel:', Rel, ' and Loss:', Loss)
        return Autoencoder, test_result
    else:
        return Autoencoder, ''
encoded_inputs = encoded_inputs[i:i + 1, :, :, :]
target = targets[i].data.cpu().numpy()
A_out = {}
# Despite the name, `highest` tracks the autoencoder with the *lowest*
# reconstruction loss.
highest = {}
highest['rel'] = float("inf")
highest['dict'] = ''
for a in Autoencoder:
    if cf.use_cuda:
        Autoencoder[a].cuda()
        cudnn.benchmark = True
    try:
        reconstruction, _ = Autoencoder[a](encoded_inputs)
    except RuntimeError:
        continue
    loss = cross_entropy(reconstruction, input)
    if loss.data.cpu().numpy() <= highest['rel']:
        highest['rel'] = loss.data.cpu().numpy()
        highest['dict'] = a
# Route the sample to the superclass whose member autoencoder won.
s_class = [k for k, v in Superclass.items() if int(highest['dict']) in v][0]
if target in Superclass[s_class]:
    result['root_correct_' + str(newclass)] += 1
if len(Superclass[s_class]) > 1:
    if cf.use_cuda:
        Expert[s_class].cuda()
        cudnn.benchmark = True
    output = Expert[s_class](input)
    output = torch.argmax(output, 1)
def loss(self, x, t):
    z = self.predict(x)
    y = softmax(z)
    loss = cross_entropy(y, t)
    return loss
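# The snippet above composes predict -> softmax -> cross_entropy. A numerically
# stable `softmax` of the kind it presumably assumes (illustrative sketch, not
# the original implementation):
import numpy as np

def softmax(z):
    # Subtracting the row-wise max before exponentiating avoids overflow
    # without changing the result.
    z = z - np.max(z, axis=-1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=-1, keepdims=True)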
def evaluate_text_classify(self, model, valid_loader):
    total_loss = 0
    total_steps = 0
    total_samples = 0
    hit_num = 0
    total_num = 0
    logits_list = list()
    y_trues = list()
    total_spent_time = 0.0
    for _step, batch in enumerate(valid_loader):
        batch = {
            key: val.cuda() if isinstance(val, torch.Tensor) else val
            for key, val in batch.items()
        }
        infer_start_time = time.time()
        with torch.no_grad():
            student_outputs = model(batch)
        infer_end_time = time.time()
        total_spent_time += infer_end_time - infer_start_time
        assert "logits" in student_outputs and "label_ids" in batch
        logits, label_ids = student_outputs["logits"], batch["label_ids"]
        y_trues.extend(label_ids.tolist())
        logits_list.extend(logits.tolist())
        hit_num += torch.sum(torch.argmax(logits, dim=-1) == label_ids).item()
        total_num += label_ids.shape[0]
        # Regression-style outputs use MSE; 2-D logits use cross-entropy.
        if len(logits.shape) == 1 or logits.shape[-1] == 1:
            tmp_loss = losses.mse_loss(logits, label_ids)
        elif len(logits.shape) == 2:
            tmp_loss = losses.cross_entropy(logits, label_ids)
        else:
            raise RuntimeError
        total_loss += tmp_loss.mean().item()
        total_steps += 1
        total_samples += valid_loader.batch_size
        if (_step + 1) % 100 == 0:
            logger.info("Eval: %d/%d steps finished" %
                        (_step + 1,
                         len(valid_loader.dataset) // valid_loader.batch_size))
    logger.info("Inference time = {:.2f}s, [{:.4f} ms / sample] ".format(
        total_spent_time, total_spent_time * 1000 / total_samples))
    eval_loss = total_loss / total_steps
    logger.info("Eval loss: {}".format(eval_loss))
    logits_list = np.array(logits_list)
    eval_outputs = list()
    for metric in self.metrics:
        if metric.endswith("accuracy"):
            acc = hit_num / total_num
            logger.info("Accuracy: {}".format(acc))
            eval_outputs.append(("accuracy", acc))
        elif metric == "f1":
            f1 = f1_score(y_trues, np.argmax(logits_list, axis=-1))
            logger.info("F1: {}".format(f1))
            eval_outputs.append(("f1", f1))
        elif metric == "macro-f1":
            f1 = f1_score(y_trues, np.argmax(logits_list, axis=-1), average="macro")
            logger.info("Macro F1: {}".format(f1))
            eval_outputs.append(("macro-f1", f1))
        elif metric == "micro-f1":
            f1 = f1_score(y_trues, np.argmax(logits_list, axis=-1), average="micro")
            logger.info("Micro F1: {}".format(f1))
            eval_outputs.append(("micro-f1", f1))
        elif metric == "auc":
            auc = roc_auc_score(y_trues, np.argmax(logits_list, axis=-1))
            logger.info("AUC: {}".format(auc))
            eval_outputs.append(("auc", auc))
        elif metric == "matthews_corrcoef":
            mcc = matthews_corrcoef(y_trues, np.argmax(logits_list, axis=-1))
            logger.info("Matthews Corrcoef: {}".format(mcc))
            eval_outputs.append(("matthews_corrcoef", mcc))
        elif metric == "pearson_and_spearman":
            preds = logits_list[:, 0]
            pearson_corr = pearsonr(preds, y_trues)[0]
            spearman_corr = spearmanr(preds, y_trues)[0]
            logger.info("Pearson: {}".format(pearson_corr))
            logger.info("Spearman: {}".format(spearman_corr))
            corr = (pearson_corr + spearman_corr) / 2.0
            logger.info("Pearson and Spearman: {}".format(corr))
            eval_outputs.append(("pearson_and_spearman", corr))
        elif metric == "classification_report":
            logger.info("\n{}".format(
                classification_report(y_trues, np.argmax(logits_list, axis=-1),
                                      digits=4)))
        elif metric == "last_layer_mse":
            logger.info("Last layer MSE: {}".format(eval_loss))
            eval_outputs.append(("last_layer_mse", -eval_loss))
        else:
            raise NotImplementedError("Metric %s not implemented" % metric)
    return eval_outputs
def evaluate_sequence_labeling(self, model, valid_loader):

    def predict_sequence_labeling(raw_preds, raw_label_ids, label_enumerate_values,
                                  tok_to_orig_indexes):
        # Map subword-level predictions back to word-level label sequences.
        new_preds = list()
        new_labels = list()
        idx_label_map = dict({
            idx: value for idx, value in enumerate(label_enumerate_values)
        })
        for idx, (raw_pred, tok_to_orig_index) in enumerate(
                zip(raw_preds, tok_to_orig_indexes)):
            raw_label = raw_label_ids[idx]
            final_pred = list()
            final_label = list()
            prev_token_idx = -1
            for k in range(min(len(raw_pred), len(tok_to_orig_index))):
                token_pred = raw_pred[k]
                token_label = raw_label[k]
                token_orig_idx = tok_to_orig_index[k]
                if token_orig_idx == -100:
                    continue
                if token_orig_idx == prev_token_idx:
                    # Only the first subword of each original token counts.
                    continue
                final_pred.append(idx_label_map[token_pred])
                final_label.append(idx_label_map[token_label])
                prev_token_idx = token_orig_idx
            raw_sequence_length = max(tok_to_orig_index) + 1
            while len(final_pred) < raw_sequence_length:
                final_pred.append(idx_label_map[len(idx_label_map) - 1])
            new_preds.extend(final_pred + ["O"])
            new_labels.extend(final_label + ["O"])
        return new_preds, new_labels

    total_loss = 0
    total_steps = 0
    total_samples = 0
    true_seqs = list()
    pred_seqs = list()
    total_spent_time = 0.0
    for _step, batch in enumerate(valid_loader):
        batch = {
            key: val.cuda() if isinstance(val, torch.Tensor) else val
            for key, val in batch.items()
        }
        infer_start_time = time.time()
        with torch.no_grad():
            student_outputs = model(batch)
        infer_end_time = time.time()
        total_spent_time += infer_end_time - infer_start_time
        assert "logits" in student_outputs and "label_ids" in batch
        logits, label_ids = student_outputs["logits"], batch["label_ids"]
        raw_preds = torch.argmax(logits, dim=-1).tolist()
        raw_label_ids = label_ids.tolist()
        new_preds, new_labels = predict_sequence_labeling(
            raw_preds, raw_label_ids,
            valid_loader.dataset.label_enumerate_values,
            batch["tok_to_orig_index"])
        pred_seqs.extend(new_preds)
        true_seqs.extend(new_labels)
        logits = logits.view(-1, logits.size(-1))
        label_ids = label_ids.view(-1)
        tmp_loss = losses.cross_entropy(logits, label_ids)
        total_loss += tmp_loss.mean().item()
        total_steps += 1
        total_samples += valid_loader.batch_size
        if (_step + 1) % 100 == 0:
            logger.info("Eval: %d/%d steps finished" %
                        (_step + 1,
                         len(valid_loader.dataset) // valid_loader.batch_size))
    logger.info("Inference time = {:.2f}s, [{:.4f} ms / sample] ".format(
        total_spent_time, total_spent_time * 1000 / total_samples))
    eval_loss = total_loss / total_steps
    logger.info("Eval loss: {}".format(eval_loss))
    # This calls a module-level scoring helper of the same name, not this method.
    (prec, rec, f1) = evaluate_sequence_labeling(true_seqs, pred_seqs)
    logger.info("Labeling F1: {}".format(f1))
    logger.info("Labeling Precision: {}".format(prec))
    logger.info("Labeling Recall: {}".format(rec))
    eval_outputs = list()
    eval_outputs.append(("labeling_f1", f1))
    eval_outputs.append(("labeling_precision", prec))
    eval_outputs.append(("labeling_recall", rec))
    return eval_outputs
def evaluate_language_modeling(self, model, valid_loader):
    total_loss = 0
    total_steps = 0
    total_samples = 0
    hit_num = 0
    total_num = 0
    total_spent_time = 0.0
    for _step, batch in enumerate(valid_loader):
        batch = {
            key: val.cuda() if isinstance(val, torch.Tensor) else val
            for key, val in batch.items()
        }
        infer_start_time = time.time()
        with torch.no_grad():
            student_outputs = model(batch)
        infer_end_time = time.time()
        total_spent_time += infer_end_time - infer_start_time
        assert "logits" in student_outputs and "label_ids" in batch
        logits, label_ids = student_outputs["logits"], batch["label_ids"]
        # A masked span counts as a hit only if every token in it is predicted correctly.
        for b in range(label_ids.shape[0]):
            _logits = logits[b]
            _label_ids = label_ids[b]
            mask_span_indices = batch["mask_span_indices"][b]
            for span_indices in mask_span_indices:
                pred = list()
                label = list()
                for span_idx in span_indices:
                    pred.append(torch.argmax(_logits[span_idx]).item())
                    label.append(_label_ids[span_idx].item())
                hit_num += (tuple(pred) == tuple(label))
                total_num += 1
        logits = logits.view(-1, logits.size(-1))
        label_ids = label_ids.view(-1)
        # Ignore positions labeled -100 when computing the loss.
        indices = (label_ids != -100)
        logits = logits[indices]
        label_ids = label_ids[indices]
        tmp_loss = losses.cross_entropy(logits, label_ids)
        total_loss += tmp_loss.mean().item()
        total_steps += 1
        total_samples += valid_loader.batch_size
        if (_step + 1) % 100 == 0:
            logger.info("Eval: %d/%d steps finished" %
                        (_step + 1,
                         len(valid_loader.dataset) // valid_loader.batch_size))
    logger.info("Inference time = {:.2f}s, [{:.4f} ms / sample] ".format(
        total_spent_time, total_spent_time * 1000 / total_samples))
    eval_loss = total_loss / total_steps
    logger.info("Eval loss: {}".format(eval_loss))
    acc = hit_num / total_num
    logger.info("Accuracy: {}".format(acc))
    eval_outputs = [("accuracy", acc)]
    return eval_outputs
def compute_loss(self, model_outputs, inputs):
    logits = model_outputs["logits"]
    label_ids = inputs["label_ids"]
    return {"loss": losses.cross_entropy(logits, label_ids)}
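# Sketch of how `compute_loss` would typically be wired into a training step
# (the `model`, `batch`, and `trainer` names below are assumptions):
#   model_outputs = model(batch)                        # must contain "logits"
#   loss_dict = trainer.compute_loss(model_outputs, batch)
#   loss_dict["loss"].backward()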