def __trainCorpus(self, corpus, yesTagNames=[], noTagNames=[]):
    trainer = Trainer()
    success = trainer.train(corpus, yesTagNames=yesTagNames, noTagNames=noTagNames)
    if not success:
        print("Failed to train document: %s" % corpus[:50])
    else:
        print("Trained document: %s" % corpus[:50])
import numpy as np

# Loader, BiLstm and Trainer are project-local classes.
def train(conf):
    loader = Loader(conf['embedding'], conf['text'])
    data, label_str, word2vec = loader.load()
    # Train on the first 700 samples only.
    data = data[:700]
    labels = np.array(label_str[:700], dtype=np.int32)
    classifier = BiLstm(2, conf['embedding']['sequence_length'],
                        word2vec.vocab_size, word2vec.embed_size)
    trainer = Trainer(classifier, word2vec.embeddings)
    trainer.train(data, labels)
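# --- usage sketch (not part of the original source) ---
# Loader defines the real configuration schema; only 'embedding', 'text'
# and 'sequence_length' appear in train() above. The other keys and the
# file paths below are assumptions for illustration.
example_conf = {
    'embedding': {
        'sequence_length': 128,                # key read by train() above
        'path': 'embeddings/word2vec.bin',     # hypothetical
    },
    'text': 'data/corpus.txt',                 # hypothetical
}
# train(example_conf)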
def __untrainCorpus(self, corpus):
    trainer = Trainer()
    if corpus:
        success = trainer.untrain(corpus)
        if not success:
            print("Failed to untrain corpus: %s" % corpus[:50])
        else:
            print("Untrained corpus: %s" % corpus[:50])
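# --- usage sketch (not part of the original source) ---
# __trainCorpus/__untrainCorpus above are thin wrappers around Trainer;
# the underlying calls look like this. The document text and tag names
# are made up for illustration.
def example_tag_training():
    trainer = Trainer()
    trainer.train("free prizes, click now ...",
                  yesTagNames=["spam"], noTagNames=["ham"])
    trainer.untrain("free prizes, click now ...")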
from flask import jsonify, request

# client (database connection), cache, Trainer and Classifier are defined
# elsewhere in this project.
def addIntent():
    sentence = request.json['sentence']
    intent = request.json['intent']
    if client is not None:
        intents = Trainer("intents", client)
        intents.add_to_traingset(sentence, intent, True)
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
def addEntity():
    intent = request.json['intent']
    sentence = request.json['sentence']
    entity = request.json['entity']
    if client is not None:
        classifier_name = "entities@" + intent
        entities = Trainer(classifier_name, client)
        entities.add_to_traingset(sentence, entity, True)
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
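# --- client-side sketch (not part of the original source) ---
# addIntent/addEntity above read request.json, so a client POSTs JSON
# bodies like these. The URLs are hypothetical; the real routes come from
# @app.route decorators that are not shown here.
import requests

def example_add_training_data(base_url="http://localhost:5000"):
    requests.post(base_url + "/addIntent",           # hypothetical route
                  json={"sentence": "book me a flight", "intent": "book_flight"})
    requests.post(base_url + "/addEntity",           # hypothetical route
                  json={"intent": "book_flight",
                        "sentence": "book me a flight to Paris",
                        "entity": "destination"})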
def trainIntents():
    if client is not None:
        intents = Trainer("intents", client)
        intents.start_training()
        if 'intents' not in cache:
            cache['intents'] = Classifier('intents', client)
        else:
            cache['intents'].load()
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
def trainEntity():
    intent = request.json['intent']
    if client is not None:
        classifier_name = "entities@" + intent
        entities = Trainer(classifier_name, client)
        entities.start_training()
        if classifier_name not in cache:
            cache[classifier_name] = Classifier(classifier_name, client)
        else:
            cache[classifier_name].load()
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
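# --- client-side sketch (not part of the original source) ---
# trainIntents takes no request body; trainEntity reads
# request.json['intent']. URLs are hypothetical, as above.
import requests

def example_start_training(base_url="http://localhost:5000"):
    requests.post(base_url + "/trainIntents")        # hypothetical route
    requests.post(base_url + "/trainEntity", json={"intent": "book_flight"})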
def train(conf):
    loader = Loader(conf['embedding'], conf['text'])
    data, label_str, word2vec = loader.load()
    # Map string labels to integer class ids; unknown labels fall back to 0.
    labels = np.zeros(len(label_str), dtype=np.int32)
    for idx, val in enumerate(label_str):
        if val in gender_mapping:
            labels[idx] = gender_mapping[val]
        else:
            labels[idx] = 0
    classifier = BiLstm(4, conf['embedding']['sequence_length'],
                        word2vec.vocab_size, word2vec.embed_size)
    trainer = Trainer(classifier, word2vec.embeddings)
    trainer.train(data, labels)
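# --- usage sketch (not part of the original source) ---
# gender_mapping is defined elsewhere in this project; train() above only
# requires that it map label strings to class ids in range(4), since BiLstm
# is constructed with 4 output classes. A purely hypothetical mapping:
example_gender_mapping = {'unknown': 0, 'male': 1, 'female': 2, 'other': 3}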
import sys
import time
import os.path as osp

import numpy as np
import torch
from torch.backends import cudnn

# Project-local helpers assumed importable from the surrounding package:
# mkdir_if_missing, Logger, get_data, VGGNet, Trainer, Evaluator,
# load_checkpoint, save_checkpoint.


def main(args):
    mkdir_if_missing(args.logs_dir)
    # writer = SummaryWriter(args.logs_dir)
    sys.stdout = Logger(osp.join(args.logs_dir, 'train_log.txt'))
    print(args)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True

    # create data loaders
    data_dir = args.data_dir
    dataset, num_class, train_loader, val_loader = \
        get_data(args.dataset, data_dir, args.crop_w, args.crop_h,
                 args.batch_size, args.workers)

    # create model
    model = VGGNet(args.depth, with_bn=True, pretrained=True,
                   num_class=num_class, dropout=args.dropout,
                   input_size=(args.crop_w, args.crop_h))
    model = model.cuda()

    # load from checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch']
        best_recall5 = checkpoint['best_recall5']
        print("=> start epoch {} best top5 recall {:.1%}".format(
            args.start_epoch, best_recall5))
    else:
        best_recall5 = 0

    # criterion
    criterion = torch.nn.BCEWithLogitsLoss()
    criterion.cuda()

    # optimizer
    if args.optimizer == 'sgd':
        param_groups = model.parameters()  # replaced below by per-group lr multipliers
        base_param_ids = set(map(id, model.base.parameters()))
        new_params = [p for p in model.parameters()
                      if id(p) not in base_param_ids]
        param_groups = [
            {'params': model.base.parameters(), 'lr_mult': 0.1},
            {'params': new_params, 'lr_mult': 1.0},
        ]
        optimizer = torch.optim.SGD(param_groups, lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=True)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)
    else:
        raise ValueError("Cannot recognize optimizer type:", args.optimizer)

    # create trainer and evaluator
    trainer = Trainer(model, criterion)
    evaluator = Evaluator(model)

    # schedule learning rate
    def adjust_lr(epoch):
        if args.optimizer == 'sgd':
            lr = args.lr * (0.1 ** (epoch // 30))
        elif args.optimizer == 'adam':
            lr = args.lr if epoch <= 50 else \
                args.lr * (0.01 ** (epoch - 50) / 30)
        else:
            raise ValueError("Cannot recognize optimizer type:", args.optimizer)
        for g in optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)

    # start training
    val_prec, val_recall = evaluator.evaluate(val_loader)
    time_start = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        adjust_lr(epoch)
        loss, prec, recall = trainer.train(epoch, train_loader, optimizer)
        # writer.add_scalar('Train loss', loss, epoch + 1)
        # writer.add_scalar('Train prec', prec, epoch + 1)
        # writer.add_scalar('Train recall', recall, epoch + 1)
        val_prec, val_recall = evaluator.evaluate(val_loader)
        # writer.add_scalar('Val prec', val_prec, epoch + 1)
        # writer.add_scalar('Val recall', val_recall, epoch + 1)
        is_best = val_recall > best_recall5
        best_recall5 = max(val_recall, best_recall5)
        save_checkpoint({
            'state_dict': model.state_dict(),
            'epoch': epoch + 1,
            'best_recall5': best_recall5,
        }, is_best, fpath=osp.join(args.logs_dir, 'checkpoint.pth.tar'))
        time_end = time.time()
        print('\n * Finished epoch {:3d} top5 recall: {:5.1%} best: {:5.1%}{}\n'
              .format(epoch, val_recall, best_recall5, ' *' if is_best else ''))
        print("Ming-chan (the most handsome in IE) reminds you: "
              "{:.2f}s elapsed so far, roughly {:.2f} min\n".format(
                  time_end - time_start, (time_end - time_start) / 60))

    # final test
    print('Test with best model:')
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    model.load_state_dict(checkpoint['state_dict'])
    evaluator.evaluate(val_loader)
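# --- CLI sketch (not part of the original source) ---
# main(args) reads the attributes below from `args`; the attribute names
# come from main() itself, while the flag spellings and defaults here are
# assumptions for illustration.
import argparse

def build_parser():
    parser = argparse.ArgumentParser(description='VGG training (sketch)')
    parser.add_argument('--dataset', type=str, default='my_dataset')
    parser.add_argument('--data-dir', type=str, default='data')
    parser.add_argument('--logs-dir', type=str, default='logs')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--crop-w', type=int, default=224)
    parser.add_argument('--crop-h', type=int, default=224)
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--workers', type=int, default=4)
    parser.add_argument('--depth', type=int, default=16)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--resume', type=str, default='')
    parser.add_argument('--start-epoch', type=int, default=0)
    parser.add_argument('--epochs', type=int, default=60)
    parser.add_argument('--optimizer', type=str, default='sgd', choices=['sgd', 'adam'])
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight-decay', type=float, default=5e-4)
    return parser

# if __name__ == '__main__':
#     main(build_parser().parse_args())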