def __trainCorpus(self, corpus, yesTagNames=[], noTagNames=[]):
    trainer = Trainer()
    success = trainer.train(corpus, yesTagNames=yesTagNames, noTagNames=noTagNames)
    if not success:
        print("Failed to train document: %s" % corpus[:50])
    else:
        print("Trained document: %s" % corpus[:50])
Example #2
def train(conf):
    loader = Loader(conf['embedding'], conf['text'])
    data, label_str, word2vec = loader.load()
    data = data[:700]
    labels = np.array(label_str[:700], dtype=np.int32)
    classifier = BiLstm(2, conf['embedding']['sequence_length'], word2vec.vocab_size, word2vec.embed_size)
    trainer = Trainer(classifier, word2vec.embeddings)
    trainer.train(data, labels)
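A hedged sketch of the conf dict this train() expects, inferred only from the keys read above (conf['embedding'], conf['text'], and conf['embedding']['sequence_length']); the concrete values and the path are hypothetical placeholders.

conf = {
    'embedding': {
        # only 'sequence_length' is read directly in this snippet;
        # Loader presumably pulls its other embedding settings from this sub-dict
        'sequence_length': 100,        # hypothetical value
    },
    'text': 'data/reviews.txt',        # hypothetical corpus path handed to Loader
}
train(conf)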
Example #3
def __untrainCorpus(self, corpus):
    trainer = Trainer()

    if corpus:
        success = trainer.untrain(corpus)
        if not success:
            print("Failed to untrain corpus: %s" % corpus[:50])
        else:
            print("Untrained corpus: %s" % corpus[:50])
Example #4
def addIntent():
    sentence = request.json['sentence']
    intent = request.json['intent']
    if client is not None:
        intents = Trainer("intents", client)
        intents.add_to_traingset(sentence, intent, True)
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
Example #5
def addEntity():
    intent = request.json['intent']
    sentence = request.json['sentence']
    entity = request.json['entity']
    if client is not None:
        classifier_name = "entities@" + intent
        entities = Trainer(classifier_name, client)
        entities.add_to_traingset(sentence, entity, True)
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
Example #6
def trainIntents():
    if client is not None:
        intents = Trainer("intents", client)
        intents.start_training()
        if 'intents' not in cache.keys():
            cache['intents'] = Classifier('intents', client)
        else:
            cache['intents'].load()
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
Example #7
def trainEntity():
    intent = request.json['intent']
    if client is not None:
        classifier_name = "entities@" + intent
        entities = Trainer(classifier_name, client)
        entities.start_training()
        if classifier_name not in cache.keys():
            cache[classifier_name] = Classifier(classifier_name, client)
        else:
            cache[classifier_name].load()
        return jsonify([])
    else:
        print("NO DATABASE")
        return "NO DATABASE"
Example #8
def train(conf):
    loader = Loader(conf['embedding'], conf['text'])
    data, label_str, word2vec = loader.load()

    labels = np.zeros(len(label_str), dtype=np.int32)
    for idx, val in enumerate(label_str):
        if val in gender_mapping:
            labels[idx] = gender_mapping[val]
        else:
            labels[idx] = 0

    classifier = BiLstm(4, conf['embedding']['sequence_length'], word2vec.vocab_size, word2vec.embed_size)
    trainer = Trainer(classifier, word2vec.embeddings)
    trainer.train(data, labels)
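This variant assumes a module-level gender_mapping dict that maps the raw label strings onto the four BiLstm classes; the keys below are purely hypothetical, since the real mapping is defined outside this snippet.

# hypothetical mapping; unmapped labels fall back to class 0 in the loop above
gender_mapping = {
    'unknown': 0,
    'male': 1,
    'female': 2,
    'brand': 3,
}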
def main(args):

    mkdir_if_missing(args.logs_dir)
    #writer = SummaryWriter(args.logs_dir)

    sys.stdout = Logger(osp.join(args.logs_dir, 'train_log.txt'))
    print(args)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.benchmark = True

    # create data loaders
    data_dir = args.data_dir
    dataset, num_class, train_loader, val_loader = \
        get_data(
            args.dataset, data_dir,
            args.crop_w, args.crop_h,
            args.batch_size, args.workers)

    # create model
    model = VGGNet(args.depth,
                   with_bn=True,
                   pretrained=True,
                   num_class=num_class,
                   dropout=args.dropout,
                   input_size=(args.crop_w, args.crop_h))
    model = model.cuda()

    # load from checkpoint
    if args.resume:
        checkpoint = load_checkpoint(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch']
        best_recall5 = checkpoint['best_recall5']
        print("=> start epoch {}  best top5 recall {:.1%}".format(
            args.start_epoch, best_recall5))
    else:
        best_recall5 = 0

    # criterion
    criterion = torch.nn.BCEWithLogitsLoss()
    criterion.cuda()

    # optimizer
    if args.optimizer == 'sgd':
        base_param_ids = set(map(id, model.base.parameters()))
        new_params = [
            p for p in model.parameters() if id(p) not in base_param_ids
        ]
        param_groups = [{
            'params': model.base.parameters(),
            'lr_mult': 0.1
        }, {
            'params': new_params,
            'lr_mult': 1.0
        }]
        optimizer = torch.optim.SGD(param_groups,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=True)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    else:
        raise ValueError("Cannot recognize optimizer type:", args.optimizer)

    # create trainer and evaluator
    trainer = Trainer(model, criterion)
    evaluator = Evaluator(model)

    # Schedule learning rate
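    # SGD: scale the base lr by 0.1 every 30 epochs.
    # Adam: keep the base lr through epoch 50, then decay it exponentially.
    # Each param group's 'lr_mult' (0.1 for the pretrained base) scales the scheduled lr.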
    def adjust_lr(epoch):
        if args.optimizer == 'sgd':
            lr = args.lr * (0.1**(epoch // 30))
        elif args.optimizer == 'adam':
            lr = args.lr if epoch <= 50 else \
                args.lr * (0.01 ** ((epoch - 50) / 30))
        else:
            raise ValueError("Cannot recognize optimizer type:",
                             args.optimizer)
        for g in optimizer.param_groups:
            g['lr'] = lr * g.get('lr_mult', 1)

    # start training
    val_prec, val_recall = evaluator.evaluate(val_loader)
    time_start = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        adjust_lr(epoch)
        loss, prec, recall = trainer.train(epoch, train_loader, optimizer)
        #writer.add_scalar('Train loss', loss, epoch+1)
        #writer.add_scalar('Train prec', prec, epoch+1)
        #writer.add_scalar('Train recall', recall, epoch+1)

        val_prec, val_recall = evaluator.evaluate(val_loader)
        #writer.add_scalar('Val prec', val_prec, epoch+1)
        #writer.add_scalar('Val recall', val_recall, epoch+1)

        is_best = val_recall > best_recall5
        best_recall5 = max(val_recall, best_recall5)
        save_checkpoint(
            {
                'state_dict': model.state_dict(),
                'epoch': epoch + 1,
                'best_recall5': best_recall5,
            },
            is_best,
            fpath=osp.join(args.logs_dir, 'checkpoint.pth.tar'))
        time_end = time.time()
        print(
            '\n * Finished epoch {:3d}  top5 recall: {:5.1%}  best: {:5.1%}{}\n'
            .format(epoch, val_recall, best_recall5, ' *' if is_best else ''))
        print('Elapsed time so far: {:.2f}s (about {:.2f} min)\n'.format(
            time_end - time_start, (time_end - time_start) / 60))

    # final test
    print('Test with best model:')
    checkpoint = load_checkpoint(osp.join(args.logs_dir, 'model_best.pth.tar'))
    model.load_state_dict(checkpoint['state_dict'])
    evaluator.evaluate(val_loader)
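main() takes an already-parsed argparse namespace; as a rough, hypothetical sketch, a parser covering every attribute the function reads could be set up as follows (all defaults are placeholders, not the values used by the original project).

import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    # every flag mirrors an args.<name> attribute read in main(); defaults are placeholders
    parser.add_argument('--dataset', type=str, default='my_dataset')
    parser.add_argument('--data-dir', type=str, default='data')
    parser.add_argument('--logs-dir', type=str, default='logs')
    parser.add_argument('--crop-w', type=int, default=224)
    parser.add_argument('--crop-h', type=int, default=224)
    parser.add_argument('--batch-size', type=int, default=32)
    parser.add_argument('--workers', type=int, default=4)
    parser.add_argument('--depth', type=int, default=16)            # VGGNet depth
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--optimizer', type=str, default='sgd')     # 'sgd' or 'adam'
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight-decay', type=float, default=5e-4)
    parser.add_argument('--start-epoch', type=int, default=0)
    parser.add_argument('--epochs', type=int, default=90)
    parser.add_argument('--resume', type=str, default='')           # path to a checkpoint
    parser.add_argument('--seed', type=int, default=1)
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())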