Example #1
def train(args):
    assert args.checkpoint is not None
    path = os.path.join(settings.PROJECT_ROOT, settings.LOAD_DIR)
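    # a '.txt' checkpoint holds a genome description; anything else is a serialized model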
    if args.checkpoint.endswith('txt'):
        with open(os.path.join(path, args.checkpoint), 'r') as f:
            genome = f.readlines()
            model = common.load_genome(genome, args)
    else:
        model = common.load_model(os.path.join(path, args.checkpoint))

    args.split_ratio = 0.99  # presumably the training fraction of the data split
    env = getattr(envs, args.env)(args)
    trainer = Trainer(env, model, args)

    logger = Logger('MAIN', args=args)
    logger.log("Begin training {}".format(args.checkpoint))
    best_acc = 0
    for epoch in range(args.epochs):
        trainer.train()
        if epoch % args.log_step == 0:
            logger.log("Training statistics for epoch: {}".format(epoch))
            logger.scalar_summary(trainer.info.avg, epoch)
            trainer.info.reset()

        trainer.infer(test=True)
        acc = trainer.info.avg['Accuracy/Top1']
        trainer.info.reset()
        logger.log("Validation accuracy: {}".format(acc))
        if acc > best_acc:
            best_acc = acc
            path = os.path.join(logger.log_dir, 'model.pth')
            logger.log("Saving model at epoch: {}".format(epoch))
            common.save_model(model, path)
Example #2
def train(training_dataset: dataset.Word2VecDataset, config: configure.Config):
    """The training process
    
    Args:
        training_dataset(utils.dataset.Word2VecDataset): the dataset will be trained
        config(utils.configure.Config): all parameter that would be used in training
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    training_loader = data.DataLoader(training_dataset,
                                      batch_size=config.batch_size,
                                      shuffle=True)
    model = word2vec.init_word2vec_model(config.vocab_size,
                                         config.embedding_dim,
                                         config.batch_size, config.bag_size,
                                         config.mode)
    model = model.to(device)

    for epoch in range(config.epochs):
        print("Epoch [{}/{}]".format(epoch + 1, config.epochs))
        epoch_avg_loss = executor.train_step(model, training_loader, device,
                                             config)
    # persist the trained model along with the final epoch's average loss
    common.save_model(model, config.save_path, epoch_avg_loss, epoch)
Example #3
    if cfg.finetune is not None:  # guard reconstructed; the snippet begins mid-branch
        state_all = torch.load(cfg.finetune)['model']
        state_clip = {}  # only use backbone parameters
        for k, v in state_all.items():
            if 'model' in k:
                state_clip[k] = v
        net.load_state_dict(state_clip, strict=False)
    if cfg.resume is not None:
        dist_print('==> Resume model from ' + cfg.resume)
        resume_dict = torch.load(cfg.resume, map_location='cpu')
        net.load_state_dict(resume_dict['model'])
        if 'optimizer' in resume_dict.keys():
            optimizer.load_state_dict(resume_dict['optimizer'])
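        # recover the epoch index from the checkpoint filename; assumes names like 'ep014.pth'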
        resume_epoch = int(os.path.split(cfg.resume)[1][2:5]) + 1
    else:
        resume_epoch = 0

    scheduler = get_scheduler(optimizer, cfg, len(train_loader))
    dist_print(len(train_loader))
    metric_dict = get_metric_dict(cfg)
    loss_dict = get_loss_dict(cfg)
    logger = get_logger(work_dir, cfg)
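    # snapshot the project code into the work dir for reproducibility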
    cp_projects(work_dir)

    for epoch in range(resume_epoch, cfg.epoch):

        train(net, train_loader, loss_dict, optimizer, scheduler, logger,
              epoch, metric_dict, cfg.use_aux)

        save_model(net, optimizer, epoch, work_dir, distributed)
    logger.close()
Example #4
    if cfg['finetune'] is not None:  # guard reconstructed; the snippet begins mid-branch
        state_all = torch.load(cfg['finetune'])['model']
        state_clip = {}  # only use backbone parameters
        for k, v in state_all.items():
            if 'model' in k:
                state_clip[k] = v
        net.load_state_dict(state_clip, strict=False)
    if cfg['resume'] is not None:
        logger.log('==> Resume model from ' + cfg['resume'])
        resume_dict = torch.load(cfg['resume'], map_location='cpu')
        net.load_state_dict(resume_dict['model'])
        if 'optimizer' in resume_dict.keys():
            optimizer.load_state_dict(resume_dict['optimizer'])
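        # recover the epoch index from the checkpoint filename; assumes names like 'ep014.pth'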
        resume_epoch = int(os.path.split(cfg['resume'])[1][2:5]) + 1
    else:
        resume_epoch = 0
    scheduler = get_scheduler(optimizer, cfg['train'], len(train_loader))
    logger.log('Train dataset total: %d' % len(train_loader))
    metric_dict = get_metric_dict(cfg['dataset'])
    loss_dict = get_loss_dict(cfg)

    max_F = 0  # best F score returned by test() so far
    for epoch in range(resume_epoch, cfg['train']['epoch']):
        train(net, train_loader, loss_dict, optimizer, scheduler, logger,
              epoch, metric_dict, cfg['dataset']['use_aux'], args.local_rank)
        save_model(net, optimizer, epoch)
        val_intervals = cfg['test']['val_intervals']
        if val_intervals > 0 and epoch % val_intervals == 0:
            F = test(net, test_loader, cfg['dataset'], cfg['log_path'], logger)
            if F > max_F:
                save_model(net, optimizer, epoch, 'model_best.pth')
                max_F = F
Example #5
def train(args, x_train, y_train, x_val, y_val, x_test, y_test, fold_idx):
    # class_weights = class_weight.compute_class_weight('balanced',
    #                                                   np.unique(y_train),
    #                                                   y_train)
    y_train_bin = to_categorical(y_train, num_classes=args.num_classes)
    y_val_bin = to_categorical(y_val, num_classes=args.num_classes)
    y_test_bin = to_categorical(y_test, num_classes=args.num_classes)
    print('train shape:', x_train.shape, y_train.shape)
    print('val shape:', x_val.shape, y_val.shape)
    print('test shape:', x_test.shape, y_test.shape)

    print('Build model...')
    n_timesteps, n_features = x_train.shape[-2], x_train.shape[-1]
    print(f'n_timesteps {n_timesteps}')
    print(f'n_features: {n_features}')
    # n_features = x_train.shape[1]
    n_outputs = y_train_bin.shape[1]
    if os.path.isfile(args.model_file):
        model = load_model(args.model_file,
                           custom_objects={
                               'focal_loss_fixed': focal_loss(),
                               'ghm_class_loss': ghm.ghm_class_loss
                           })
    else:
        model = None

    net_type = model_type[args.type]  # avoid shadowing the builtin `type`

    if net_type == ModelType.ANN:
        folder_name = '/ann'
        # flatten windows to vectors; val/test need the same reshape as train
        x_train = reshape_data(x_train)
        x_val = reshape_data(x_val)
        x_test = reshape_data(x_test)
        n_features = x_train.shape[1]
        if model is None:
            model = ann_model(n_features, n_outputs)

    elif net_type == ModelType.LSTM:
        folder_name = '/lstm'
        if model is None:
            model = lstm_model(n_timesteps, n_features, n_outputs,
                               args.hidden_size, args.dense_size)
    elif net_type == ModelType.CNN:
        folder_name = '/cnn'
        if model is None:
            model = cnn_model_base(n_timesteps,
                                   n_features,
                                   n_outputs,
                                   hidden_layers=args.hidden_layers,
                                   kernels=args.kernels,
                                   dropout_rate=args.dropout_rate,
                                   activation=args.activation)
            # model = cnn_model(n_timesteps, n_features, n_outputs)

    elif net_type == ModelType.CNNF:
        folder_name = '/cnn_feature'
        if model is None:
            model = cnn_features_model(n_timesteps, n_features, n_outputs,
                                       nb_features)  # nb_features: module-scope global in the source
    elif net_type == ModelType.CNNM:
        folder_name = '/cnn_m'
        if model is None:
            model = cnn_model(n_timesteps, n_features, n_outputs)

    else:
        raise argparse.ArgumentTypeError(
            'Unsupported model type value encountered.')
    model.summary()  # summary() prints directly and returns None
    ckpt_dir = args.ckpt_dir + folder_name
    log_dir = args.ckpt_dir + '/logs' + folder_name
    out_path = args.ckpt_dir + folder_name
    # checkpoint
    if not os.path.isdir(ckpt_dir):
        os.makedirs(ckpt_dir)
    if args.save_best:
        filepath = ckpt_dir + "/weights.best-" + str(fold_idx) + ".hdf5"
        fold_idx += 1
    else:
        filepath = ckpt_dir + "/weights-improvement-" \
                            "{epoch:02d}-{val_categorical_accuracy:.2f}.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_categorical_accuracy',
                                 verbose=1,
                                 save_best_only=args.save_best,
                                 mode='max')
    lr_reduce = ReduceLROnPlateau(monitor='val_categorical_accuracy',
                                  factor=0.5,
                                  patience=20,
                                  verbose=0)
    early_stopper = EarlyStopping(monitor='val_loss',
                                  patience=30,
                                  verbose=1,
                                  mode='min',
                                  restore_best_weights=args.save_best)
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    csv_log = CSVLogger(log_dir + '/training.log',
                        separator=',',
                        append=False)
    tensorboard = TensorBoard(log_dir=log_dir)

    # include the early stopper so the callback defined above takes effect
    callbacks_list = [checkpoint, lr_reduce, early_stopper, csv_log, tensorboard]
    print('Train...')
    if net_type == ModelType.CNNF:
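        # features_train / features_val / features_test are defined elsewhere in
        # the source project (module scope); they are not shown in this snippet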
        model.fit(
            [x_train, features_train],
            y_train_bin,
            batch_size=args.batch,
            epochs=args.epochs,
            validation_data=([x_val, features_val], y_val_bin),
            # class_weight=class_weights,
            callbacks=callbacks_list)
    else:
        model.fit(
            x_train,
            y_train_bin,
            batch_size=args.batch,
            epochs=args.epochs,
            validation_data=(x_val, y_val_bin),
            # class_weight=class_weights,
            callbacks=callbacks_list)
    save_model(model, 'har_cnn' + str(fold_idx), out_path)

    # reload the checkpointed weights for evaluation; note `filepath` only names
    # a concrete file when args.save_best is set (otherwise it is an epoch-templated pattern)
    model = load_model(filepath,
                       custom_objects={
                           'focal_loss_fixed': focal_loss(),
                           'ghm_class_loss': ghm.ghm_class_loss
                       })
    if net_type == ModelType.CNNF:
        loss, acc = model.evaluate([x_test, features_test],
                                   y_test_bin,
                                   batch_size=args.batch,
                                   verbose=1)
    else:
        loss, acc = model.evaluate(x_test,
                                   y_test_bin,
                                   batch_size=args.batch,
                                   verbose=1)
    print('Test loss:', loss)
    print('Test accuracy:', acc)

    print("model's inputs:", model.inputs)
    print("model's outputs:", model.outputs)
    # per-class sample counts in the test split
    for label in range(6):
        print(f'{label} num: {np.sum(y_test == label)}')
    if net_type == ModelType.CNNF:
        y_pred_probs = model.predict([x_test, features_test])
    else:
        y_pred_probs = model.predict(x_test)
    stats_evaluation(y_test,
                     y_pred_probs,
                     num_classes=len(CategoryNames),
                     shift=args.shift,
                     show=False)
    return acc