Example #1
def main():
    df_train = pd.read_csv(DATA_ROOT / 'train_masks.csv')
    ids_train = df_train['img'].map(lambda s: s.split('.')[0])

    ids_train_split, ids_valid_split = train_test_split(ids_train, test_size=0.2, random_state=42)

    print('Training on {} samples'.format(len(ids_train_split)))
    print('Validating on {} samples'.format(len(ids_valid_split)))

    train_dataset = CarvanaTrainDataset(ids_train_split.values)
    valid_dataset = CarvanaTrainDataset(ids_valid_split.values)

    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=TRAIN_BATCH_SIZE)
    valid_loader = DataLoader(valid_dataset, batch_size=TRAIN_BATCH_SIZE)

    model = UNet()
    if LOAD_MODEL:
        load_best_model(model)
    model.cuda()

    criterion = Loss()
    optimizer = optim.RMSprop(model.parameters(), lr=0.0001)

    train_util.train(model, criterion, optimizer, 100, train_loader, valid_loader)
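The Loss() criterion above is not defined in the snippet. For Carvana-style binary mask segmentation, a common choice combines binary cross-entropy with a soft Dice term; below is a minimal sketch of such a criterion, offered only as an illustration and not the snippet's actual Loss class.

import torch
import torch.nn as nn

class BCEDiceLoss(nn.Module):
    # Hypothetical criterion: BCE-with-logits plus soft Dice for binary masks.
    def __init__(self, smooth=1.0):
        super().__init__()
        self.bce = nn.BCEWithLogitsLoss()
        self.smooth = smooth

    def forward(self, logits, targets):
        bce = self.bce(logits, targets)
        probs = torch.sigmoid(logits)
        # Flatten each sample and compute the soft Dice coefficient.
        probs = probs.view(probs.size(0), -1)
        targets = targets.view(targets.size(0), -1)
        intersection = (probs * targets).sum(dim=1)
        dice = (2.0 * intersection + self.smooth) / (
            probs.sum(dim=1) + targets.sum(dim=1) + self.smooth)
        return bce + (1.0 - dice.mean())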
Example #2
File: train.py Project: wbj0110/cortex
def train(args):
    ctx = Context(s3_path=args.context,
                  cache_dir=args.cache_dir,
                  workload_id=args.workload_id)

    package.install_packages(ctx.python_packages, ctx.bucket)

    model = ctx.models_id_map[args.model]

    logger.info("Training")

    with util.Tempdir(ctx.cache_dir) as temp_dir:
        model_dir = os.path.join(temp_dir, "model_dir")
        ctx.upload_resource_status_start(model)

        try:
            model_impl = ctx.get_model_impl(model["name"])
            train_util.train(model["name"], model_impl, ctx, model_dir)
            ctx.upload_resource_status_success(model)

            logger.info("Caching")
            logger.info("Caching model " + model["name"])
            model_export_dir = os.path.join(model_dir, "export", "estimator")
            model_zip_path = os.path.join(temp_dir, "model.zip")
            util.zip_dir(model_export_dir, model_zip_path)

            aws.upload_file_to_s3(local_path=model_zip_path,
                                  key=model["key"],
                                  bucket=ctx.bucket)
            util.log_job_finished(ctx.workload_id)

        except CortexException as e:
            ctx.upload_resource_status_failed(model)
            e.wrap("error")
            logger.error(str(e))
            logger.exception(
                "An error occurred, see `cx logs model {}` for more details.".
                format(model["name"]))
            sys.exit(1)
        except Exception as e:
            ctx.upload_resource_status_failed(model)
            logger.exception(
                "An error occurred, see `cx logs model {}` for more details.".
                format(model["name"]))
            sys.exit(1)
Example #3
def run(config):
    """Entry point to run training."""
    init_data_normalizer(config)

    stage_ids = train_util.get_stage_ids(**config)
    if not config['train_progressive']:
        stage_ids = list(stage_ids)[-1:]

    # Train one stage at a time
    for stage_id in stage_ids:
        batch_size = train_util.get_batch_size(stage_id, **config)
        tf.reset_default_graph()
        with tf.device(tf.train.replica_device_setter(config['ps_tasks'])):
            model = lib_model.Model(stage_id, batch_size, config)
            model.add_summaries()
            print('Variables:')
            for v in tf.global_variables():
                print('\t', v.name, v.get_shape().as_list())
            logging.info('Calling train.train')
            train_util.train(model, **config)
Example #4
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("--token_data",
                        default=None,
                        type=str,
                        required=True,
                        help="包含 train,test,evl 和 vocab.json的文件夹")

    parser.add_argument("--feature_dir_prefix",
                        default="features",
                        help="train,test,evl从样本转化成特征所存储的文件夹前缀置")

    parser.add_argument("--do_train", action='store_true', help="是否进行训练")
    parser.add_argument("--do_decode", action='store_true', help="是否对测试集进行测试")

    parser.add_argument("--example_num",
                        default=1024 * 8,
                        type=int,
                        help="每一个特征文件所包含的样本数量")

    parser.add_argument("--article_max_len",
                        default=400,
                        type=int,
                        help="文章的所允许的最大长度")

    parser.add_argument("--abstract_max_len",
                        default=100,
                        type=int,
                        help="摘要所允许的最大长度")

    parser.add_argument("--vocab_num",
                        default=50000,
                        type=int,
                        help="词表所允许的最大长度")

    parser.add_argument("--pointer_gen", action='store_true', help="是否使用指针机制")

    parser.add_argument("--use_coverage", action="store_true", help="是否使用汇聚机制")

    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="当GPU可用时,选择不用GPU")

    parser.add_argument("--epoch_num", default=10, type=int, help="epoch")

    parser.add_argument("--train_batch_size",
                        default=16,
                        type=int,
                        help="train batch size")

    parser.add_argument("--eval_batch_size",
                        default=64,
                        type=int,
                        help="evaluate batch size")

    parser.add_argument("--hidden_dim",
                        default=256,
                        type=int,
                        help="hidden dimension")
    parser.add_argument("--embedding_dim",
                        default=128,
                        type=int,
                        help="embedding dimension")
    parser.add_argument("--coverage_loss_weight",
                        default=1.0,
                        type=float,
                        help="coverage loss weight ")
    parser.add_argument("--eps",
                        default=1e-12,
                        type=float,
                        help="log(v + eps) Avoid  v == 0,")
    parser.add_argument("--dropout", default=0.5, type=float, help="dropout")

    parser.add_argument("--lr", default=1e-3, type=float, help="learning rate")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")

    parser.add_argument("--adagrad_init_acc",
                        default=0.1,
                        type=float,
                        help="learning rate")

    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")

    parser.add_argument(
        "--gradient_accumulation_steps",
        default=1,
        type=int,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument("--output_dir",
                        default="output",
                        type=str,
                        help="Folder to store models and results")

    parser.add_argument("--evaluation_steps",
                        default=500,
                        type=int,
                        help="Evaluation every N steps of training")
    parser.add_argument("--seed", default=4321, type=int, help="Random seed")

    args = parser.parse_args()
    args.device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    set_seed(args.seed)

    vocab_file = os.path.join(args.token_data, 'vocab.json')
    assert os.path.exists(vocab_file)
    vocab = Vocab(vocab_file=vocab_file, vob_num=args.vocab_num)

    check(args, vocab=vocab)

    model = PointerGeneratorNetworks(vob_size=args.vocab_num,
                                     embed_dim=args.embedding_dim,
                                     hidden_dim=args.hidden_dim,
                                     pad_idx=vocab.pad_idx,
                                     dropout=args.dropout,
                                     pointer_gen=args.pointer_gen,
                                     use_coverage=args.use_coverage)

    model = model.to(args.device)
    if args.do_train:
        optimizer = Adam(model.parameters(), lr=args.lr)
        train(args=args, model=model, optimizer=optimizer, with_eval=True)
    if args.do_decode:
        decoder(args, model, vocab=vocab)
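set_seed above is called but not shown; a typical implementation seeds Python, NumPy, and PyTorch for reproducibility. A sketch under that assumption:

import random
import numpy as np
import torch

def set_seed(seed):
    # Seed every common source of randomness for reproducible runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)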
Example #5
# Tail of a dict comprehension that inverts model.class_to_idx (index -> class name)
    idx: class_
    for class_, idx in model.class_to_idx.items()
}

for p in optimizer.param_groups[0]['params']:
    if p.requires_grad:
        print(p.shape)  # print the parameter group that needs to be optimized

cuda.empty_cache()  # clear the GPU cache

model, history = train_util.train(
    model,  # model to train
    criterion,  # loss function to use
    optimizer,  # optimizer to use
    dataloaders['train'],  # training dataset
    dataloaders['val'],  # validation dataset
    save_file_name=save_file_name,  # file name to save under
    max_epochs_stop=10,  # stop training after this many epochs without an improvement in valid loss
    n_epochs=training_epoch,  # maximum number of epochs to train
    print_every=1,  # print progress every N epochs
    early_stop=train_util.Early_stop)  # whether to apply early stopping

# Function that saves the loss/accuracy graphs
train_util.save_train_valid_loss(history, model_choice)

# Function that saves the model
train_util.save_checkpoint(model,
                           path=checkpoint_path,
                           model_name=model_choice)

# Function that randomly picks one image
Example #6
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument("--token_data",
                        default=None,
                        required=True,
                        type=str,
                        help="包含 train,test,dev 和 vocab.json的文件夹")

    parser.add_argument("--feature_dir_prefix",
                        default="features",
                        help="train,test,evl从样本转化成特征所存储的文件夹前缀置")

    parser.add_argument("--do_train", action='store_true', help="是否进行训练")
    parser.add_argument("--do_decode", action='store_true', help="是否对测试集进行测试")

    parser.add_argument("--example_num",
                        default=1024 * 8,
                        type=int,
                        help="每一个特征文件所包含的样本数量")

    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="当GPU可用时,选择不用GPU")

    parser.add_argument("--epoch_num", default=15, type=int, help="epoch")

    parser.add_argument("--train_batch_size",
                        default=16,
                        type=int,
                        help="train batch size")

    parser.add_argument(
        "--gradient_accumulation_steps",
        default=4,
        type=int,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass."
    )

    parser.add_argument("--eval_batch_size",
                        default=128,
                        type=int,
                        help="evaluate batch size")

    parser.add_argument("--lr", default=1e-3, type=float, help="learning rate")

    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")

    parser.add_argument("--adagrad_init_acc",
                        default=0.1,
                        type=float,
                        help="adagrad init acc")

    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")

    parser.add_argument("--output_dir",
                        default="output",
                        type=str,
                        help="Folder to store models and results")

    parser.add_argument("--evaluation_steps",
                        default=500,
                        type=int,
                        help="Evaluation every N steps of training")
    parser.add_argument("--seed", default=4321, type=int, help="Random seed")

    args = parser.parse_args()
    args.device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    set_seed(args.seed)

    vocab_file = os.path.join(args.token_data, 'vocab.json')
    assert os.path.exists(vocab_file)

    model_config_file = os.path.join(".", "model", "model_config.json")
    assert os.path.exists(model_config_file)
    with open(model_config_file, "r", encoding="utf-8") as f:
        model_config_dict = json.load(f)
    model_config = ModelConfig(**model_config_dict)

    vocab = Vocab(vocab_file=vocab_file, vob_num=model_config.vocab_size)

    model_config.pad_idx = vocab.pad_idx
    model_config.unk_idx = vocab.unk_idx
    model_config.start_idx = vocab.start_idx
    model_config.stop_idx = vocab.stop_idx

    check(args, model_config, vocab)

    model = PointerGeneratorNetworks(config=model_config)

    model = model.to(args.device)
    if args.do_train:
        optimizer = Adam(model.parameters(), lr=args.lr)
        train(args=args,
              model_config=model_config,
              model=model,
              optimizer=optimizer,
              with_eval=True)
    if args.do_decode:
        decoder(args, model_config=model_config, model=model, vocab=vocab)
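ModelConfig is also not shown here; since it is constructed from model_config.json via keyword arguments and later has fields such as pad_idx assigned, a plausible minimal container would look like the sketch below (an assumption, not the project's actual class).

class ModelConfig:
    # Hypothetical config holder: exposes every JSON field as an attribute.
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)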
Example #7
def kqn_main():
    # argument parser
    # default hyperparams not set to optimal
    # dropout not used in the implementation
    parser = ArgumentParser()
    parser.add_argument(
        '--dataset',
        type=str,
        default='assist0910',
        help='choose from assist0910, assist15, statics11, and synthetic-5')
    parser.add_argument('--version',
                        type=int,
                        default=None,
                        help='if dataset==synthetic-5, choose from 0 to 19')
    parser.add_argument(
        '--min_seq_len',
        type=int,
        default=2,
        help=
        'minimum threshold of number of time steps to discard student problem-solving records.'
    )
    parser.add_argument('--rnn',
                        type=str,
                        default='lstm',
                        help='rnn type. one of lstm and gru.')
    parser.add_argument(
        '--hidden',
        type=int,
        default=128,
        help='dimensionality of skill and knowledge state vectors')
    parser.add_argument(
        '--rnn_hidden',
        type=int,
        default=128,
        help='number of hidden units for knowledge state encoder rnn')
    parser.add_argument('--mlp_hidden',
                        type=int,
                        default=128,
                        help='number of hidden units for skill encoder mlp')
    parser.add_argument('--layer',
                        type=int,
                        default=1,
                        help='number of rnn layers')
    parser.add_argument('--gpu',
                        type=int,
                        default=-1,
                        help='which gpu to use. default to -1: not using any')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='learning rate for adam')
    parser.add_argument('--batch', type=int, default=100, help='batch size')
    parser.add_argument('--ckpt',
                        type=str,
                        default='./ckpt',
                        help='default checkpoint path')
    parser.add_argument('--epoch',
                        type=int,
                        default=100,
                        help='number of epochs')
    parser.add_argument(
        '--optim',
        type=str,
        default='adam',
        help='optimizer to use. currently only adam is implemented.')

    args = parser.parse_args()
    dataset = args.dataset
    version = args.version
    min_seq_len = args.min_seq_len
    rnn_type = args.rnn
    n_hidden = args.hidden
    n_rnn_hidden = args.rnn_hidden
    n_mlp_hidden = args.mlp_hidden
    n_rnn_layers = args.layer
    gpu = args.gpu
    lr = args.lr
    batch_size = args.batch
    ckpt_path = args.ckpt
    n_epochs = args.epoch
    opt_str = args.optim

    if ckpt_path is not None:
        if not (os.path.exists(ckpt_path)):
            os.makedirs(ckpt_path)

    if gpu == -1 or not torch.cuda.is_available():
        DEVICE = 'cpu'
    else:
        DEVICE = gpu

    # load data
    n_skills = get_num_skills(dataset)
    fnames = {
        'train': get_csv_fname(True, dataset, version),
        'eval': get_csv_fname(False, dataset, version)
    }
    datasets = {
        'train': read_csv(fnames['train'], min_seq_len),
        'eval': read_csv(fnames['eval'])
    }
    datasets = {
        'train':
        KQNDataset(datasets['train'][0], datasets['train'][1],
                   datasets['train'][2], n_skills),
        'eval':
        KQNDataset(datasets['eval'][0], datasets['eval'][1],
                   datasets['eval'][2], n_skills)
    }
    dataloaders = {
        'train':
        DataLoader(datasets['train'],
                   batch_size=batch_size,
                   drop_last=False,
                   collate_fn=PadSequence(),
                   shuffle=True),
        'eval':
        DataLoader(datasets['eval'],
                   batch_size=batch_size,
                   drop_last=False,
                   collate_fn=PadSequence())
    }

    model = KQN(n_skills, n_hidden, n_rnn_hidden, n_mlp_hidden, n_rnn_layers,
                rnn_type, DEVICE).to(DEVICE)

    if opt_str == 'adam':
        opt_class = Adam
    else:
        raise ValueError("Only the adam optimizer is currently implemented.")

    optimizer = opt_class(model.parameters(), lr=lr)
    writer = SummaryWriter('./logs')

    train(model, dataloaders, optimizer, writer, n_epochs, ckpt_path, DEVICE)
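The PadSequence collate function passed to the DataLoaders above is not defined in the snippet; it typically pads the variable-length interaction sequences within a batch to a common length. A minimal sketch, assuming each dataset item is a tuple of (input tensor, target tensor, sequence length):

import torch
from torch.nn.utils.rnn import pad_sequence

class PadSequence:
    # Hypothetical collate_fn: pad variable-length sequences within a batch.
    def __call__(self, batch):
        inputs, targets, lengths = zip(*batch)
        inputs = pad_sequence(inputs, batch_first=True)    # (B, T_max, ...)
        targets = pad_sequence(targets, batch_first=True)  # (B, T_max, ...)
        lengths = torch.as_tensor(lengths)
        return inputs, targets, lengths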
Example #8
    with tf.Session(config=config, graph=net.graph) as sess:
        sess.run(tf.global_variables_initializer(), {net.is_training: True})

        if FLAGS.in_model_dirs:
            exclude = ''
            if 'embedding' in FLAGS.loss_func:
                exclude = 'Yp'
            elif 'position' in FLAGS.loss_func:
                exclude = 'Yc'

            for in_model_dir in FLAGS.in_model_dirs.split(','):
                assert(load_model(sess, in_model_dir, exclude))

        if FLAGS.train:
            train(sess, net, train_data, test_data, n_epochs=FLAGS.n_epochs,
                    snapshot_epoch=FLAGS.snapshot_epoch,
                    model_dir=FLAGS.out_model_dir, log_dir=FLAGS.log_dir,
                    data_name=g_shape_synset, output_generator=None)
        else:
            '''
            train_loss, train_accuracy, _ = evaluate(sess, net, train_data)
            test_loss, test_accuracy, _ = evaluate(sess, net, test_data)

            msg = "|| Train Loss: {:6f}".format(train_loss)
            msg += " | Train Accu: {:5f}".format(train_accuracy)
            msg += " | Test Loss: {:6f}".format(test_loss)
            msg += " | Test Accu: {:5f}".format(test_accuracy)
            msg += " ||"
            print(msg)
            '''

            if 'joint_embedding' in FLAGS.loss_func or\
Example #9
def run(args, train_data, val_data, test_data):
    tf.set_random_seed(1234)
    np.random.seed(1234)
    random.seed(1234)

    print('\n==== PARAMS ====')
    for arg in vars(args):
        print('{}={}'.format(arg, getattr(args, arg)))
    print('========\n')

    if args.exp_type == 'ours':
        net = Network(train_data.n_points, train_data.n_dim,
                      test_data.n_seg_ids, args.K, args.batch_size,
                      args.init_learning_rate, args.decay_step,
                      args.decay_rate, args.bn_decay_step,
                      args.l21_norm_weight, args.net_options)
    elif args.exp_type == 'sem_seg':
        print("## Sementic Segmentation ##")
        net = NetworkSemSeg(train_data.n_points, train_data.n_dim,
                            train_data.n_labels, args.batch_size,
                            args.init_learning_rate, args.decay_step,
                            args.decay_rate, args.bn_decay_step,
                            args.net_options)
    else:
        assert (False)

    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True

    with tf.Session(config=config, graph=net.graph) as sess:
        sess.run(tf.global_variables_initializer(), {net.is_training: True})

        if args.in_model_dirs:
            include = ''
            for in_model_dir in args.in_model_dirs.split(','):
                assert (load_model(sess, in_model_dir, include))

        if args.train:
            train(sess,
                  net,
                  args.exp_type,
                  train_data,
                  val_data,
                  n_epochs=args.n_epochs,
                  snapshot_epoch=args.snapshot_epoch,
                  validation_epoch=args.validation_epoch,
                  model_dir=args.out_model_dir,
                  log_dir=args.log_dir,
                  data_name=train_data.name,
                  output_generator=None)

        train_loss, _ = validate(sess, net, args.exp_type, train_data)
        test_loss, _ = validate(sess, net, args.exp_type, test_data)

        msg = "|| Train Loss: {:6f}".format(train_loss)
        msg += " | Test Loss: {:6f}".format(test_loss)
        msg += " ||"
        print(msg)

        if args.train:
            # Save training result.
            if not os.path.exists(args.out_dir): os.makedirs(args.out_dir)
            out_file = os.path.join(
                args.out_dir,
                '{}.txt'.format(datetime.now().strftime("%Y-%m-%d_%H-%M-%S")))
            with open(out_file, 'w') as f:
                f.write(msg + '\n')
            print("Saved '{}'.".format(out_file))

        if args.exp_type == 'ours':
            if 'eval' in args.eval_type:
                evaluate.evaluate(sess, net, test_data, args.out_dir)
            if 'eval_keypoints' in args.eval_type:
                evaluate_keypoints.evaluate(sess, net, test_data, args.out_dir)
            if 'eval_obj_det' in args.eval_type:
                evaluate_obj_det.evaluate(sess, net, test_data, args.out_dir)
            if 'save_dict' in args.eval_type:
                P = test_data.point_clouds
                A = predict_A(P, sess, net)
                out_file = os.path.join(args.out_dir, 'dictionary.npy')
                np.save(out_file, A)
                print("Saved '{}'".format(out_file))
        elif args.exp_type == 'sem_seg':
            evaluate_sem_seg.evaluate(sess, net, test_data, args.out_dir)
Example #10
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# --------------------------------------- #
# --- Full precision model load/train --- #
# --------------------------------------- #

if args.model == "vgg16":
    net = VGG16()
elif args.model == "resnet50":
    net = ResNet50()
else:
    print("Model {} not supported!".format(args.model))
    sys.exit(0)
net = net.to(device)

# Uncomment to load pretrained weights
#net.load_state_dict(torch.load("net_before_pruning.pt"))

# Comment if you have loaded pretrained weights
# Tune the hyperparameters here.
if not args.skip_pt:
    train(net, epochs=args.epochs, batch_size=args.batch, lr=args.lr, reg=args.reg, checkpoint_path=args.ckpt_dir)
else:
    net.load_state_dict(torch.load(args.path))
    print("Net loaded from {}".format(args.path))
    test(net)

summary(net)

Example #11
import data_util
import model
import train_util

import torch
import torch.nn as nn
import torch.optim as optim

train_path = ''
val_path = ''

dset_loaders, dset_sizes, dset_classes = data_util.load_data(
    train_path=train_path, val_path=val_path)

print(dset_sizes)
print(dset_classes)

net = model.AlexNet().cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), weight_decay=0.0005)
lr_scheduler = train_util.exp_lr_scheduler
lr = 0.001

best_model, best_acc = train_util.train(net, criterion, optimizer,
                                        lr_scheduler, dset_loaders, dset_sizes,
                                        lr, 40)

print('Saving the best model')
filename = 'trained_model_val_{:.2f}.pt'.format(best_acc)
torch.save(best_model.state_dict(), filename)
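train_util.exp_lr_scheduler above is referenced but not shown; a common form of such a helper decays the learning rate by a fixed factor every few epochs. A sketch under that assumption, with hypothetical defaults:

def exp_lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
    # Hypothetical helper: multiply the LR by 0.1 every lr_decay_epoch epochs.
    lr = init_lr * (0.1 ** (epoch // lr_decay_epoch))
    if epoch % lr_decay_epoch == 0:
        print('LR is set to {}'.format(lr))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer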