def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    root = "./datasets"

    train_sampler = None

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            root, split="unlabeled", download=True, transform=TransformsSimCLR()
        )
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            root, download=True, transform=TransformsSimCLR()
        )
    else:
        raise NotImplementedError

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    model, optimizer, scheduler = load_model(args, train_loader)

    tb_dir = os.path.join(args.out_dir, _run.experiment_info["name"])
    os.makedirs(tb_dir)
    writer = SummaryWriter(log_dir=tb_dir)

    mask = mask_correlated_samples(args)
    criterion = NT_Xent(args.batch_size, args.temperature, mask, args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        lr = optimizer.param_groups[0]['lr']
        loss_epoch = train(args, train_loader, model, criterion, optimizer, writer)

        if scheduler:
            scheduler.step()

        if epoch % 10 == 0:
            save_model(args, model, optimizer)

        writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)
        writer.add_scalar("Misc/learning_rate", lr, epoch)
        print(
            f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}\t lr: {round(lr, 5)}"
        )
        args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
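The save_model helper called above is not shown in this example. As a rough, hypothetical sketch (not the project's actual code), a helper for a setup like this might write a checkpoint under args.out_dir; the filename pattern and checkpoint contents below are assumptions.

import os
import torch

def save_model(args, model, optimizer):
    # Hypothetical helper: bundle model and optimizer state into one checkpoint file.
    out = os.path.join(args.out_dir, "checkpoint_{}.tar".format(args.current_epoch))
    torch.save(
        {
            "epoch": args.current_epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        },
        out,
    )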
Example #2
def main(verbose):
    dataset = load_dataset(
        glob('../data/trump_tweet_data_archive/condensed_*.json.zip'), verbose)
    corpus, sequences, next_chars, c2i, i2c, nc = seq_data(
        dataset, SEQ_LEN, SEQ_STEP, verbose)

    if verbose:
        print(f'corpus length: {len(corpus)}')
        print(f'num characters: {nc}')
        print(f'number of sequences: {len(sequences)}')

    # The data is shuffled so the validation data isn't simply the latest 20% of tweets
    X, y = vec_data(sequences, next_chars, SEQ_LEN, nc, c2i, verbose)
    # Split off the last 20% as validation data for pretty graphs
    n = len(X)
    num_val = int(PERCENT_VALIDATION * n)
    X_val = X[n - num_val:]
    y_val = y[n - num_val:]

    X_train = X[:n - num_val]
    y_train = y[:n - num_val]

    if verbose:
        print(f'Number validation samples: {num_val}')

    model = build_model(SEQ_LEN, nc, verbose)
    history = train_model(model, X_train, y_train, X_val, y_val, verbose)
    plot_model_loss(BASENAME, history, verbose)
    # Save the trained model so we don't have to wait 25 hours to generate another 10 tweet sample
    save_model(model, BASENAME, verbose)
    # Generate sample tweets using 10 random seeds from the corpus.
    generate(BASENAME, model, corpus, c2i, i2c, nc, 10, verbose)
Example #3
def train_cnn(x_train, y_train, x_val, y_val):
    model = Sequential()
    model.add(
        Conv2D(64,
               kernel_size=(3, 3),
               activation='relu',
               input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_class, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model_fit_history = model.fit(x_train,
                                  y_train,
                                  batch_size=50,
                                  epochs=100,
                                  verbose=1,
                                  validation_data=(x_val, y_val))
    loss, acc = model.evaluate(x_val, y_val, verbose=0)
    print('\nTesting loss: {}, acc: {}\n'.format(loss, acc))
    plots_loss_accuracy_from_training(model_fit_history)
    save_model(model,
               path_model=path_save_model,
               path_weights=path_save_weights)
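The save_model call above takes path_model and path_weights keyword arguments. A hedged sketch of what such a Keras helper could look like, assuming the architecture is written as JSON and the weights as an HDF5 file (the real helper is not shown here):

def save_model(model, path_model, path_weights):
    # Hypothetical helper: store the architecture and the weights separately.
    with open(path_model, 'w') as f:
        f.write(model.to_json())      # architecture as JSON
    model.save_weights(path_weights)  # weights (HDF5)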
Example #4
def test(rank, args, shared_model, counter):
    torch.manual_seed(args.seed + rank)

    env = create_atari_env(args.env_name)
    env.seed(args.seed + rank)

    model = ActorCritic(env.observation_space.shape[0], env.action_space)

    model.eval()

    state = env.reset()
    state = torch.from_numpy(state)
    reward_sum = 0
    done = True

    start_time = time.time()

    # a quick hack to prevent the agent from getting stuck
    actions = deque(maxlen=100)
    episode_length = 0
    while True:
        episode_length += 1
        # Sync with the shared model
        if done:
            model.load_state_dict(shared_model.state_dict())

        if done and counter.value > args.max_steps:
            test_final(shared_model, env, args)
            save_model(shared_model, args)
            exit()

        with torch.no_grad():
            value, logit = model(state.unsqueeze(0))
        prob = F.softmax(logit, dim=-1)
        action = prob.max(1, keepdim=True)[1].numpy()

        state, reward, done, _ = env.step(action[0, 0])
        done = done or episode_length >= args.max_episode_length
        reward_sum += reward

        # a quick hack to prevent the agent from getting stuck
        actions.append(action[0, 0])
        if actions.count(actions[0]) == actions.maxlen:
            done = True

        if done:
            print(
                "Time {}, num steps {}, FPS {:.0f}, episode reward {}, episode length {}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    counter.value, counter.value / (time.time() - start_time),
                    reward_sum, episode_length))
            reward_sum = 0
            episode_length = 0
            actions.clear()
            state = env.reset()
            time.sleep(60)

        state = torch.from_numpy(state)
Example #5
def main():
    args = load_arg()
    print(f"Run:{args.lang}")

    args.device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    args.n_gpu = torch.cuda.device_count()

    set_seed(args)

    train_dataset, dev_dataset = load_shinra_data(args)
    model = CNN(args, train_dataset.num_labels, \
                emb_freeze=args.emb_freeze, \
                class_weight=train_dataset.get_class_weight())

    model.to(args.device)

    scores = {}
    model, scores["train"], scores["dev"], best_epoch = train(
        args, train_dataset, dev_dataset, model)

    model.to("cpu")

    if args.output_dir is not None:
        output_dir = f"{args.output_dir}/{args.lang}"
        os.makedirs(output_dir, exist_ok=True)
        save_model(output_dir, model)
        save_json(f"{output_dir}/score.json", scores)
Example #6
def main(args):
    files = glob(args.data_path + '*.jpg')
    training_data = ImageDataset(files[:int(len(files) * .9)])
    dev_data = ImageDataset(files[int(len(files) * .9):])
    model = train(training_data, dev_data, args)
    print('Saving model to %s' % args.o)
    save_model(model, args.o)
Example #7
def train_main():
    with open('config.json') as f:
        args = json.load(f)

    # make save folder
    try:
        print('Creating checkpoint folder...')
        os.makedirs(args['save_folder'])
    except OSError as e:
        if e.errno == errno.EEXIST:
            print('Directory already exists.')
        else:
            raise

    # read and preprocess data
    train_data = pd.read_csv(args['train_path'])
    preprocessed_train = preprocess(train_data, args)

    if args['train_model_weights']:  # resume training
        model = restore_model(args['save_folder'], args['train_model_weights'])
    else:
        model = create_model(args)
        save_model(model, args['save_folder'])

    # split data for CV
    train_set, val_set = val_split(*preprocessed_train[0],
                                   preprocessed_train[1])

    model, history = fit_model(model, train_set, val_set, args)
    plot_model_history(history, args['save_folder'])
Example #8
def train(ctx, dataset_fpath, all_data, max_depth, model_fpath, name, test):

    if not os.path.isfile(dataset_fpath):
        logging.info('No dataset was provided, building with default settings')
        data.save_dataset(dataset_fpath)

    dataset = data.load_dataset(dataset_fpath, return_arrays=False)
    clf = model.REGISTRY[name](max_depth=max_depth)

    X_train, y_train = dataset['X_train'], dataset['y_train']
    X_test, y_test = dataset['X_test'], dataset['y_test']
    if all_data:
        X_train = np.concatenate((X_train, X_test), axis=0)
        y_train = np.concatenate((y_train, y_test), axis=0)

    clf.fit(X_train, y_train)

    model.save_model(clf, model_fpath)

    acc = clf.score(X_train, y_train)
    logging.info("Accuracy on training set: {}".format(acc))

    if test:
        acc = clf.score(X_test, y_test)
        logging.info("Accuracy on the test set: {}".format(acc))
Example #9
def main(args):
    # process input file
    input_file = util.ensure_local_file(args['train_file'])
    user_map, item_map, tr_sparse, test_sparse = model.create_test_and_train_sets(
        args, input_file, args['data_type'])

    # train model
    output_row, output_col = model.train_model(args, tr_sparse)

    # save trained model to job directory
    if args['data_type'] == 'user_ratings':
        model.save_model_json(args, user_map, item_map, output_row, output_col)
        user_items_w = model.get_user_items_w(input_file)
        model.save_user_items_w(args, user_items_w)
    else:
        model.save_model(args, user_map, item_map, output_row, output_col)

    # log results
    train_rmse = wals.get_rmse(output_row, output_col, tr_sparse)
    test_rmse = wals.get_rmse(output_row, output_col, test_sparse)

    if args['hypertune']:
        # write test_rmse metric for hyperparam tuning
        util.write_hptuning_metric(args, test_rmse)

    tf.logging.info('train RMSE = %.2f' % train_rmse)
    tf.logging.info('test RMSE = %.2f' % test_rmse)
Example #10
def train():
    data = load_dataset(dataset_path)
    print('Step1: Dataset is loaded successfully!')

    preprocessed_data = preprocessing(data)
    print('Step2: Data preprocessing done successfully!')

    train, test = train_test_split(preprocessed_data)
    print('Step3: Data split into train and test successfully!')

    train_X, train_Y, test_X, test_Y, vectorizer = feature_extraction(
        train, test)

    trained_model = model_training(train_X, train_Y)
    print('Step4: Model trained successfully!')

    accuracy = model_testing(test_X, test_Y, trained_model)

    vec_classifier = Pipeline([('vectorizer', vectorizer),
                               ('classifier', trained_model)])

    save_model(vec_classifier)
    print('Step5: Model is deployed successfully')

    response = {
        'success': True,
        'message': 'Model deployed',
        'accuracy': accuracy
    }
    return response
Example #11
def main(argv=None):
	if not os.path.exists(Constants.MODEL_DIR):
	    os.makedirs(Constants.MODEL_DIR)
	if not os.path.exists(Constants.TENSORBOARD_DIR):
	    os.makedirs(Constants.TENSORBOARD_DIR)

	with open(Constants.CHARLIST_FILE, "rb") as fp:
		charList = pickle.load(fp)
	lenCharList = len(charList)

	with tf.device("CPU:0"):
		train_ds, train_image_count = create_datasets(Constants.TRAIN_TFRECORD)
		val_ds, val_image_count = create_datasets(Constants.VAL_TFRECORDS)

	train_batches = int(np.floor(train_image_count/Constants.BATCH_SIZE))
	val_batches = int(np.floor(val_image_count/Constants.BATCH_SIZE))

	model = CRNN(lenCharList)

	global_step_op = tf.Variable(0)
	starter_learning_rate = 0.1
	learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step_op, decay_steps=10000, decay_rate=0.1, staircase=False)
	optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)

	epoch = 1
	summary_writer = tf.contrib.summary.create_file_writer(Constants.TENSORBOARD_DIR, flush_millis=10000)
	with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
		while True:
			print("Epoch "+str(epoch))
			loss = train_on_batch(model, train_ds, train_batches, charList, optimizer)
			images, recognized, charErrorRate, wordAccuracy = validate_on_batch(model, val_ds, val_batches, charList, epoch)
			if charErrorRate < 15:
				save_model(model, epoch)
			write_to_tensorboard(epoch, images, recognized, loss, charErrorRate, wordAccuracy)
			epoch += 1
Example #12
def main(train=True):

    # data
    train_loader, valid_loader, test_loader = dataset.get_dataset()

    # loss function
    criterion = nn.CrossEntropyLoss()

    # optimizer
    model = load_model(name)
    if model.classifier is not None:
        fc_params_id = list(map(id, model.classifier.parameters()))
        base_params = filter(lambda p: id(p) not in fc_params_id, model.parameters())
        optimizer = optim.SGD([
            {'params': base_params, 'lr': LR * freeze_rate},  # 0
            {'params': model.classifier.parameters(), 'lr': LR}], momentum=momentum, weight_decay=weight_decay)
    elif model.fc is not None:
        fc_params_id = list(map(id, model.fc.parameters()))
        base_params = filter(lambda p: id(p) not in fc_params_id, model.parameters())
        optimizer = optim.SGD([
            {'params': base_params, 'lr': LR * freeze_rate},  # 0
            {'params': model.fc.parameters(), 'lr': LR}], momentum=momentum, weight_decay=weight_decay)
    else:
        optimizer = optim.SGD(model.parameters(), lr=LR, momentum=momentum, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
    if train:
        writer = SummaryWriter(log_dir=log_dir, comment='test_tensorboard')
        if not RESUME:
            train_classifier(model, 100, train_loader, valid_loader, optimizer, criterion, scheduler, writer=writer)
        else:
            train_classifier_resume(model, optimizer, path_checkpoint, 100, train_loader, valid_loader, criterion,
                                scheduler, writer=writer)
        save_model(model, name, save_state_dic=True)
    else:
        state_dict = torch.load(model_path)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        inference_model = load_inference_model(name)
        inference_model.load_state_dict(state_dict)
        inference_model.to(device)
        inference_model.eval()
        pred_list1 = []
        pred_list2 = []
        pred_list3 = []
        with torch.no_grad():
            for i, data in enumerate(test_loader):
                images, _ = data
                images = images.to(device)

                # tensor to vector
                outputs = inference_model(images)
                _, pred_top3 = torch.topk(outputs, k=3, dim=-1)

                pred_list1.append(pred_top3.data[:, 0].cpu().numpy().reshape((1, -1)).tolist())
                pred_list2.append(pred_top3.data[:, 1].cpu().numpy().reshape((1, -1)).tolist())
                pred_list3.append(pred_top3.data[:, 2].cpu().numpy().reshape((1, -1)).tolist())
        lists = [pred_list1, pred_list2, pred_list3]
        inference.vector2label(lists)
Example #13
def save_model(args):
    """save_model(args: argparse.Namespace)
    Increments the save counter, and saves the global model.
    You must first set `model.sess`. See `model.save_model` for more info.
    """
    global model_save_counter
    model_save_counter += 1
    path = os.path.join(args.model_output_dir,
                        "model-%03i.npy" % model_save_counter)
    model.save_model(net, path)
Example #14
def do_quick_distillation(start_epoch=-1):
    seed_init()
    train_dataloader, eval_dataloader = getdataLoader()
    xlnet_config = XLNetConfig.from_json_file(config.xlnet_config_root)
    student = XlnetCloze(xlnet_config)
    soft_labels = pickle.load(open(config.soft_label_file, "rb"))
    optimizer_grouped_parameters = get_optimizer_group(student)
    num_train_steps = int(train_dataloader.dataset.__len__() / config.train_batch_size * config.num_train_epochs)
    optimizer = AdamW(optimizer_grouped_parameters, lr=config.xlnet_learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, 'max', verbose=True, factor=config.decay,
                                  min_lr=config.min_lr, patience=config.patience)
    load_model(start_epoch, student, optimizer)
    if config.n_gpu > 1:
        student = nn.DataParallel(student)
    student.to(config.device)
    student.train()
    ave_loss, ave_hard_loss, ave_soft_loss, ave_train_accr = get_watch_index()
    global_step = (start_epoch + 1) * num_train_steps

    for epoch in trange(start_epoch + 1, config.num_train_epochs):
        student.zero_grad()
        for batch in tqdm(train_dataloader):
            input_ids, attention_mask, position, option_ids, tags, labels = batch
            input_ids, attention_mask, position, option_ids, tags, labels = to_device(
                input_ids, attention_mask, position, option_ids, tags, labels
            )
            _, student_logits = student(input_ids, attention_mask, position, option_ids, tags, labels)
            teacher_probs = get_teacher_probs(soft_labels, tags).to(config.device)
            loss_hard = F.cross_entropy(student_logits, labels, reduction="mean")
            loss_soft, teacher_probs = cross_entropy_loss_with_temperature_v2(student_logits, teacher_probs,
                                                                              config.temperature)
            loss = config.alpha * loss_hard + (1.0 - config.alpha) * config.temperature * config.temperature * loss_soft

            loss.backward()
            ave_train_accr.add(cal_accr(student_logits, labels))
            ave_loss.add((config.alpha * loss_hard + (1.0 - config.alpha) * loss_soft).item())
            ave_soft_loss.add(loss_soft.item())
            ave_hard_loss.add(loss_hard.item())
            optimizer.step()
            optimizer.zero_grad()
            # ipdb.set_trace()
            show_watch_index(global_step, ave_teacher_accr=cal_accr(logits=teacher_probs, labels=labels))
            if (global_step + 1) % config.show_loss_step == 0:
                now_lrs = show_lr(optimizer)
                show_watch_index(global_step, ave_hard_loss=ave_hard_loss, now_lrs=now_lrs,
                                 ave_soft_loss=ave_soft_loss, ave_loss=ave_loss)
            if global_step <= num_train_steps * config.warmup_proportion:
                warmup_adajust(num_train_steps, global_step, optimizer)
            global_step += 1

        eval_accr = eval(student, eval_dataloader)
        show_watch_index(epoch, eval_accr=eval_accr, ave_train_accr=ave_train_accr)
        scheduler.step(eval_accr)
        save_model(epoch, student, optimizer)
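cross_entropy_loss_with_temperature_v2 above is project-specific and not shown. For reference, a generic sketch of a temperature-scaled soft-target loss as commonly used in knowledge distillation (this is the standard formulation, not necessarily the exact one used in this example):

import torch.nn.functional as F

def soft_target_loss(student_logits, teacher_probs, temperature):
    # Cross-entropy between the teacher's (already softened) distribution and
    # the temperature-scaled student distribution.
    log_p_student = F.log_softmax(student_logits / temperature, dim=-1)
    return -(teacher_probs * log_p_student).sum(dim=-1).mean()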
Example #15
def main():
    print('--start--')

    # Folder Paths
    log_dir = './files/training_logs/'

    # Hyper parameters
    num_features = 5
    classes = ['Dead', 'Alive: Wrong Direction', 'Alive: Right Direction']
    num_classes = len(classes)

    epochs = 10
    batch_size = 128
    learning_rate = 0.01

    # Load Data
    x_train, y_train = Data.generate_data(80000, num_features, num_classes)
    x_valid, y_valid = Data.generate_data(16000, num_features, num_classes)

    # Build model
    model = Model.build_model(num_features, num_classes, learning_rate)

    # View model summary
    model.summary()

    # Check memory needed during the training process (not accurate)
    Model.get_model_memory_usage(batch_size, model)

    # Get optimizer name
    opt_name = model.optimizer.__class__.__name__
    # Get folder name
    hparam_str = make_hparam_string(opt_name, learning_rate, batch_size, epochs)
    log_dir += hparam_str
    output_dir = log_dir + 'model/'
    # Create folder
    prepare_dir(output_dir)

    # Train the model
    train(model, x_train, y_train, x_valid, y_valid, batch_size, epochs, log_dir)

    # Evaluate the model
    evaluate(model, classes, x_valid, y_valid, output_dir)

    # Save the model
    Model.save_model(model, classes, output_dir)

    # Test on game
    # test_in_game(model, 1000, False, True, 200)

    # Visualize
    # plt.show()

    print('--end--')
Example #16
def train():
    raw_data = pd.read_csv(train_data_path)
    data, labels = preprocess(raw_data)
    train_data, test_data, train_labels, test_labels = split_data(data, labels)

    trained_model = train_model(digit_recognition_model(), train_data,
                                train_labels, 15)

    result = eval(trained_model, test_data, test_labels)

    print('model accuracy:', result)
    save_model(model_path, trained_model)
Example #17
def main():
    ###############################################################################
    # Load data
    ###############################################################################
    d = util.Dictionary()
    if args.task == "train":
        logging.info("Reading train...")
        trncorpus = util.read_corpus(args.ftrn, d, True)
        d.freeze()  # no new word types allowed
        vocab_size = d.size()
        # save dict
        d.save_dict(fprefix + ".dict")
        logging.info("Reading dev...")
        devcorpus = util.read_corpus(args.fdev, d, False)

    elif args.task == "test":
        logging.info("Reading test...")
        d.load_dict(args.fdct)
        d.freeze()
        vocab_size = d.size()
        # load test corpus
        tstcorpus = util.read_corpus(args.ftst, d, False)

    ###############################################################################
    # Build the model
    ###############################################################################
    if args.task == "train":
        model_fname = fprefix + ".model"
        pretrained_model = None
        if args.fmod:
            # load pre-trained model
            pretrained_model = model.load_model(args.fmod, vocab_size,
                                                args.nclass, args.inputdim,
                                                args.hiddendim, args.nlayer,
                                                args.droprate)
            logging.info("Successfully loaded pretrained model.")

        trained = model.train(trncorpus, devcorpus, vocab_size, args.nclass,
                              args.inputdim, args.hiddendim, args.nlayer,
                              args.trainer, args.lr, args.droprate, args.niter,
                              args.logfreq, args.verbose, model_fname,
                              pretrained_model)
        dev_accuracy = model.evaluate(trained, devcorpus.docs)
        logging.info("Final Accuracy on dev: %s", dev_accuracy)
        model.save_model(trained, model_fname)

    else:
        trained_model = model.load_model(args.fmod, vocab_size, args.nclass,
                                         args.inputdim, args.hiddendim,
                                         args.nlayer, args.droprate)
        tst_accuracy = model.evaluate(trained_model, tstcorpus.docs)
        logging.info("Final Accuracy on test: %s", tst_accuracy)
Example #18
def train(config: TrainingConfig) -> TrainingResults:
    """
    Train a new model based on the `config`, save it to disk and return the training statistics.
    """
    train_dataset, validation_dataset, labels = get_datasets(config.data_dir, config.image_size, config.batch_size)
    model = get_model(config.image_size, len(labels))
    history = model.fit(
        train_dataset,
        validation_data=validation_dataset,
        epochs=config.epochs
    ).history
    save_model(config.model_path, model, labels)
    return TrainingResults(history['accuracy'], history['val_accuracy'], history['loss'], history['val_loss'])
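The save_model(config.model_path, model, labels) helper is not shown either. A hedged sketch, assuming the Keras model is stored in SavedModel format with the label list written next to it (the labels filename is an assumption):

import json
import os

def save_model(model_path, model, labels):
    # Hypothetical helper: persist the model and the class labels together.
    model.save(model_path)  # SavedModel directory (or .h5/.keras file)
    with open(os.path.join(model_path, 'labels.json'), 'w') as f:
        json.dump(list(labels), f)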
Example #19
    def test_save_model(self):
        """
        Test model saving
        """
        mock_model = self.mock_model
        save_model(mock_model, self.test_model_dir)
        files = os.listdir(self.test_model_dir)
        if files:
            test_ext = os.path.splitext(files[0])[1]

        # Saves a model
        self.assertTrue(files, "no model saved")
        self.assertEqual(".h5", test_ext, "model not saved as '.h5'")
Example #20
def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    root = "./datasets"
    model = load_model(args)
    model = model.to(args.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)  # TODO: LARS

    train_sampler = None
    train_dataset = torchvision.datasets.STL10(root,
                                               split="unlabeled",
                                               download=True,
                                               transform=TransformsSimCLR())

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    tb_dir = os.path.join(args.out_dir, _run.experiment_info["name"])
    os.makedirs(tb_dir)
    writer = SummaryWriter(log_dir=tb_dir)

    mask = mask_correlated_samples(args)
    criterion = NT_Xent(args.batch_size, args.temperature, mask, args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        loss_epoch = train(args, train_loader, model, criterion, optimizer,
                           writer)

        writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)
        if epoch % 10 == 0:
            save_model(args, model, optimizer)

        print(
            f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}"
        )
        args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
Example #21
def main(disp_text=True):

    if config.fresh_model:
        config.all_losses = []
        save_model(make_model())
        model = load_model()
        if disp_text: print('created model.', end=' ')
    else:
        model = load_model()
        if not model:
            save_model(make_model())
            model = load_model()
            if disp_text: print('created model.', end=' ')
        else:
            if disp_text: print('loaded model.', end=' ')

    data = load_data()
    data, data_dev = split_data(data)

    data = [d for i,d in enumerate(data) if i in [8,10,13,14]]
    print()
    seq_lens = [len(d) for d in data]
    print(f'seq lens: {seq_lens}')
    min_seq_len = min(seq_lens)
    print(f'min seq len: {min_seq_len}')
    if not config.max_seq_len or config.max_seq_len > min_seq_len:
        config.max_seq_len = min_seq_len
    data = [d[:config.max_seq_len] for d in data]

    # from random import choice
    # from torch import randn
    # data = [[randn(config.in_size) for _ in range(choice(range(config.max_seq_len//2,config.max_seq_len)))] for _ in range(10)]
    # data_dev = []
    # for d in data: print(len(d))

    if not config.batch_size or config.batch_size >= len(data):
        config.batch_size = len(data)
    elif config.batch_size < 1:
        config.batch_size = int(len(data)*config.batch_size)

    if disp_text: print(f'hm data: {len(data)}, hm dev: {len(data_dev)}, bs: {config.batch_size}, lr: {config.learning_rate}, \ntraining started @ {now()}')

    for ep in range(config.hm_epochs):

        for i, batch in enumerate(batchify_data(data)):

            train_on(model, batch)

    return model
Example #22
    def test_load_model(self):
        """
        Test model loading
        """
        model = self.mock_model
        loaded_model = None
        save_model(model, self.test_model_dir)
        files = os.listdir(self.test_model_dir)
        if files:
            model_name = files[0]  # hardcoded: take the first model from model_dir
            loaded_model = load_model(self.test_model_dir, model_name)

        # loaded model exists
        self.assertTrue(loaded_model, "no model loaded")
Example #23
def celebrities_face_train(name, deploy=False):
    """
    Get the celebrities_face dataset and train a model on it.
    After training, it saves the model and the log file.
    :param name: name of the model, used for file names
    :param deploy: if True, training uses the train set and the test set; otherwise it uses the train set and a validation set. Default False.
    """
    batch_size = 32
    epoch = 100
    lr = 0.001
    smoothing = 0.1
    dset = dataset.get_dataset(
        './celebrities_face',
        [0.5893, 0.4750, 0.4330],
        [0.2573, 0.2273, 0.2134],
    )

    num_classes = len(dset.classes)

    hyperparam_dict = {
        'epoch': epoch,
        'lr': lr,
        'smoothing': smoothing,
        'num_classes': num_classes
    }

    meters = [utils.AverageMeter(), utils.AverageMeter(), utils.AverageMeter()]

    train_set, test_set, train_labels = dataset.train_test_set_split(
        dset, 'celebrities_face')
    if deploy:
        train_loader, valid_loader = dataset.train_valid_loader_split(
            train_set, train_labels, batch_size, valid_size=0.1)
    else:
        train_loader = torch.utils.data.DataLoader(train_set,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=2)
        valid_loader = torch.utils.data.DataLoader(test_set,
                                                   batch_size=32,
                                                   num_workers=2)

    train_model = model.get_inception_v3(num_classes=num_classes)
    result_log = train(train_model, train_loader, valid_loader, meters,
                       hyperparam_dict)

    model.save_model(train_model, name)
    save_log(result_log, name)
Example #24
def start_train_job():
    try:
        with open(param_path, 'r') as hp:
            hyperparameters = json.loads(hp.read())
        _model = model.train_model(training_path, hyperparameters)
        model.save_model(model_path, _model)
    except Exception as ex:
        # Write out an error file. This will be returned as the failureReason in the
        # DescribeTrainingJob result.
        trc = traceback.format_exc()
        with open(os.path.join(output_path, 'failure'), 'w') as s:
            s.write('Exception during training: ' + str(ex) + '\n' + trc)
        # Printing this causes the exception to be in the training job logs, as well.
        print('Exception during training: ' + str(ex) + '\n' + trc,
              file=sys.stderr)
        # A non-zero exit code causes the training job to be marked as Failed.
        sys.exit(255)
Example #25
def train_and_evaluate():
    train_data_gen, val_data_gen = load_data()
    model = create_model()

    history = model.fit_generator(
        train_data_gen,
        steps_per_epoch=(train_data_gen.samples // BATCH_SIZE),
        epochs=EPOCHS,
        validation_data=val_data_gen,
        validation_steps=(val_data_gen.samples // BATCH_SIZE)
    )

    acc = history.history["accuracy"]
    val_acc = history.history["val_accuracy"]

    plot_acc(EPOCHS, acc, val_acc)

    save_model(model)
Example #26
    def train(self, num_epoch, verbose=True):
        for epoch in range(1, num_epoch + 1, 1):
            loss = self._run_epoch(verbose)
        savedir = save_model(self.model, self.savedir, loss)
        return {
            'savedir': savedir,
            'throughout': self.stats.throughout,
            'running_avg': self.stats.running_avg
        }
Example #27
def main(args):

    tf.logging.set_verbosity(tf.logging.INFO)

    # input files
    input_file = util.ensure_local_file(args.train_file)
    user_map, item_map, tr_sparse, test_sparse = model.create_test_and_train_sets(
        input_file)

    # train model
    output_row, output_col = model.train_model(args, tr_sparse)

    # save trained model to job directory
    model.save_model(args, user_map, item_map, output_row, output_col)

    # log results
    test_rmse = wals.get_rmse(output_row, output_col, test_sparse)
    util.write_hptuning_metric(args, test_rmse)
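model.save_model here comes from a WALS matrix-factorization job; a plausible sketch, assuming it dumps the id maps and the row/column factors as .npy files under a job directory (the attribute name args.job_dir and the file names are assumptions):

import os
import numpy as np

def save_model(args, user_map, item_map, output_row, output_col):
    # Hypothetical helper: write the factorization outputs as numpy arrays.
    model_dir = os.path.join(args.job_dir, 'model')
    os.makedirs(model_dir, exist_ok=True)
    np.save(os.path.join(model_dir, 'user'), user_map)
    np.save(os.path.join(model_dir, 'item'), item_map)
    np.save(os.path.join(model_dir, 'row'), output_row)
    np.save(os.path.join(model_dir, 'col'), output_col)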
Example #28
    def train_model_VGG(x_train, y_train, x_validate, y_validate, num_classes):
        # Build, compile, and fit the model
        model = build_fully_connected(input_shape=X['train'].shape[1:],
                                      num_classes=num_classes)
        adam = optimizers.Adam(lr=0.0001)
        model.compile(optimizer=adam,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        model_fit_history = model.fit(X['train'],
                                      Y['train'],
                                      batch_size=64,
                                      epochs=50,
                                      verbose=2,
                                      validation_data=(X['validate'],
                                                       Y['validate']))

        epochs = np.argmin(model_fit_history.history['val_loss']) + 1
        print(f'Stop training at {epochs} epochs')

        plots_loss_accuracy_from_training(
            model_fit_history
        )  # plots for loss and accuracy model after training

        # Merge training and validation data
        X_train = np.concatenate([x_train,
                                  x_validate])  # concatenate train and validation features
        Y_train = np.concatenate([y_train,
                                  y_validate])  # concatenate train and validation labels

        # Randomly shuffle X and Y
        shuffle_index = np.random.permutation(len(X_train))
        X_train = X_train[shuffle_index]
        Y_train = Y_train[shuffle_index]
        model = build_fully_connected(input_shape=X_train.shape[1:],
                                      num_classes=num_classes)
        model.compile(optimizer=adam,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        print('Train with Training dataset + Validation dataset as input.')
        model_fit_history = model.fit(
            X_train, Y_train, batch_size=64, epochs=epochs,
            verbose=0)  # train on the merged training + validation dataset
        save_model(model, path_save_model_vgg, path_save_weight_vgg)
Example #29
def train(num_epochs):
    model_to_device(model)

    params = [p for p in model.parameters()]
    trainable = [p for p in params if p.requires_grad]
    print(f"{len(trainable)} of {len(params)} model parameters are trainable.")

    optimizer = torch.optim.SGD(params=trainable,
                                lr=0.005,
                                momentum=0.9,
                                weight_decay=0.0005)

    for epoch in range(num_epochs):
        print(f"Beginning epoch {epoch + 1} of {num_epochs}.")

        model.train()

        start = math.floor(time())

        with ForwardPassCounter(max=len(data_loader)) as counter:
            epoch_loss = 0
            for t_imgs, t_annotations in data_loader:
                t_imgs, t_annotations = input_to_device(t_imgs, t_annotations)

                loss_dict = model(t_imgs, t_annotations)
                losses = sum(loss for loss in loss_dict.values())

                optimizer.zero_grad()
                losses.backward()
                optimizer.step()
                epoch_loss += losses.item()  # accumulate a float, not the loss tensor

                counter.next()

        print(epoch_loss)
        print(f"Epoch duration: {math.floor(time()) - start} seconds.")

        print(
            f"Overwriting ./checkpoints/rcnn_{session_id}.pt with newest weights...",
            end="")
        save_model(model, session_id)
        print("Done.")
Example #30
def checkpoint2model(checkpoint_path: str, model_dir: str):
    """
    Given a checkpoint file, generates a model file that can be loaded by run_TEDD1104.py script.
    Input:
     - checkpoint_path path of checkpoint file (checkpoint.pt)
     - model_path directory where the model is going to be saved (model.bin and model_hyperparameters.json)
    Output:
     """

    if not os.path.exists(model_dir):
        print(f"{model_dir} does not exits. We will create it.")
        os.makedirs(model_dir)

    print_message(f"Loading checkpoint: {checkpoint_path}")

    (
        tedd1104_model,
        _,
        _,
        _,
        running_loss,
        total_batches,
        total_training_examples,
        acc_dev,
        epoch,
        fp16,
        _,
    ) = model.load_checkpoint(path=checkpoint_path,
                              device=model.torch.device("cpu"))

    print(f">>>>>> Checkpoint info <<<<<<\n"
          f"Running loss: {running_loss/total_batches}\n"
          f"Num epochs: {epoch+1}\n"
          f"Total training examples: {total_training_examples}\n"
          f"Acc dev set: {round(acc_dev*100,2)}\n"
          f"FP16: {fp16}\n")

    print_message(f"Saving model in {model_dir}")

    model.save_model(model=tedd1104_model, save_dir=model_dir, fp16=fp16)

    print_message(f"Done!")
def main(args):
  # process input file
  input_file = util.ensure_local_file(args['train_files'][0])
  user_map, item_map, tr_sparse, test_sparse = model.create_test_and_train_sets(
      args, input_file, args['data_type'])

  # train model
  output_row, output_col = model.train_model(args, tr_sparse)

  # save trained model to job directory
  model.save_model(args, user_map, item_map, output_row, output_col)

  # log results
  train_rmse = wals.get_rmse(output_row, output_col, tr_sparse)
  test_rmse = wals.get_rmse(output_row, output_col, test_sparse)

  if args['hypertune']:
    # write test_rmse metric for hyperparam tuning
    util.write_hptuning_metric(args, test_rmse)

  tf.logging.info('train RMSE = %.2f' % train_rmse)
  tf.logging.info('test RMSE = %.2f' % test_rmse)
        filters=filters,
        feats_to_cache=feats_to_cache,
        testing=False,
        feat_score_weight=0.5,
        local_search_width=32,
        local_search_step=2,
        processing_time_ratio=2.0,
        adapt_improve=True,
        use_best_data=True,
        use_all_data=False,
        testing_dir='/tmp',
        n_thumbs=6,
        startend_clip=0.025)

    clip_finder = clip_finder.ClipFinder(
        None,
        scenedetect.detectors.ContentDetector(30.0),
        model.features.ObjectActionGenerator(),
        valence_weight=1.0,
        action_weight=0.25,
        custom_weight=0.5,
        processing_time_ratio=0.7,
        startend_clip=0.1,
        cross_scene_boundary=True,
        min_scene_piece=15)

    mod = model.Model(None, vid_searcher=video_searcher,
                      clip_finder=clip_finder)

    model.save_model(mod, options.output)
def remove_disk_cache(obj):
    '''Recursively removes disk caches from the object.'''
    for name, val in obj.__dict__.items():
        if isinstance(val, features.DiskCachedFeatures):
            obj.__dict__[name] = (
                features.MemCachedFeatures.create_shared_cache(
                val.feature_generator))
        else:
            try:
                remove_disk_cache(val)
            except AttributeError:
                pass

    return obj
    

if __name__ == '__main__':
    parser = OptionParser()

    parser.add_option('--output', '-o', default='neon.model',
                      help='File to output the model definition')
    parser.add_option('--input', '-i', default='neon.model',
                      help='File to input the model definition')
    
    options, args = parser.parse_args()

    model.save_model(
        remove_disk_cache(model.load_model(options.input)),
        options.output)
        '--projectId',
        help = 'ID (not name) of your project',
        required = True
    )
    parser.add_argument(
        '--job-dir',
        help = 'output directory for model, automatically provided by gcloud',
        required = True
    )
    
    args = parser.parse_args()
    arguments = args.__dict__
    
    model.PROJECT = arguments['projectId']
    model.KEYDIR  = 'trainer'
    
    estimator, rmse = model.train_and_evaluate(arguments['frac'],
                                         arguments['maxDepth'],
                                         arguments['numTrees']
                                        )
    loc = model.save_model(estimator, 
                           arguments['job_dir'], 'babyweight')
    print("Saved model to {}".format(loc))
    
    # this is for hyperparameter tuning
    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='rmse',
        metric_value=rmse,
        global_step=0)
# done