Example #1
def run_bc_single_env(env, num_rollouts=10, epochs=200):
    print(colored("ENV: %s" % env, 'green'))
    data = helper.run_expert(env, num_rollouts=num_rollouts)

    input_dim = len(data['observations'][0])
    output_dim = len(data['actions'][0])
    model = helper.build_model(input_dim, output_dim)

    rewards = []
    with tf.Session():
        with tf.variable_scope(env):
            sess = tf.get_default_session()
            sess.run(tf.global_variables_initializer())
            for epoch in range(epochs):
                policy_fn = bc.train_bc(sess,
                                        data,
                                        model=model,
                                        epochs=1,
                                        curr_epoch=epoch)

                _data = bc.run_bc(sess,
                                  env,
                                  policy_fn,
                                  num_rollouts=num_rollouts)

                rewards.append(_data['returns'])

            return rewards
Example #2
def main():
    input_args = get_predict_input_args()
    
    # Load checkpoint
    checkpoint, validation_accuracy = load_checkpoint(input_args.checkpoint_path)
        
    useGPU = input_args.gpu is not None
        
    # Build model
    model = build_model(checkpoint["arch"],
                        checkpoint["hidden_units_01"], 
                        checkpoint["hidden_units_02"], 
                        checkpoint)

    # Process image
    processed_image = process_image(input_args.image_path)
  
    # Predict topK
    topk = predict(processed_image, model, input_args.top_k, useGPU)
    
    # Show result
    with open(input_args.category_names_path, 'r') as f:
        cat_to_name = json.load(f)
    
    probs = topk[0][0].cpu().numpy()
    categories = [cat_to_name[str(category_index+1)] for category_index in topk[1][0].cpu().numpy()]
    
    for i in range(len(probs)):
        print("TopK {}, Probability: {}, Category: {}\n".format(i+1, probs[i], categories[i]))
Example #3
def main():
    
    # Get input arguments
    args = arg_parser()
    
    # Process and load the data/images
    image_datasets, dataloaders = helper.process_and_load_data(args.data_dir)
    print("The train, test & validation data has been loaded.")
    
    # Load the model
    model, optimizer, criterion = helper.build_model(args.arch, args.hidden_units, args.learning_rate)
    print("Model, optimizer & criterion have been loaded.")
    
    # Check if GPU is available
    device = helper.check_gpu(args.gpu)
    print('Using {} for computation.'.format(device))
    
    # Train and validate the model
    helper.train_and_validate_model(model, optimizer, criterion, dataloaders, device, args.epochs, print_every = 32)
    print("Training has been completed.")
    
    # Test the model
    helper.test_model(model, optimizer, criterion, dataloaders, device)
    print("Testing has been completed.")
    
    # Save the checkpoint
    helper.save_checkpoint(args.arch, model, args.epochs, args.hidden_units, args.learning_rate, image_datasets, args.save_dir)
    print("Model's checkpoint has been saved.")
Example #4
def dagger(env, num_rollouts=1, epochs=1):
    data = helper.run_expert(env, num_rollouts=num_rollouts)
    input_dim = len(data['observations'][0])
    output_dim = len(data['actions'][0])
    model = helper.build_model(input_dim, output_dim)

    sess = tf.get_default_session()
    sess.run(tf.global_variables_initializer())

    rewards = []

    os.makedirs('checkpoints', exist_ok=True)

    for epoch in range(epochs):
        checkpoint_path = None
        if epoch == epochs - 1:
            checkpoint_path = helper.checkpoint_path(env, 'dagger-')
        policy_fn = bc.train_bc(sess,
                                data,
                                model=model,
                                curr_epoch=epoch,
                                epochs=1,
                                checkpoint_path=checkpoint_path)

        _data = bc.run_bc(sess,
                          env,
                          policy_fn,
                          num_rollouts=num_rollouts,
                          stats=False)
        _data['actions'] = helper.ask_expert_actions(env,
                                                     _data['observations'])
        rewards.append(_data['returns'])
        data = merge_data(data, _data)

    return policy_fn, rewards
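The merge_data helper used by dagger above is not reproduced in this listing. A minimal sketch of what it presumably does, assuming the rollout dictionaries hold NumPy arrays under the 'observations' and 'actions' keys that train_bc indexes into:

import numpy as np

# Hypothetical sketch of merge_data (not part of the listing): append the
# expert-relabelled rollouts of the current policy to the aggregated dataset,
# as the DAgger loop above requires.
def merge_data(data, new_data):
    return {
        'observations': np.concatenate([data['observations'],
                                        new_data['observations']]),
        'actions': np.concatenate([data['actions'], new_data['actions']]),
    }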
Example #5
def train_bc(sess,
             data,
             model=None,
             curr_epoch=None,
             epochs=1,
             batch_size=32,
             debug=False,
             checkpoint_path=None):
    mean, stdev = helper.mean_and_stdev(data['observations'])

    if model:
        m = model
        input_ph, output_ph = m['input_ph'], m['output_ph']
        mean_v, stdev_v = m['mean_v'], m['stdev_v']
        output_pred, mse, opt = m['output_pred'], m['mse'], m['opt']
    else:
        input_dim = len(data['observations'][0])
        output_dim = len(data['actions'][0])
        m = helper.build_model(input_dim, output_dim)
        input_ph, output_ph = m['input_ph'], m['output_ph']
        mean_v, stdev_v = m['mean_v'], m['stdev_v']
        output_pred, mse, opt = m['output_pred'], m['mse'], m['opt']

        sess.run(tf.global_variables_initializer())

    mean_v.load(mean, session=sess)
    stdev_v.load(stdev, session=sess)

    if checkpoint_path:
        saver = tf.train.Saver()

    # run training
    n_inputs = len(data['observations'])
    if debug:
        print(colored('n_inputs: %d' % n_inputs, 'red'))
    for epoch in range(epochs):
        for i in range(1_000):
            indices = np.random.randint(n_inputs, size=batch_size)

            input_batch = data['observations'][indices]
            output_batch = data['actions'][indices]

            _, mse_run = sess.run([opt, mse],
                                  feed_dict={
                                      input_ph: input_batch,
                                      output_ph: output_batch
                                  })

        if curr_epoch is not None:
            print('epoch: {0:03d} mse: {1:.4f}'.format(curr_epoch, mse_run))
        else:
            print('epoch: {0:03d} mse: {1:.4f}'.format(epoch, mse_run))
        if checkpoint_path and epoch == epochs - 1:
            saver.save(sess, checkpoint_path)
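As reproduced here, train_bc has no return statement, yet Examples #1 and #4 bind its result to policy_fn and pass it to bc.run_bc. The listing is presumably cut off; a hedged guess at the missing tail, mirroring how Example #6 builds a policy from the same model dictionary, would be:

    # Presumed missing tail of train_bc (not shown in the listing): expose the
    # trained network as a callable policy, as Example #6 does.
    return tf_util.function([input_ph], output_pred)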
Example #6
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('env', type=str)
  parser.add_argument('--model_checkpoint', type=str)
  parser.add_argument('--render', type=bool, default=True)
  parser.add_argument('--max_timesteps', type=int)
  parser.add_argument('--num_rollouts', type=int, default=10)
  args = parser.parse_args()

  with tf.Session() as sess:
    with tf.variable_scope(args.env):
      input_dim, output_dim = helper.input_output_shape(args.env)
      model = helper.build_model(input_dim, output_dim)
      input_ph, output_pred = model['input_ph'], model['output_pred']

      policy_fn = tf_util.function([input_ph], output_pred)

      if args.model_checkpoint:
        checkpoint_path = args.model_checkpoint
      else:
        checkpoint_path = helper.checkpoint_path(args.env)

      saver = tf.train.Saver()
      saver.restore(sess, checkpoint_path)

      env = gym.make(helper.envname(args.env))
      max_steps = args.max_timesteps or env.spec.timestep_limit

      returns = []
      observations = []
      actions = []
      for i in range(args.num_rollouts):
        print('iter', i)
        obs = env.reset()
        done = False
        totalr = 0
        steps = 0
        while not done:
          action = policy_fn(obs[None, :])
          observations.append(obs)
          actions.append(action)
          obs, r, done, _ = env.step(action)
          totalr += r
          steps += 1
          if args.render:
            env.render()
          if steps >= max_steps:
            break
        returns.append(totalr)

      helper.print_returns_stats(returns)
Example #7
def test(opts):
    source_vocab = vocabs.load_vocabs_from_file(opts.source_vocab)
    target_vocab = vocabs.load_vocabs_from_file(opts.target_vocab)

    test_dataset = Seq2SeqDataset(opts.testing_dir, source_vocab, target_vocab,
                                  opts.source_lang, opts.target_lang)
    test_dataloader = Seq2SeqDataLoader(
        test_dataset,
        test_dataset.source_pad_id,
        test_dataset.target_pad_id,
        batch_first=True,
        batch_size=opts.batch_size,
        shuffle=True,
        pin_memory=(opts.device.type == "cuda"),
        num_workers=4,
    )

    model = helper.build_model(
        opts,
        test_dataset.source_vocab_size,
        test_dataset.target_vocab_size,
        test_dataset.source_pad_id,
        test_dataset.target_sos,
        test_dataset.target_eos,
        test_dataset.target_pad_id,
        opts.device,
    )
    model.load_state_dict(torch.load(opts.model_path))
    model.eval()

    # The loss function
    loss_function = torch.nn.CrossEntropyLoss(
        ignore_index=test_dataset.target_pad_id)

    # Evaluate the model
    test_loss = evaluate_model_by_loss_function(model, loss_function,
                                                test_dataloader, opts.device)
    test_bleu = evaluate_model_by_bleu_score(
        model,
        test_dataloader,
        opts.device,
        test_dataset.target_sos,
        test_dataset.target_eos,
        test_dataset.target_pad_id,
        target_vocab.get_id2word(),
    )

    print(f"Test loss={test_loss}, Test Bleu={test_bleu}")
Example #8
def main():
    # parse command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("data_dir", help="directory containing training data")
    parser.add_argument("--save_dir",
                        help="directory to save checkpoint",
                        default=os.path.dirname(os.path.abspath(__file__)))
    parser.add_argument("--arch",
                        help="choose architecture",
                        choices=model_choices,
                        default="vgg19")
    parser.add_argument("--learning_rate",
                        help="set learning rate",
                        type=float,
                        default=0.001)
    parser.add_argument("--hidden_units",
                        help="set hidden units",
                        default=[25088, 4096, 4096, 102])
    parser.add_argument("--epochs",
                        help="set number of epochs to train for",
                        type=int,
                        default=5)
    parser.add_argument("--gpu",
                        help="use GPU for training",
                        action="store_true")
    args = parser.parse_args()

    data_dir = args.data_dir
    save_dir = args.save_dir
    arch = args.arch
    lr = args.learning_rate
    hidden_units = args.hidden_units
    epochs = args.epochs
    cuda = args.gpu

    # load data
    trainloader, validloader, class_to_idx = load_data(data_dir)

    # TODO: Build and train your network
    model = build_model(arch, hidden_units)

    # TODO: Train a model with a pre-trained network
    model = train(model, epochs, lr, cuda, trainloader, validloader)

    # TODO: Save the checkpoint
    save_model(model, arch, hidden_units, save_dir, class_to_idx)
Example #9
def test_build_model_input_norm():
  with tf.Session() as sess:
    with tf.variable_scope("test_build_model_input_norm"):
      m = helper.build_model(5, 3)
      input_ph, input_norm = m['input_ph'], m['input_norm']
      mean_v, stdev_v = m['mean_v'], m['stdev_v']
      sess.run(tf.global_variables_initializer())
      mean_v.load([0.5, 0.5, 0.5, 0.5, 0.5], session=sess)
      stdev_v.load([1, 1, 1, 1, 1], session=sess)
      values, = sess.run([input_norm], feed_dict={input_ph: [
        [1, 1, 1, 1, 1],
        [1, 2, 3, 4, 5],
      ]})
      expected_values = [[
        [0.5, 0.5, 0.5, 0.5, 0.5],
        [0.5, 1.5, 2.5, 3.5, 4.5],
      ]]
      assert (values - np.array(expected_values) < 0.01).all()
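The assertion above pins down the input normalisation performed inside helper.build_model. A minimal TF1-style sketch of just that piece, using the same dictionary keys (the small epsilon is an assumption, not taken from the listing):

import tensorflow as tf

# Hypothetical sketch of the normalisation the test exercises: standardise the
# input placeholder with non-trainable mean/stdev variables; build_model's
# actual layers are omitted.
def build_input_norm(input_dim):
    input_ph = tf.placeholder(tf.float32, shape=[None, input_dim], name='input')
    mean_v = tf.get_variable('mean', shape=[input_dim],
                             initializer=tf.zeros_initializer(), trainable=False)
    stdev_v = tf.get_variable('stdev', shape=[input_dim],
                              initializer=tf.ones_initializer(), trainable=False)
    input_norm = (input_ph - mean_v) / (stdev_v + 1e-6)
    return {'input_ph': input_ph, 'mean_v': mean_v,
            'stdev_v': stdev_v, 'input_norm': input_norm}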
Example #10
def predict(opts):

    # Get the spaCy instance for the source language
    spacy_instance = utils.get_spacy_instance(opts.source_lang)

    # Lowercase the input text and strip surrounding whitespace
    input_text = opts.input_text.lower().strip()

    # Parse input into tokens with spacy
    input_tokens = [
        token.text for token in spacy_instance.tokenizer(input_text)
    ]

    print("Input:", " ".join(input_tokens))

    # Get the vocabs
    # TODO: Handle the case of translating from fr to en
    source_vocab = vocabs.load_vocabs_from_file(opts.source_vocab)
    target_vocab = vocabs.load_vocabs_from_file(opts.target_vocab)

    # Get the mappings
    source_word2id = source_vocab.get_word2id()
    target_word2id = target_vocab.get_word2id()

    source_id2word = source_vocab.get_id2word()
    target_id2word = target_vocab.get_id2word()

    source_vocab_size = len(source_word2id) + 2
    target_vocab_size = len(target_word2id) + 4

    src_unk, src_pad = range(len(source_word2id), source_vocab_size)
    trg_unk, trg_sos, trg_eos, trg_pad = range(len(target_word2id),
                                               target_vocab_size)

    model = helper.build_model(
        opts,
        source_vocab_size,
        target_vocab_size,
        src_pad,
        trg_sos,
        trg_eos,
        trg_pad,
        opts.device,
    )
    model.load_state_dict(torch.load(opts.model_path))
    model.eval()

    # Map out-of-vocabulary words to src_unk instead of raising a KeyError
    src = [torch.tensor([source_word2id.get(word, src_unk) for word in input_tokens])]
    src_lens = torch.tensor([len(input_tokens)])
    src = torch.nn.utils.rnn.pad_sequence(src, padding_value=src_pad)

    predicted_words = None
    with torch.no_grad():

        # Get the output
        logits = model(src, src_lens)
        predicted_trg = logits.argmax(2)[0, :]

        # Remove the EOS and SOS
        predicted_trg = predicted_trg[1:-1]

        # Get the resultant sequence of words
        predicted_words = [
            target_id2word.get(word_id.item(), "NAN")
            for word_id in predicted_trg
        ]

    return predicted_words
Example #11
def main():
    input_args = get_train_input_args()

    # Create & adjust data
    train_dir = input_args.data_dir + '/train'
    valid_dir = input_args.data_dir + '/valid'
    test_dir = input_args.data_dir + '/test'

    print("\n\n Training folder: {}".format(train_dir))
    print(" Validation folder: {}".format(valid_dir))
    print(" Test folder: {}\n".format(test_dir))

    train_transforms = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    test_transforms = transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    train_data = datasets.ImageFolder(train_dir, transform=train_transforms)
    valid_data = datasets.ImageFolder(valid_dir, transform=test_transforms)
    test_data = datasets.ImageFolder(test_dir, transform=test_transforms)

    trainloader = torch.utils.data.DataLoader(train_data,
                                              batch_size=32,
                                              shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_data, batch_size=32)
    testloader = torch.utils.data.DataLoader(test_data, batch_size=32)

    # Load checkpoint
    checkpoint = None
    best_accuracy = 0

    if input_args.checkpoint_path is not None:
        checkpoint, best_accuracy = load_checkpoint(input_args.checkpoint_path)

    useGPU = input_args.gpu is not None

    arch = input_args.arch if checkpoint is None else checkpoint["arch"]
    hidden_units_01 = input_args.hidden_units_01 if checkpoint is None else checkpoint[
        "hidden_units_01"]
    hidden_units_02 = input_args.hidden_units_02 if checkpoint is None else checkpoint[
        "hidden_units_02"]

    # Build model
    model = build_model(arch, hidden_units_01, hidden_units_02, checkpoint)

    # Train model
    print("\n\nStart Training...\n")

    if best_accuracy > 0:
        print("Last validation accuracy: {}".format(best_accuracy))

    epochs = input_args.epochs
    learning_rate = input_args.learning_rate
    steps = 0
    running_loss = 0
    print_every = 10

    train_losses, validation_losses = [], []

    # Use GPU if it's available and gpu is not None
    device = torch.device(
        "cuda" if torch.cuda.is_available() and useGPU else "cpu")
    print(f"Device: {device}")

    criterion = nn.NLLLoss()

    # Only train the classifier parameters, feature parameters are frozen
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)

    model.to(device)

    for epoch in range(epochs):
        for inputs, labels in trainloader:
            steps += 1
            # Move input and label tensors to the default device
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            logps = model.forward(inputs)
            loss = criterion(logps, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:
                validation_loss = 0
                accuracy = 0

                model.eval()

                with torch.no_grad():
                    for inputs, labels in validloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        logps = model.forward(inputs)
                        batch_loss = criterion(logps, labels)

                        validation_loss += batch_loss.item()

                        # Calculate accuracy
                        ps = torch.exp(logps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(
                            torch.FloatTensor)).item()

                print(
                    f"Epoch {epoch+1}/{epochs}, "
                    f"Steps {steps}\n"
                    f"Train loss: {running_loss/print_every:.3f}, "
                    f"Validation loss: {validation_loss/len(validloader):.3f}, "
                    f"Validation accuracy: {accuracy/len(validloader):.3f}\n")

                train_losses.append(running_loss / print_every)
                validation_losses.append(validation_loss / len(validloader))

                val_accuracy = accuracy / len(validloader)
                if best_accuracy < val_accuracy and val_accuracy > 0.6:
                    best_accuracy = val_accuracy
                    path = input_args.save_dir + "/checkpoint_best_accuracy.pth"

                    save_checkpoint(model, train_data, path, best_accuracy,
                                    input_args.arch, hidden_units_01,
                                    hidden_units_02)

                running_loss = 0

    print("\n\nEnd Training...\n")

    # Test trained model
    test_model(model, testloader)
Example #12
def test_build_model():
  with tf.Session():
    with tf.variable_scope("test_build_model"):
      m = helper.build_model(11, 3)
      assert m is not None
Example #13
parser.add_argument('--gpu', dest="gpu", action="store", default="gpu")

args = parser.parse_args()
data_dir = args.data_directory
save_dir = args.save_dir
arch = args.arch
learning_rate = args.learning_rate
hidden_units = args.hidden_units
epochs = args.epochs
gpu = args.gpu

# 1) Load Data
image_datasets, trainloader, testloader, validloader = helper.loadData(
    data_dir)
# 2) Build Model
model = helper.build_model(arch, hidden_units)
# 3) Train Model
model, optimizer, criterion = helper.train_model(model, trainloader,
                                                 validloader, learning_rate,
                                                 epochs, gpu)
# 4) Save the checkpoint
model.to('cpu')
model.class_to_idx = image_datasets['train_data'].class_to_idx
checkpoint = {
    'model': model,
    'hidden_units': hidden_units,
    'optimizer_state_dict': optimizer.state_dict(),
    'criterion': criterion,
    'epochs': epochs,
    'state_dict': model.state_dict(),
    'class_to_idx': model.class_to_idx
}

Example #14
import argparse
import helper

parser = argparse.ArgumentParser()
parser.add_argument('data_dir', nargs='?', type=str, default='./flowers/')
parser.add_argument('--gpu', dest='gpu', action='store_true', default=False)
parser.add_argument('--save_dir', dest='save_dir', action='store', default='./checkpoint.pth')
parser.add_argument('--arch', dest='arch', action='store', default='vgg16')
parser.add_argument('--learning_rate', dest='learning_rate', action='store', default=0.001, type=float)
parser.add_argument('--hidden_units', dest='hidden_units', action='store', default=1024, type=int)
parser.add_argument('--epochs', dest='epochs', action='store', default=20, type=int)

args = parser.parse_args()

# load data
train_data, trainloader, testloader, validloader = helper.load_data()

# build model
print(args.gpu)
print(args.arch)
print(type(args.hidden_units))
print(type(args.learning_rate))
model, device, criterion, optimizer = helper.build_model(args.gpu, args.arch, args.hidden_units, args.learning_rate)

# train model
helper.train_model(args.epochs, trainloader, validloader, model, device, criterion, optimizer)

# save the trained model
helper.save_checkpoint(model, args.epochs, args.arch, optimizer, train_data)

Example #15
                    param_grid)

    # defining parameter range - using only odd numbers
    param_grid = {'n_neighbors': np.arange(1, 42, 2)}
    find_best_param(KNeighborsClassifier, 'Survived', FEATURES, titanic_df,
                    param_grid)


# Need to run only once to get best params for respective classifier
find_best()

result_dict = {
    'Survived - kNearestNeighbors':
    build_model(knearest_neighbors_fn,
                'Survived',
                FEATURES,
                titanic_df,
                options={'k': 33}),
    'Survived - Linear SVM':
    build_model(linear_svm_fn,
                'Survived',
                FEATURES,
                titanic_df,
                options={
                    'C': 0.1,
                    'loss': 'squared_hinge'
                }),
    'Survived - SVM Linear':
    build_model(svm_linear_fn,
                'Survived',
                FEATURES,
Example #16
"""
    Create sequences that will be used as the input to the network.
    Create next_chars array that will serve as the labels during the training.
"""
sequences, next_chars = helper.create_sequences(text, SEQUENCE_LENGTH,
                                                SEQUENCE_STEP)
char_to_index, indices_char = helper.get_chars_index_dicts(chars)
"""
    The network is not able to work with characters and strings, we need to vectorise.
"""
X, y = helper.vectorize(sequences, SEQUENCE_LENGTH, chars, char_to_index,
                        next_chars)
"""
    Define the structure of the model.
"""
model = helper.build_model(SEQUENCE_LENGTH, chars)
"""
    Train the model
"""

# model.fit(X, y, batch_size=128, nb_epoch=EPOCHS)
model = load_model(
    "final.h5")  # you can skip training by loading the trained weights

for diversity in [0.2, 0.5, 1.0, 1.2]:
    print()
    print('----- diversity:', diversity)

    generated = ''
    # insert your 40-chars long string. OBS it needs to be exactly 40 chars!
    sentence = "The grass is green and my car is red lik"
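The generation loop is cut off at this point. The diversity values above are normally fed to a temperature-sampling helper; the following is a hedged sketch based on the standard Keras char-RNN recipe, not code from this listing:

import numpy as np

# Hypothetical temperature ("diversity") sampling helper: rescale the model's
# softmax output and draw one character index from the adjusted distribution.
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds + 1e-8) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    return np.argmax(np.random.multinomial(1, preds, 1))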
Example #17
    param_grid = {'loss': ['deviance', 'exponential'], 'criterion': ['friedman_mse', 'mse', 'mae'], 'n_estimators': [10, 20, 50, 100, 200], 'learning_rate': [1, 0.1, 0.01, 0.001],}
    find_best_param(GradientBoostingClassifier, 'income', FEATURES, adult_df, param_grid)

    # defining parameter range
    param_grid = {'criterion': ['gini', 'entropy'], 'max_depth': np.arange(1, 10, 1), 'min_samples_split': np.arange(2, 21, 1)}
    find_best_param(DecisionTreeClassifier, 'income', FEATURES, adult_df, param_grid)

    # defining parameter range - using only odd numbers
    param_grid = {'n_neighbors': np.arange(1, 42, 2)}
    find_best_param(KNeighborsClassifier, 'income', FEATURES, adult_df, param_grid)

# Need to run only once to get best params for respective classifier
find_best()

result_dict = {
               'income - kNearestNeighbors': build_model(knearest_neighbors_fn, 'income', FEATURES, adult_df, options={'k': 35}),
               'income - Linear SVM': build_model(linear_svm_fn, 'income', FEATURES, adult_df, options={'C': 0.1, 'loss': 'hinge'}),
               'income - SVM Linear': build_model(svm_linear_fn, 'income', FEATURES, adult_df, options={'C': 1, 'gamma': 0.1}),
               'income - SVM RBF': build_model(svm_rbf_fn, 'income', FEATURES, adult_df, options={'C': 1, 'gamma': 0.1}),
               'income - Ada Boosting': build_model(ada_boosting_fn, 'income', FEATURES, adult_df, options={'algorithm': 'SAMME.R', 'learning_rate': 1, 'n_estimators': 500}),
               'income - Gradient Boosting': build_model(gradient_boosting_fn, 'income', FEATURES, adult_df, options={'criterion': 'friedman_mse', 'learning_rate': 0.1, 'loss': 'exponential', 'n_estimators': 100}),
               'income - Neural networks': build_model(neural_network_fn, 'income', FEATURES, adult_df, options={'activation':'tanh', 'learning_rate':'invscaling', 'solver': 'adam'}),
               'income - Decision_tree': build_model(decision_tree_fn, 'income', FEATURES, adult_df, options={'criterion': 'gini', 'max_depth': 3, 'min_samples_split': 2})
               }

# Running code with default values
plt = print_results(result_dict)
#plt.show()
plt.savefig(fig_path + 'results.png')

title = "Learning Curves for Decision Tree"
Example #18
                                                        random_state=42)

#Build the model
input_shape = features.shape[1]
n_epochs = 5000
width = 30
height = 10

bestRMSLEForm = [0.051880246254638995, 28, 0]
bestRMSLEBand = [0.03375373930387272, 25, 0]

for j in range(1, height + 1):

    for i in range(1, width + 1):

        modelForm = build_model(input_shape, i, j)
        modelBand = build_model(input_shape, i, j)

        #Train the model
        print("training for width {}, height {} has started".format(i, j))

        early_stopForm = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                       patience=200)
        historyForm = modelForm.fit(
            X_trainF,
            y_trainF,
            epochs=n_epochs,
            validation_split=0.2,
            verbose=0,
            callbacks=[early_stopForm, PrintProgress()])
Example #19
                        action="store",
                        type=int,
                        default=5,
                        help="number of epochs for training, default 5")
    parser.add_argument("-d",
                        "--device",
                        dest='device',
                        action="store",
                        type=str,
                        default="cuda",
                        help="device for training, default cuda")
    args = parser.parse_args()
    train_datasets, trainloader, validloader, testloader = process_data(
        args.data_dir)

    model = build_model(args.arch, args.hidden_units, args.output_units)
    running_losses, running_valid_losses, trained_model = train_model(
        args.data_dir, model, args.learning_rate, args.epochs, args.device)
    test_data(trained_model, args.data_dir, args.device)
    trained_model.class_to_idx = train_datasets.class_to_idx
    #device = torch.to("cuda" if torch.cuda.is_available() and args.device == 'cuda' else "cpu")
    #trained_model.to(device)
    torch.save(
        {
            'arch': args.arch,
            'hidden_units': args.hidden_units,
            'output_units': args.output_units,
            'state_dict': trained_model.state_dict(),
            'class_to_idx': trained_model.class_to_idx
        }, args.save_dir)
Example #20
"""
    Create next_chars array that will serve as the labels during the training.
"""
word_sequences, next_words = helper.create_word_sequences(
    words, WORD_SEQUENCE_LENGTH, WORD_SEQUENCE_STEP)
word_to_index, indices_word = helper.get_chars_index_dicts(unique_words)

# """
#     The network is not able to work with characters and strings, we need to vectorise.
# """
X, y = helper.vectorize(word_sequences, WORD_SEQUENCE_LENGTH, unique_words,
                        word_to_index, next_words)

# """
#     Define the structure of the model.
# """
model = helper.build_model(WORD_SEQUENCE_LENGTH, unique_words)

# """
#     Train the model
# """

model.fit(X, y, batch_size=128, nb_epoch=EPOCHS)
# model = load_model("final.h5")  # you can skip training by loading the trained weights

for diversity in [0.2, 0.5, 1.0, 1.2]:
    print()
    print('----- diversity:', diversity)

    generated = []
    sentence = ['amalia', 'kamalia', 'tansseja']
    generated += sentence
Example #21
def train(opts):
    """ Trains the model """
    torch.manual_seed(opts.seed)

    source_vocab = vocabs.load_vocabs_from_file(opts.source_vocab)
    target_vocab = vocabs.load_vocabs_from_file(opts.target_vocab)

    dataset = Seq2SeqDataset(
        opts.training_dir,
        source_vocab,
        target_vocab,
        opts.source_lang,
        opts.target_lang,
    )

    num_training_data = int(len(dataset) * opts.train_val_ratio)
    num_val_data = len(dataset) - num_training_data

    train_dataset, val_dataset = torch.utils.data.random_split(
        dataset, [num_training_data, num_val_data])

    train_dataloader = Seq2SeqDataLoader(
        train_dataset,
        dataset.source_pad_id,
        dataset.target_pad_id,
        batch_first=True,
        batch_size=opts.batch_size,
        shuffle=True,
        pin_memory=(opts.device.type == "cuda"),
        num_workers=4,
    )
    val_dataloader = Seq2SeqDataLoader(
        val_dataset,
        dataset.source_pad_id,
        dataset.target_pad_id,
        batch_first=True,
        batch_size=opts.batch_size,
        shuffle=True,
        pin_memory=(opts.device.type == "cuda"),
        num_workers=4,
    )

    model = helper.build_model(
        opts,
        dataset.source_vocab_size,
        dataset.target_vocab_size,
        dataset.source_pad_id,
        dataset.target_sos,
        dataset.target_eos,
        dataset.target_pad_id,
        opts.device,
    )

    patience = opts.patience
    num_epochs = opts.epochs

    if opts.patience is None:
        patience = float("inf")
    else:
        num_epochs = float("inf")

    best_val_loss = float("inf")

    num_poor = 0
    epoch = 1

    optimizer = torch.optim.Adam(model.parameters(), lr=opts.learning_rate)

    if opts.resume_from_checkpoint and os.path.isfile(
            opts.resume_from_checkpoint):
        print("Loading from checkpoint")
        best_val_loss, num_poor, epoch = load_checkpoint(
            opts.resume_from_checkpoint, model, optimizer)
        print(
            f"Previous state > Epoch {epoch}: Val loss={best_val_loss}, num_poor={num_poor}"
        )

    while epoch <= num_epochs and num_poor < patience:

        # Train
        loss_function = nn.CrossEntropyLoss(ignore_index=dataset.target_pad_id)
        train_loss = train_for_one_epoch(model, loss_function, optimizer,
                                         train_dataloader, opts.device)

        # Evaluate the model
        val_loss = test.evaluate_model_by_loss_function(
            model, loss_function, val_dataloader, opts.device)

        print(f"Epoch {epoch}: Train loss={train_loss}, Val loss={val_loss}")

        model.cpu()
        if val_loss > best_val_loss:
            num_poor += 1
        else:
            num_poor = 0
            best_val_loss = val_loss

            print("Saved model")
            torch.save(model.state_dict(), opts.model_path)

        save_checkpoint(
            opts.save_checkpoint_to,
            model,
            optimizer,
            best_val_loss,
            num_poor,
            epoch,
        )
        print("Saved checkpoint")

        model.to(opts.device)

        epoch += 1

    if epoch > num_epochs:
        print(f"Finished {num_epochs} epochs")
    else:
        print(f"Loss did not improve after {patience} epochs")

    val_bleu_score = test.evaluate_model_by_bleu_score(
        model,
        val_dataloader,
        opts.device,
        dataset.target_sos,
        dataset.target_eos,
        dataset.target_pad_id,
        target_vocab.get_id2word(),
    )
    print(f"Final BLEU score: {val_bleu_score}. Done.")