Code example #1
def main_eval(args):
    assert args.load_from is not None, '--load_from required in eval mode'

    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.INFO)
    dataset_train, dataset_test, scaler = get_data(args)

    logging.info(f'evaluation mode. Level: {args.level}')

    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    n_features = dataset_train.items.shape[1]
    generator, discriminator = get_models(args, n_features, device)

    experiment = Experiment(args.comet_api_key,
                            project_name=args.comet_project_name,
                            workspace=args.comet_workspace)
    experiment.log_parameters(vars(args))

    load_model(Path(args.load_from), generator, discriminator, None, None,
               device)

    n_events = len(dataset_test)
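    # number of evaluation batches covering the requested fraction (gan_test_ratio) of the test set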
    steps = (args.gan_test_ratio * n_events) // args.eval_batch_size

    evaluate_model(generator, experiment, dataset_test, args.eval_batch_size,
                   steps, args, device, scaler, 0)
Code example #2
def train(restore):
    encoders = get_encoders()
    dataset = get_dataset(encoders, difficulty=10)
    text_rnn, generator, discriminator, gan = get_models(encoders)

    checkpoint_path = path.join(config.CHECKPOINT_DIR, "keras",
                                "text_rnn.ckpt")
    if restore:
        text_rnn.load_weights(checkpoint_path)

    logger = EvaluationLogger(generator, dataset, encoders)
    accumulator = MetricsAccumulator(path.join(config.LOG_DIR, "stats"))

    _train_on_batch_f = _get_train_on_batch_f(generator, discriminator, gan,
                                              accumulator)

    difficulty = 10
    dataset = get_dataset(encoders, difficulty)
    train_data = dataset.batch(config.BATCH_SIZE).take(config.STEPS_PER_EPOCH)
    for epoch in range(config.NUM_EPOCHS):
        # if epoch >= 500 and epoch % 10==0:
        #     difficulty += 1
        #     dataset = get_dataset(encoders, difficulty)
        #     train_data = dataset.batch(config.BATCH_SIZE).take(config.STEPS_PER_EPOCH)
        start_time = time.time()
        discr_only_steps = 0  # if epoch < 500 else 1
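        # warm-up: the first 5 epochs update the discriminator only; afterwards every
        # (discr_only_steps + 1)-th batch also trains the generator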
        for b, (text_inputs_dict, images) in enumerate(train_data):
            print(f"{b} completed", end="\r")
            train_part = TRAIN_D if epoch < 5 else \
                        TRAIN_GD if b%(discr_only_steps+1) == 0 else TRAIN_D
            _train_on_batch_f(text_inputs_dict, images, train_part)
        accumulator.accumulate(epoch)
        logger.on_epoch_end(epoch)
        logging.info(
            "Done with epoch %s took %ss (difficulty=%s; discr_only_steps=%s)",
            epoch, round(time.time() - start_time,
                         2), difficulty, discr_only_steps)
Code example #3
def train_model(config):
    PATH = '{}{}'.format(config['modelroot'], config['num_used_classes'])
    outputs_path = os.path.join(PATH, config['model_name'])
    print('Making dir: {}'.format(outputs_path))
    os.makedirs(outputs_path, exist_ok=True)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    model, input_size = get_models(config)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    total_params = sum(p.numel() for p in model.parameters()
                       if p.requires_grad)
    print('Total training parameters:', total_params)

    #return
    Xtrain, ytrain, categories = load_data(config, load_train=True)
    Xtest, ytest, _ = load_data(config, load_train=False)
    Xtrain, Xval, ytrain, yval = train_test_split(Xtrain,
                                                  ytrain,
                                                  test_size=config['val_size'],
                                                  random_state=config['seed'])

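    # transform1 resizes images to the model's expected input size; transform2 adds
    # per-split augmentation and ImageNet mean/std normalization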
    transform1 = transforms.Compose([
        transforms.Resize((input_size, input_size),
                          interpolation=Image.NEAREST)
    ])
    transform2 = {
        'train':
        transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
        'val':
        transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
    }
    traindata = Pokemons(Xtrain,
                         ytrain,
                         categories,
                         transform1=transform1,
                         transform2=transform2['train'])
    valdata = Pokemons(Xval,
                       yval,
                       categories,
                       transform1=transform1,
                       transform2=transform2['val'])
    testdata = Pokemons(Xtest,
                        ytest,
                        categories,
                        transform1=transform1,
                        transform2=transform2['val'])

    trainloader = DataLoader(traindata,
                             batch_size=config['batch_size'],
                             shuffle=True,
                             num_workers=4)
    valloader = DataLoader(valdata,
                           batch_size=config['batch_size'],
                           shuffle=False,
                           num_workers=4)

    optimizer = optim.Adam(model.parameters(),
                           lr=config['lr'],
                           weight_decay=config['weight_decay'])
    criterion = nn.CrossEntropyLoss().to(device)

    start_time = time.time()

    TRAINLOSS = []
    TRAINACC = []
    VALLOSS = []
    VALACC = []

    for epoch in range(1, config['epochs'] + 1):
        trainloss, trainacc = running(model,
                                      trainloader,
                                      optimizer,
                                      criterion,
                                      device,
                                      len(traindata),
                                      train=True)
        valloss, valacc = running(model,
                                  valloader,
                                  optimizer,
                                  criterion,
                                  device,
                                  len(valdata),
                                  train=False)
        end_time = time.time()
        print('Epochs {}/{}: Time={:.2f}\n\tTrain loss {:.4f}, Train acc {:.4f}\n\t'\
              'Val loss {:.4f}, Val acc {:.4f}'.format(epoch, config['epochs'], end_time - start_time,
                                                       trainloss, trainacc, valloss, valacc))
        start_time = time.time()

        TRAINLOSS.append(trainloss)
        TRAINACC.append(trainacc)
        VALLOSS.append(valloss)
        VALACC.append(valacc)

    # testing
    ypred = testing(model, testdata, device)
    with open(os.path.join(PATH, config['logs']), 'a+') as f:
        f.write('model: {}, acc: {}\n'.format(config['model_name'],
                                              accuracy_score(ypred, ytest)))

    torch.save(model, os.path.join(outputs_path, 'model_final.pt'))
    pickle.dump(TRAINLOSS,
                open(os.path.join(outputs_path, 'trainloss.pkl'), 'wb'))
    pickle.dump(TRAINACC, open(os.path.join(outputs_path, 'trainacc.pkl'),
                               'wb'))
    pickle.dump(VALLOSS, open(os.path.join(outputs_path, 'valloss.pkl'), 'wb'))
    pickle.dump(VALACC, open(os.path.join(outputs_path, 'valacc.pkl'), 'wb'))
Code example #4
File: test.py  Project: xflashxx/seq2seq_nmt_att
import os
import sys

from tensorflow.keras.preprocessing.sequence import pad_sequences
from greedy_approach import greedy_translation
from bleu import list_bleu
from rouge import Rouge
from sklearn.metrics import accuracy_score

checkpoint_file = 'saved_models/best_model_acc.h5'

if not os.path.isfile(checkpoint_file):
    print("Pretraining of the model required. Aborting...")
    sys.exit()

if __name__ == "__main__":

    # Load pretrained model
    translation_model, encoder, decoder = get_models(for_training=False)
    translation_model.load_weights(checkpoint_file)

    # compute Scores
    if K.ADVANCED_METRICS:
        ''' To compute the accuracy, we need: target_ref_sequences = tokenized sentences from original sentences
                                              target_hyp_sequences = predicted tokens for each sentence

            To compute BLEU and ROUGE, we need: target_ref_sentences = original sentences
                                                target_hyp_sentences = decoded predicted sentences
        '''
        print("Computing BLEU, ROUGE and Accuracy scores...")

        # from all sentences (1.920.209), use the next x sentences not used for training the model to evaluate
        # the model performance, with x = 'K.NUMBER_TEST_SENTENCES'
        if K.NUMBER_SENTENCES is not None:
Code example #5
def main(input_filepath, output_filepath):
    """
    This module trains the Random Forest Classifier model if it does not yet exist,
    or if it does exist updates the model and selects the best performing one for production.
    """
    logger = logging.getLogger(__name__)
    logger.info('making final data set from raw data')

    project_dir = Path(__file__).resolve().parents[2]

    # Train a new version of the model 
    data = get_dataset(os.path.join(project_dir, input_filepath))

    x_cols = ['ReleaseNumber', 'New_Release_Flag', 'StrengthFactor',
              'PriceReg', 'ReleaseYear', 'ItemCount', 'LowUserPrice', 'LowNetPrice',
              'MarketingTypeD']

    y_col = 'SoldFlag'

    X_train, X_test, y_train, y_test = train_test_split(data[x_cols],
                                                        data[y_col].values,
                                                        test_size=0.30,
                                                        random_state=42)

    clf = RandomForestClassifier(max_depth=5,
                                 random_state=42,
                                 criterion='gini',
                                 n_estimators=100,
                                 verbose=1,
                                 class_weight='balanced')

    clf.fit(X_train, y_train)

    # Evaluate model
    now = datetime.now()  # current date and time
    date_time = now.strftime("%m-%d-%Y_%H-%M-%S")

    plot_filename = f'model-{date_time}-evaluation-plots.png'
    classifier_model_plot(X_test, y_test, clf, (20, 10), output_filepath, plot_filename)

    # Save to file
    pkl_filename = f"model-{date_time}.pkl"
    with open(os.path.join(output_filepath, pkl_filename), 'wb') as file:
        pickle.dump(clf, file)

    # Add to model collection and compare for best model
    models = get_models(os.path.join(project_dir, output_filepath))

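    # wrap the new classifier with its held-out AUC so it can be compared against previously saved models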
    clf_model = Model(pkl_filename, clf, get_model_auc(X_test, y_test, clf), plot_filename)
    clf_model.add_to_metadata(output_filepath)

    if len(models) > 0:
        models.append(clf_model)
        best_model = compare_models(models, X_test, y_test)
    else:
        best_model = clf_model

    if best_model.file_name == pkl_filename:
        logger.info(f'Currently trained model {pkl_filename} was deemed best model! Moved model to production.')
    else:
        logger.info(f'Earlier trained model {best_model.file_name} was deemed best model. Moved model to production.')

    logger.info(f'\nThe best model\'s properties are as follows: \n \n {best_model}')

    move_to_production(output_filepath, best_model)
Code example #6
        features.append(float(rm))
        features.append(float(age))
        features.append(float(dis))
        features.append(float(rad))
        features.append(float(tax))
        features.append(float(ptratio))
        features.append(float(b))
        features.append(float(lstat))

        # f = [0.00632,18,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98]

        print('Predicted Value:',
              model.predict(np.asarray(features).reshape(1, -1))[0])
        # print('Predicted Value:', model.predict(features)[0])
    except Exception as err:
        print('An error has occurred.', err)


if __name__ == '__main__':
    # load dataset
    dataset = DataSet.load()

    # split dataset into training and testing subsets
    training_features, testing_features, training_target, testing_target = dataset.split_train_test()

    # list of Generalised ML Models
    models = get_models(training_features, training_target)

    show_main_menu()
Code example #7
from model import get_models
from keras.optimizers import Adam
import sys
from PIL import Image
import numpy as np
from os.path import join
import os

sys.path.append("../..")
import dataset

autoencoder, encoder, decoder = get_models(input_shape=(100, 100, 1), latent_dim=64)
print("Nº of parameters: {}".format(autoencoder.count_params()))

images = dataset.load_png_dataset(sample=100, resize=(300, 100), conversion="L", strip_mode="1x3")
images = (images/255).astype(np.float16)

from keras.losses import mean_squared_error, cosine
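# two reconstruction losses between the network input and the decoder output
# (the second-to-last layer) are attached to the model via add_loss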
loss1 = cosine(autoencoder.layers[0].input, autoencoder.layers[-2].output)
loss2 = mean_squared_error(autoencoder.layers[0].input, autoencoder.layers[-2].output)
autoencoder.compile(metrics=["mse"], optimizer=Adam(lr=1e-4))
autoencoder.add_loss(loss1)
autoencoder.add_loss(loss2)
autoencoder.fit(images, images, epochs=10, batch_size=200)

# create the output directories if they do not already exist
os.makedirs("./examples", exist_ok=True)
os.makedirs("./models", exist_ok=True)
Code example #8
        features.append(float(chas))
        features.append(float(nox))
        features.append(float(rm))
        features.append(float(age))
        features.append(float(dis))
        features.append(float(rad))
        features.append(float(tax))
        features.append(float(ptratio))
        features.append(float(b))
        features.append(float(lstat))

        # f = [0.00632,18,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98]

        print('Predicted Value:', model.predict(np.asarray(features).reshape(1, -1))[0])
        # print('Predicted Value:', model.predict(features)[0])
    except Exception as err:
        print('An error has occurred.', err)


if __name__ == '__main__':
    # load dataset
    dataset = DataSet.load()

    # split dataset into training and testing subsets
    training_features, testing_features, training_target, testing_target = dataset.split_train_test()

    # list of Generalised ML Models
    models = get_models(training_features, training_target)

    show_main_menu()
Code example #9
dataset_name = args.dataset_name
experiment_name = args.experiment_name

pylib.mkdir('./output/%s' % experiment_name)
with open('./output/%s/setting.txt' % experiment_name, 'w') as f:
    f.write(json.dumps(vars(args), indent=4, separators=(',', ':')))

# dataset
dataset, img_shape = data.get_dataset(dataset_name, batch_size)

# ==============================================================================
# =                                    graph                                   =
# ==============================================================================

# models
G, D = model.get_models(model_name)
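# bind normalization settings onto the discriminator constructor; weight (spectral)
# normalization is enabled only when weights_norm is set to 'spectral_norm'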
D = partial(D,
            norm_name=norm,
            weights_norm_name='spectral_norm'
            if weights_norm == 'spectral_norm' else 'none')

# optimizers
if optimizer == 'adam':
    optim = partial(tf.train.AdamOptimizer, beta1=0.5)
elif optimizer == 'rmsprop':
    optim = tf.train.RMSPropOptimizer

# loss func
d_loss_fn, g_loss_fn = model.get_loss_fn(loss_mode)

# inputs
Code example #10
def main_train(args):
    now = datetime.now()
    save_to = Path(args.save_to) if args.save_to is not None else Path().cwd()
    save_dir = save_to / f'{now:%Y%m%d-%H%M-%S}'
    fix_seed(args.seed)
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.INFO)
    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')

    dataset_train, dataset_test, scaler = get_data(args)

    logging.info(f'training level: {args.level}')

    n_features = dataset_train.items.shape[1]
    generator, discriminator = get_models(args, n_features, device)

    if args.gan_type == 'vanilla':
        trainer = GANTrainer(generator, discriminator, device)
    elif args.gan_type == 'wgp':
        trainer = WGPGANTrainer(generator,
                                discriminator,
                                device,
                                lambda_=args.lambda_)
    else:
        raise ValueError(f'Unknown gan type: {args.gan_type}')

    optimizer_d = setup_optimizer(discriminator,
                                  args.learning_rate,
                                  weight_decay=0,
                                  args=args)
    optimizer_g = setup_optimizer(generator,
                                  args.learning_rate,
                                  weight_decay=0,
                                  args=args)

    if args.load_from is not None:
        load_model(Path(args.load_from), generator, discriminator, optimizer_g,
                   optimizer_d, device)

    experiment = Experiment(args.comet_api_key,
                            project_name=args.comet_project_name,
                            workspace=args.comet_workspace)
    experiment.log_parameters(vars(args))
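    # the first tenth of the test items is handed to the trainer for monitoring while training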
    iterations_total = trainer.train(
        args,
        dataset_train,
        optimizer_g,
        optimizer_d,
        scaler=scaler,
        save_dir=save_dir,
        test_dataset=dataset_test.items[:len(dataset_test) // 10],
        experiment=experiment)

    n_events = len(dataset_test)
    steps = (args.gan_test_ratio * n_events) // args.eval_batch_size

    evaluate_model(generator, experiment, dataset_test, args.eval_batch_size,
                   steps, args, device, scaler, iterations_total)
    experiment.end()

    save_model(save_dir, generator, discriminator, optimizer_g, optimizer_d,
               iterations_total)
Code example #11
from model import get_models
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
import sys
from PIL import Image
import numpy as np
from os.path import join
import os
from keras.utils import Sequence
import matplotlib.pyplot as plt
from nightmares.images_defines import EXPERIMENTS_FOLDER
from nightmares import dataset
import warnings
warnings.simplefilter("ignore")

autoencoder, encoder, decoder = get_models()
print("Nº of parameters: {}".format(autoencoder.count_params()))

images = dataset.load_png_dataset(sample=2000,
                                  resize=(360, 120),
                                  conversion="RGB",
                                  strip_mode="1x3")
plt.imshow(images[0])
plt.show()


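# keras.utils.Sequence wrapper that feeds the images rescaled to [0, 1] as float16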
class Generator(Sequence):
    def __init__(self, imgs):
        self.images = np.float16(imgs) / 255

    def __len__(self):