def main_eval(args):
    assert args.load_from is not None, '--load_from required in eval mode'
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.INFO)
    dataset_train, dataset_test, scaler = get_data(args)
    logging.info(f'evaluation mode. Level: {args.level}')
    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    n_features = dataset_train.items.shape[1]
    generator, discriminator = get_models(args, n_features, device)
    experiment = Experiment(args.comet_api_key,
                            project_name=args.comet_project_name,
                            workspace=args.comet_workspace)
    experiment.log_parameters(vars(args))
    load_model(Path(args.load_from), generator, discriminator, None, None, device)
    n_events = len(dataset_test)
    steps = (args.gan_test_ratio * n_events) // args.eval_batch_size
    evaluate_model(generator, experiment, dataset_test, args.eval_batch_size,
                   steps, args, device, scaler, 0)
def train(restore):
    encoders = get_encoders()
    dataset = get_dataset(encoders, difficulty=10)
    text_rnn, generator, discriminator, gan = get_models(encoders)
    checkpoint_path = path.join(config.CHECKPOINT_DIR, "keras", "text_rnn.ckpt")
    if restore:
        text_rnn.load_weights(checkpoint_path)
    logger = EvaluationLogger(generator, dataset, encoders)
    accumulator = MetricsAccumulator(path.join(config.LOG_DIR, "stats"))
    _train_on_batch_f = _get_train_on_batch_f(generator, discriminator, gan, accumulator)
    difficulty = 10
    dataset = get_dataset(encoders, difficulty)
    train_data = dataset.batch(config.BATCH_SIZE).take(config.STEPS_PER_EPOCH)
    for epoch in range(config.NUM_EPOCHS):
        # if epoch >= 500 and epoch % 10 == 0:
        #     difficulty += 1
        #     dataset = get_dataset(encoders, difficulty)
        #     train_data = dataset.batch(config.BATCH_SIZE).take(config.STEPS_PER_EPOCH)
        start_time = time.time()
        discr_only_steps = 0  # if epoch < 500 else 1
        for b, (text_inputs_dict, images) in enumerate(train_data):
            print(f"{b} completed", end="\r")
            train_part = TRAIN_D if epoch < 5 else \
                TRAIN_GD if b % (discr_only_steps + 1) == 0 else TRAIN_D
            _train_on_batch_f(text_inputs_dict, images, train_part)
        accumulator.accumulate(epoch)
        logger.on_epoch_end(epoch)
        logging.info(
            "Done with epoch %s took %ss (difficulty=%s; discr_only_steps=%s)",
            epoch, round(time.time() - start_time, 2), difficulty,
            discr_only_steps)
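# --- Hypothetical sketch, not from the source: the loop above calls
# _get_train_on_batch_f and the TRAIN_D / TRAIN_GD flags without showing them.
# A typical shape for that closure in a Keras text-to-image GAN is below; the
# flag values, label scheme, and generator inputs are all assumptions. ---
import numpy as np

TRAIN_D, TRAIN_GD = "train_d", "train_gd"  # assumed flag values

def _get_train_on_batch_f(generator, discriminator, gan, accumulator):
    def _train_on_batch(text_inputs_dict, images, train_part):
        # Discriminator update on real and generated images.
        fake_images = generator.predict_on_batch(text_inputs_dict)
        ones, zeros = np.ones(len(images)), np.zeros(len(images))
        discriminator.train_on_batch(images, ones)
        discriminator.train_on_batch(fake_images, zeros)
        if train_part == TRAIN_GD:
            # Generator update through the stacked model (discriminator
            # frozen), with flipped labels so the generator is rewarded for
            # samples the discriminator scores as real.
            gan.train_on_batch(text_inputs_dict, ones)
    return _train_on_batch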
def train_model(config):
    PATH = os.path.join('{}{}'.format(config['modelroot'], config['num_used_classes']))
    outputs_path = os.path.join(PATH, config['model_name'])
    print('Making dir: {}'.format(outputs_path))
    os.makedirs(outputs_path, exist_ok=True)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model, input_size = get_models(config)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model = model.to(device)
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('Total trainable parameters:', total_params)

    Xtrain, ytrain, categories = load_data(config, load_train=True)
    Xtest, ytest, _ = load_data(config, load_train=False)
    Xtrain, Xval, ytrain, yval = train_test_split(
        Xtrain, ytrain, test_size=config['val_size'], random_state=config['seed'])

    transform1 = transforms.Compose([
        transforms.Resize((input_size, input_size), interpolation=Image.NEAREST)
    ])
    transform2 = {
        'train': transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
        'val': transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
    }

    traindata = Pokemons(Xtrain, ytrain, categories, transform1=transform1, transform2=transform2['train'])
    valdata = Pokemons(Xval, yval, categories, transform1=transform1, transform2=transform2['val'])
    testdata = Pokemons(Xtest, ytest, categories, transform1=transform1, transform2=transform2['val'])
    trainloader = DataLoader(traindata, batch_size=config['batch_size'], shuffle=True, num_workers=4)
    valloader = DataLoader(valdata, batch_size=config['batch_size'], shuffle=False, num_workers=4)

    optimizer = optim.Adam(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])
    criterion = nn.CrossEntropyLoss().to(device)

    start_time = time.time()
    TRAINLOSS, TRAINACC, VALLOSS, VALACC = [], [], [], []
    for epoch in range(1, config['epochs'] + 1):
        trainloss, trainacc = running(model, trainloader, optimizer, criterion,
                                      device, len(traindata), train=True)
        valloss, valacc = running(model, valloader, optimizer, criterion,
                                  device, len(valdata), train=False)
        end_time = time.time()
        print('Epoch {}/{}: Time={:.2f}\n\tTrain loss {:.4f}, Train acc {:.4f}\n\t'
              'Val loss {:.4f}, Val acc {:.4f}'.format(
                  epoch, config['epochs'], end_time - start_time,
                  trainloss, trainacc, valloss, valacc))
        start_time = time.time()
        TRAINLOSS.append(trainloss)
        TRAINACC.append(trainacc)
        VALLOSS.append(valloss)
        VALACC.append(valacc)

    # testing
    ypred = testing(model, testdata, device)
    with open(os.path.join(PATH, config['logs']), 'a+') as f:
        f.write('model: {}, acc: {}\n'.format(config['model_name'],
                                              accuracy_score(ypred, ytest)))

    torch.save(model, os.path.join(outputs_path, 'model_final.pt'))
    pickle.dump(TRAINLOSS, open(os.path.join(outputs_path, 'trainloss.pkl'), 'wb'))
    pickle.dump(TRAINACC, open(os.path.join(outputs_path, 'trainacc.pkl'), 'wb'))
    pickle.dump(VALLOSS, open(os.path.join(outputs_path, 'valloss.pkl'), 'wb'))
    pickle.dump(VALACC, open(os.path.join(outputs_path, 'valacc.pkl'), 'wb'))
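# --- Hypothetical sketch, not from the source: the `running` helper used by
# the epoch loop above is defined elsewhere. Matching its call signature, a
# minimal version would make one pass over a DataLoader and return mean loss
# and accuracy, updating weights only when train=True. ---
import torch

def running(model, loader, optimizer, criterion, device, n_samples, train=True):
    model.train() if train else model.eval()
    total_loss, total_correct = 0.0, 0
    with torch.set_grad_enabled(train):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            total_loss += loss.item() * inputs.size(0)
            total_correct += (outputs.argmax(dim=1) == labels).sum().item()
    return total_loss / n_samples, total_correct / n_samples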
import os
import sys

from tensorflow.keras.preprocessing.sequence import pad_sequences
from greedy_approach import greedy_translation
from bleu import list_bleu
from rouge import Rouge
from sklearn.metrics import accuracy_score

checkpoint_file = 'saved_models/best_model_acc.h5'
if not os.path.isfile(checkpoint_file):
    print("Pretraining of the model is required. Aborting...")
    sys.exit()

if __name__ == "__main__":
    # Load the pretrained model
    translation_model, encoder, decoder = get_models(for_training=False)
    translation_model.load_weights(checkpoint_file)

    # Compute scores
    if K.ADVANCED_METRICS:
        '''
        To compute the accuracy, we need:
            target_ref_sequences = tokenized sentences from original sentences
            target_hyp_sequences = predicted tokens for each sentence
        To compute BLEU and ROUGE, we need:
            target_ref_sentences = original sentences
            target_hyp_sentences = decoded predicted sentences
        '''
        print("Computing BLEU, ROUGE and Accuracy scores...")
        # From all sentences (1,920,209), use the next x sentences not used
        # for training to evaluate the model's performance,
        # with x = K.NUMBER_TEST_SENTENCES
        if K.NUMBER_SENTENCES is not None:
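            # --- Hypothetical sketch, not from the source (the evaluation
            # loop is cut off above). Once the target_ref_*/target_hyp_* lists
            # are built, the imported scorers are typically wired up like
            # this; the padding choice for token accuracy is an assumption:
            #
            #     bleu = list_bleu([target_ref_sentences], target_hyp_sentences)
            #     rouge_scores = Rouge().get_scores(target_hyp_sentences,
            #                                       target_ref_sentences, avg=True)
            #     refs = pad_sequences(target_ref_sequences, padding='post')
            #     hyps = pad_sequences(target_hyp_sequences,
            #                          maxlen=refs.shape[1], padding='post')
            #     accuracy = accuracy_score(refs.ravel(), hyps.ravel())
            #     print('BLEU: {}, ROUGE-L F1: {}, Accuracy: {:.4f}'.format(
            #         bleu, rouge_scores['rouge-l']['f'], accuracy))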
def main(input_filepath, output_filepath):
    """
    Train the Random Forest Classifier model if it does not yet exist;
    otherwise train an updated version and promote the best-performing
    model to production.
    """
    logger = logging.getLogger(__name__)
    logger.info('making final data set from raw data')
    project_dir = Path(__file__).resolve().parents[2]

    # Train a new version of the model
    data = get_dataset(os.path.join(project_dir, input_filepath))
    x_cols = ['ReleaseNumber', 'New_Release_Flag', 'StrengthFactor', 'PriceReg',
              'ReleaseYear', 'ItemCount', 'LowUserPrice', 'LowNetPrice',
              'MarketingTypeD']
    y_col = 'SoldFlag'
    X_train, X_test, y_train, y_test = train_test_split(
        data[x_cols], data[y_col].values, test_size=0.30, random_state=42)
    clf = RandomForestClassifier(max_depth=5, random_state=42, criterion='gini',
                                 n_estimators=100, verbose=1,
                                 class_weight='balanced')
    clf.fit(X_train, y_train)

    # Evaluate the model
    now = datetime.now()  # current date and time
    date_time = now.strftime("%m-%d-%Y_%H-%M-%S")
    plot_filename = f'model-{date_time}-evaluation-plots.png'
    classifier_model_plot(X_test, y_test, clf, (20, 10), output_filepath, plot_filename)

    # Save to file
    pkl_filename = f"model-{date_time}.pkl"
    with open(os.path.join(output_filepath, pkl_filename), 'wb') as file:
        pickle.dump(clf, file)

    # Add to the model collection and compare for the best model
    models = get_models(os.path.join(project_dir, output_filepath))
    clf_model = Model(pkl_filename, clf, get_model_auc(X_test, y_test, clf), plot_filename)
    clf_model.add_to_metadata(output_filepath)
    if len(models) > 0:
        models.append(clf_model)
        best_model = compare_models(models, X_test, y_test)
    else:
        best_model = clf_model

    if best_model.file_name == pkl_filename:
        logger.info(f'Currently trained model {pkl_filename} was deemed the best model! Moving it to production.')
    else:
        logger.info(f'Earlier trained model {best_model.file_name} was deemed the best model. Moving it to production.')
    logger.info(f'\nThe best model\'s properties are as follows:\n\n{best_model}')
    move_to_production(output_filepath, best_model)
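# --- Hypothetical sketch, not from the source: compare_models and the Model
# wrapper are defined elsewhere in the project. Matching the call above, a
# minimal version that re-scores every candidate on the held-out split and
# keeps the highest ROC AUC could look like this, assuming the wrapper
# exposes the fitted classifier as `.model`. ---
from sklearn.metrics import roc_auc_score

def compare_models(models, X_test, y_test):
    """Return the Model whose classifier scores the highest held-out ROC AUC."""
    return max(models,
               key=lambda m: roc_auc_score(y_test,
                                           m.model.predict_proba(X_test)[:, 1]))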
from model import get_models
from keras.optimizers import Adam
import sys
from PIL import Image
import numpy as np
from os.path import join
import os

sys.path.append("../..")
import dataset

autoencoder, encoder, decoder = get_models(input_shape=(100, 100, 1), latent_dim=64)
print("Nº of parameters: {}".format(autoencoder.count_params()))

images = dataset.load_png_dataset(sample=100, resize=(300, 100), conversion="L", strip_mode="1x3")
images = (images / 255).astype(np.float16)

from keras.losses import mean_squared_error, cosine

# Register the reconstruction losses with add_loss *before* compiling;
# Keras ignores losses added after compile.
loss1 = cosine(autoencoder.layers[0].input, autoencoder.layers[-2].output)
loss2 = mean_squared_error(autoencoder.layers[0].input, autoencoder.layers[-2].output)
autoencoder.add_loss(loss1)
autoencoder.add_loss(loss2)
autoencoder.compile(metrics=["mse"], optimizer=Adam(lr=1e-4))

autoencoder.fit(images, images, epochs=10, batch_size=200)

try:
    os.mkdir("./examples")
except OSError:
    pass
try:
    os.mkdir("./models")
    features.append(float(chas))
    features.append(float(nox))
    features.append(float(rm))
    features.append(float(age))
    features.append(float(dis))
    features.append(float(rad))
    features.append(float(tax))
    features.append(float(ptratio))
    features.append(float(b))
    features.append(float(lstat))
    # f = [0.00632,18,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98]
    print('Predicted Value:', model.predict(np.asarray(features).reshape(1, -1))[0])
    # print('Predicted Value:', model.predict(features)[0])
except Exception as err:
    print('An error has occurred.', err)


if __name__ == '__main__':
    # load dataset
    dataset = DataSet.load()
    # split dataset into training and testing subsets
    training_features, testing_features, training_target, testing_target = dataset.split_train_test()
    # list of generalised ML models
    models = get_models(training_features, training_target)
    show_main_menu()
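# --- Hypothetical sketch, not from the source: get_models is imported from
# elsewhere in the project. Matching the call above, a minimal version that
# fits a few generic regressors on the training split and returns them keyed
# by name for the menu could look like this; the model choices and dict keys
# are assumptions. ---
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

def get_models(training_features, training_target):
    models = {
        'Linear Regression': LinearRegression(),
        'Decision Tree': DecisionTreeRegressor(),
        'Random Forest': RandomForestRegressor(n_estimators=100),
    }
    for model in models.values():
        model.fit(training_features, training_target)
    return models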
dataset_name = args.dataset_name
experiment_name = args.experiment_name

pylib.mkdir('./output/%s' % experiment_name)
with open('./output/%s/setting.txt' % experiment_name, 'w') as f:
    f.write(json.dumps(vars(args), indent=4, separators=(',', ':')))

# dataset
dataset, img_shape = data.get_dataset(dataset_name, batch_size)

# ==============================================================================
# =                                   graph                                    =
# ==============================================================================

# models
G, D = model.get_models(model_name)
D = partial(D, norm_name=norm,
            weights_norm_name='spectral_norm' if weights_norm == 'spectral_norm' else 'none')

# optimizers
if optimizer == 'adam':
    optim = partial(tf.train.AdamOptimizer, beta1=0.5)
elif optimizer == 'rmsprop':
    optim = tf.train.RMSPropOptimizer

# loss function
d_loss_fn, g_loss_fn = model.get_loss_fn(loss_mode)

# inputs
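# --- Hypothetical usage sketch, not from the source (the snippet stops at the
# graph inputs). In this TF1 style, the partial-wrapped `optim` is later
# instantiated with a learning rate and minimizes each loss over one network's
# variables; `lr`, `d_loss`, `g_loss`, and the scope names are assumptions:
#
#     d_step = optim(learning_rate=lr).minimize(
#         d_loss, var_list=tf.trainable_variables('D'))
#     g_step = optim(learning_rate=lr).minimize(
#         g_loss, var_list=tf.trainable_variables('G'))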
def main_train(args):
    now = datetime.now()
    save_to = Path(args.save_to) if args.save_to is not None else Path().cwd()
    save_dir = save_to / f'{now:%Y%m%d-%H%M-%S}'
    fix_seed(args.seed)
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.INFO)
    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    dataset_train, dataset_test, scaler = get_data(args)
    logging.info(f'training level: {args.level}')
    n_features = dataset_train.items.shape[1]
    generator, discriminator = get_models(args, n_features, device)

    if args.gan_type == 'vanilla':
        trainer = GANTrainer(generator, discriminator, device)
    elif args.gan_type == 'wgp':
        trainer = WGPGANTrainer(generator, discriminator, device, lambda_=args.lambda_)
    else:
        raise ValueError(f'Unknown gan type: {args.gan_type}')

    optimizer_d = setup_optimizer(discriminator, args.learning_rate, weight_decay=0, args=args)
    optimizer_g = setup_optimizer(generator, args.learning_rate, weight_decay=0, args=args)
    if args.load_from is not None:
        load_model(Path(args.load_from), generator, discriminator,
                   optimizer_g, optimizer_d, device)

    experiment = Experiment(args.comet_api_key,
                            project_name=args.comet_project_name,
                            workspace=args.comet_workspace)
    experiment.log_parameters(vars(args))

    iterations_total = trainer.train(
        args, dataset_train, optimizer_g, optimizer_d, scaler=scaler,
        save_dir=save_dir,
        test_dataset=dataset_test.items[:len(dataset_test) // 10],
        experiment=experiment)

    n_events = len(dataset_test)
    steps = (args.gan_test_ratio * n_events) // args.eval_batch_size
    evaluate_model(generator, experiment, dataset_test, args.eval_batch_size,
                   steps, args, device, scaler, iterations_total)
    experiment.end()
    save_model(save_dir, generator, discriminator, optimizer_g, optimizer_d,
               iterations_total)
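# --- Hypothetical sketch, not from the source: fix_seed is called above but
# defined elsewhere. A typical implementation seeds Python, NumPy, and PyTorch
# so training runs are reproducible. ---
import random
import numpy as np
import torch

def fix_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)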
from model import get_models
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
import sys
from PIL import Image
import numpy as np
from os.path import join
import os
from keras.utils import Sequence
import matplotlib.pyplot as plt
from nightmares.images_defines import EXPERIMENTS_FOLDER
from nightmares import dataset
import warnings

warnings.simplefilter("ignore")

autoencoder, encoder, decoder = get_models()
print("Nº of parameters: {}".format(autoencoder.count_params()))

images = dataset.load_png_dataset(sample=2000, resize=(360, 120), conversion="RGB", strip_mode="1x3")
plt.imshow(images[0])
plt.show()


class Generator(Sequence):
    def __init__(self, imgs):
        self.images = np.float16(imgs) / 255

    def __len__(self):
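    # --- Hypothetical sketch, not from the source (the class is cut off
    # here). A keras.utils.Sequence normally completes the two required
    # methods roughly like this; the batch size of 32 is an assumption:
    #
    #     def __len__(self):
    #         return int(np.ceil(len(self.images) / 32))
    #
    #     def __getitem__(self, idx):
    #         batch = self.images[idx * 32:(idx + 1) * 32]
    #         return batch, batch  # autoencoder targets are the inputs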