def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    root = "./datasets"
    train_sampler = None

    if args.dataset == "STL10":
        train_dataset = torchvision.datasets.STL10(
            root, split="unlabeled", download=True, transform=TransformsSimCLR()
        )
    elif args.dataset == "CIFAR10":
        train_dataset = torchvision.datasets.CIFAR10(
            root, download=True, transform=TransformsSimCLR()
        )
    else:
        raise NotImplementedError

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    model, optimizer, scheduler = load_model(args, train_loader)

    tb_dir = os.path.join(args.out_dir, _run.experiment_info["name"])
    os.makedirs(tb_dir)
    writer = SummaryWriter(log_dir=tb_dir)

    mask = mask_correlated_samples(args)
    criterion = NT_Xent(args.batch_size, args.temperature, mask, args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        lr = optimizer.param_groups[0]['lr']
        loss_epoch = train(args, train_loader, model, criterion, optimizer, writer)

        if scheduler:
            scheduler.step()

        if epoch % 10 == 0:
            save_model(args, model, optimizer)

        writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)
        writer.add_scalar("Misc/learning_rate", lr, epoch)
        print(
            f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}\t lr: {round(lr, 5)}"
        )
        args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
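# The loop above calls a `save_model(args, model, optimizer)` helper whose
# definition is not shown. Below is a minimal sketch of what such a checkpoint
# helper might look like for this PyTorch setup; the checkpoint layout and the
# `checkpoint_<epoch>.tar` file naming are assumptions, not the original code.
import os
import torch

def save_model(args, model, optimizer):
    """Hypothetical checkpoint helper: writes model and optimizer state
    to `<out_dir>/checkpoint_<current_epoch>.tar`."""
    os.makedirs(args.out_dir, exist_ok=True)
    path = os.path.join(args.out_dir, f"checkpoint_{args.current_epoch}.tar")
    torch.save(
        {
            "epoch": args.current_epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        },
        path,
    )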
def main(verbose):
    dataset = load_dataset(
        glob('../data/trump_tweet_data_archive/condensed_*.json.zip'), verbose)
    corpus, sequences, next_chars, c2i, i2c, nc = seq_data(
        dataset, SEQ_LEN, SEQ_STEP, verbose)

    if verbose:
        print(f'corpus length: {len(corpus)}')
        print(f'num characters: {nc}')
        print(f'number of sequences: {len(sequences)}')

    # The data is shuffled so the validation data isn't simply the latest 20% of tweets
    X, y = vec_data(sequences, next_chars, SEQ_LEN, nc, c2i, verbose)

    # Split off the last 20% as validation data for pretty graphs
    n = len(X)
    num_val = int(PERCENT_VALIDATION * n)
    X_val = X[n - num_val:]
    y_val = y[n - num_val:]
    X_train = X[:n - num_val]
    y_train = y[:n - num_val]

    if verbose:
        print(f'Number validation samples: {num_val}')

    model = build_model(SEQ_LEN, nc, verbose)
    history = train_model(model, X_train, y_train, X_val, y_val, verbose)
    plot_model_loss(BASENAME, history, verbose)

    # Save the trained model so we don't have to wait 25 hours to generate
    # another 10-tweet sample
    save_model(model, BASENAME, verbose)

    # Generate sample tweets using 10 random seeds from the corpus.
    generate(BASENAME, model, corpus, c2i, i2c, nc, 10, verbose)
def train_cnn(x_train, y_train, x_val, y_val):
    model = Sequential()
    model.add(
        Conv2D(64, kernel_size=(3, 3), activation='relu',
               input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_class, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model_fit_history = model.fit(x_train, y_train,
                                  batch_size=50,
                                  epochs=100,
                                  verbose=1,
                                  validation_data=(x_val, y_val))

    # Evaluated on the validation split, so report it as validation metrics.
    loss, acc = model.evaluate(x_val, y_val, verbose=0)
    print('\nValidation loss: {}, acc: {}\n'.format(loss, acc))

    plots_loss_accuracy_from_training(model_fit_history)
    save_model(model, path_model=path_save_model, path_weights=path_save_weights)
def test(rank, args, shared_model, counter):
    torch.manual_seed(args.seed + rank)

    env = create_atari_env(args.env_name)
    env.seed(args.seed + rank)

    model = ActorCritic(env.observation_space.shape[0], env.action_space)
    model.eval()

    state = env.reset()
    state = torch.from_numpy(state)
    reward_sum = 0
    done = True

    start_time = time.time()

    # a quick hack to prevent the agent from getting stuck
    actions = deque(maxlen=100)
    episode_length = 0
    while True:
        episode_length += 1
        # Sync with the shared model
        if done:
            model.load_state_dict(shared_model.state_dict())

        if done and counter.value > args.max_steps:
            test_final(shared_model, env, args)
            save_model(shared_model, args)
            exit()

        with torch.no_grad():
            value, logit = model(state.unsqueeze(0))
        prob = F.softmax(logit, dim=-1)
        action = prob.max(1, keepdim=True)[1].numpy()

        state, reward, done, _ = env.step(action[0, 0])
        done = done or episode_length >= args.max_episode_length
        reward_sum += reward

        # a quick hack to prevent the agent from getting stuck
        actions.append(action[0, 0])
        if actions.count(actions[0]) == actions.maxlen:
            done = True

        if done:
            print("Time {}, num steps {}, FPS {:.0f}, episode reward {}, episode length {}".format(
                time.strftime("%Hh %Mm %Ss",
                              time.gmtime(time.time() - start_time)),
                counter.value,
                counter.value / (time.time() - start_time),
                reward_sum, episode_length))
            reward_sum = 0
            episode_length = 0
            actions.clear()
            state = env.reset()
            time.sleep(60)

        state = torch.from_numpy(state)
def main():
    args = load_arg()
    print(f"Run:{args.lang}")

    args.device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    args.n_gpu = torch.cuda.device_count()
    set_seed(args)

    train_dataset, dev_dataset = load_shinra_data(args)

    model = CNN(args, train_dataset.num_labels,
                emb_freeze=args.emb_freeze,
                class_weight=train_dataset.get_class_weight())
    model.to(args.device)

    scores = {}
    model, scores["train"], scores["dev"], best_epoch = train(
        args, train_dataset, dev_dataset, model)
    model.to("cpu")

    if args.output_dir is not None:
        output_dir = f"{args.output_dir}/{args.lang}"
        os.makedirs(output_dir, exist_ok=True)
        save_model(output_dir, model)
        save_json(f"{output_dir}/score.json", scores)
def main(args):
    files = glob(args.data_path + '*.jpg')
    training_data = ImageDataset(files[:int(len(files) * .9)])
    dev_data = ImageDataset(files[int(len(files) * .9):])
    model = train(training_data, dev_data, args)
    print('Saving model to %s' % args.o)
    save_model(model, args.o)
def train_main():
    with open('config.json') as f:
        args = json.load(f)

    # make save folder
    try:
        print('Creating checkpoint folder...')
        os.makedirs(args['save_folder'])
    except OSError as e:
        if e.errno == errno.EEXIST:
            print('Directory already exists.')
        else:
            raise

    # read and preprocess data
    train_data = pd.read_csv(args['train_path'])
    preprocessed_train = preprocess(train_data, args)

    if args['train_model_weights']:
        # resume training
        model = restore_model(args['save_folder'], args['train_model_weights'])
    else:
        model = create_model(args)
        save_model(model, args['save_folder'])

    # split data for CV
    train_set, val_set = val_split(*preprocessed_train[0], preprocessed_train[1])

    model, history = fit_model(model, train_set, val_set, args)
    plot_model_history(history, args['save_folder'])
def train(ctx, dataset_fpath, all_data, max_depth, model_fpath, name, test):
    if not os.path.isfile(dataset_fpath):
        logging.info('No dataset was provided, building with default settings')
        data.save_dataset(dataset_fpath)

    dataset = data.load_dataset(dataset_fpath, return_arrays=False)
    clf = model.REGISTRY[name](max_depth=max_depth)

    X_train, y_train = dataset['X_train'], dataset['y_train']
    X_test, y_test = dataset['X_test'], dataset['y_test']
    if all_data:
        X_train = np.concatenate((X_train, X_test), axis=0)
        y_train = np.concatenate((y_train, y_test), axis=0)

    clf.fit(X_train, y_train)
    model.save_model(clf, model_fpath)

    acc = clf.score(X_train, y_train)
    logging.info("Accuracy on training set: {}".format(acc))
    if test:
        acc = clf.score(X_test, y_test)
        logging.info("Accuracy on the test set: {}".format(acc))
def main(args):
    # process input file
    input_file = util.ensure_local_file(args['train_file'])
    user_map, item_map, tr_sparse, test_sparse = model.create_test_and_train_sets(
        args, input_file, args['data_type'])

    # train model
    output_row, output_col = model.train_model(args, tr_sparse)

    # save trained model to job directory
    if args['data_type'] == 'user_ratings':
        model.save_model_json(args, user_map, item_map, output_row, output_col)
        user_items_w = model.get_user_items_w(input_file)
        model.save_user_items_w(args, user_items_w)
    else:
        model.save_model(args, user_map, item_map, output_row, output_col)

    # log results
    train_rmse = wals.get_rmse(output_row, output_col, tr_sparse)
    test_rmse = wals.get_rmse(output_row, output_col, test_sparse)

    if args['hypertune']:
        # write test_rmse metric for hyperparam tuning
        util.write_hptuning_metric(args, test_rmse)

    tf.logging.info('train RMSE = %.2f' % train_rmse)
    tf.logging.info('test RMSE = %.2f' % test_rmse)
def train():
    data = load_dataset(dataset_path)
    print('Step 1: Dataset loaded successfully!')
    preprocessed_data = preprocessing(data)
    print('Step 2: Data preprocessing done successfully!')
    train, test = train_test_split(preprocessed_data)
    print('Step 3: Data split into train and test successfully!')
    train_X, train_Y, test_X, test_Y, vectorizer = feature_extraction(train, test)
    trained_model = model_training(train_X, train_Y)
    print('Step 4: Model trained successfully!')
    accuracy = model_testing(test_X, test_Y, trained_model)
    vec_classifier = Pipeline([('vectorizer', vectorizer),
                               ('classifier', trained_model)])
    save_model(vec_classifier)
    print('Step 5: Model deployed successfully!')
    response = {
        'success': True,
        'message': 'Model deployed',
        'accuracy': accuracy
    }
    return response
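# `save_model(vec_classifier)` above persists the fitted scikit-learn Pipeline,
# but its definition is not shown. A minimal sketch of such a helper using
# joblib follows; the default target path `model.pkl` is an assumption.
import joblib

def save_model(pipeline, path="model.pkl"):
    """Hypothetical helper: serializes a fitted sklearn Pipeline to disk."""
    joblib.dump(pipeline, path)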
def main(argv=None):
    if not os.path.exists(Constants.MODEL_DIR):
        os.makedirs(Constants.MODEL_DIR)
    if not os.path.exists(Constants.TENSORBOARD_DIR):
        os.makedirs(Constants.TENSORBOARD_DIR)

    with open(Constants.CHARLIST_FILE, "rb") as fp:
        charList = pickle.load(fp)
    lenCharList = len(charList)

    with tf.device("CPU:0"):
        train_ds, train_image_count = create_datasets(Constants.TRAIN_TFRECORD)
        val_ds, val_image_count = create_datasets(Constants.VAL_TFRECORDS)

    train_batches = int(np.floor(train_image_count / Constants.BATCH_SIZE))
    val_batches = int(np.floor(val_image_count / Constants.BATCH_SIZE))

    model = CRNN(lenCharList)

    global_step_op = tf.Variable(0)
    starter_learning_rate = 0.1
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step_op,
                                               decay_steps=10000,
                                               decay_rate=0.1,
                                               staircase=False)
    optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)

    epoch = 1
    summary_writer = tf.contrib.summary.create_file_writer(
        Constants.TENSORBOARD_DIR, flush_millis=10000)
    with summary_writer.as_default(), tf.contrib.summary.always_record_summaries():
        while True:
            print("Epoch " + str(epoch))
            loss = train_on_batch(model, train_ds, train_batches, charList, optimizer)
            images, recognized, charErrorRate, wordAccuracy = validate_on_batch(
                model, val_ds, val_batches, charList, epoch)
            if charErrorRate < 15:
                save_model(model, epoch)
            write_to_tensorboard(epoch, images, recognized, loss,
                                 charErrorRate, wordAccuracy)
            epoch += 1
def main(train=True):
    # data
    train_loader, valid_loader, test_loader = dataset.get_dataset()

    # loss function
    criterion = nn.CrossEntropyLoss()

    # optimizer
    model = load_model(name)
    if model.classifier is not None:
        fc_params_id = list(map(id, model.classifier.parameters()))
        base_params = filter(lambda p: id(p) not in fc_params_id,
                             model.parameters())
        optimizer = optim.SGD([
            {'params': base_params, 'lr': LR * freeze_rate},  # 0
            {'params': model.classifier.parameters(), 'lr': LR}],
            momentum=momentum, weight_decay=weight_decay)
    elif model.fc is not None:
        fc_params_id = list(map(id, model.fc.parameters()))
        base_params = filter(lambda p: id(p) not in fc_params_id,
                             model.parameters())
        optimizer = optim.SGD([
            {'params': base_params, 'lr': LR * freeze_rate},  # 0
            {'params': model.fc.parameters(), 'lr': LR}],
            momentum=momentum, weight_decay=weight_decay)
    else:
        optimizer = optim.SGD(model.parameters(), lr=LR,
                              momentum=momentum, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=step_size,
                                                gamma=gamma)

    if train:
        writer = SummaryWriter(log_dir=log_dir, comment='test_tensorboard')
        if not RESUME:
            train_classifier(model, 100, train_loader, valid_loader,
                             optimizer, criterion, scheduler, writer=writer)
        else:
            train_classifier_resume(model, optimizer, path_checkpoint, 100,
                                    train_loader, valid_loader, criterion,
                                    scheduler, writer=writer)
        save_model(model, name, save_state_dic=True)
    else:
        state_dict = torch.load(model_path)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        inference_model = load_inference_model(name)
        inference_model.load_state_dict(state_dict)
        inference_model.to(device)
        inference_model.eval()

        pred_list1 = []
        pred_list2 = []
        pred_list3 = []
        with torch.no_grad():
            for i, data in enumerate(test_loader):
                images, _ = data
                images = images.to(device)

                # tensor to vector
                outputs = inference_model(images)
                _, pred_top3 = torch.topk(outputs, k=3, dim=-1)
                pred_list1.append(pred_top3.data[:, 0].cpu().numpy().reshape((1, -1)).tolist())
                pred_list2.append(pred_top3.data[:, 1].cpu().numpy().reshape((1, -1)).tolist())
                pred_list3.append(pred_top3.data[:, 2].cpu().numpy().reshape((1, -1)).tolist())
        lists = [pred_list1, pred_list2, pred_list3]
        inference.vector2label(lists)
def save_model(args):
    """save_model(args: argparse.Namespace)

    Increments the save counter, and saves the global model.
    You must first set `model.sess`. See `model.save_model` for more info.
    """
    global model_save_counter
    model_save_counter += 1
    path = os.path.join(args.model_output_dir,
                        "model-%03i.npy" % model_save_counter)
    model.save_model(net, path)
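# The wrapper above relies on module-level state that must exist before the
# first call. A hedged sketch of the assumed module setup (the names are taken
# from the function body; their real definitions are not shown here):
model_save_counter = 0  # incremented on every save, used in the file name
# `net` is assumed to be the global model object, and `model` the module that
# provides `model.save_model(net, path)` and holds `model.sess`; both are
# expected to be initialized elsewhere before calling `save_model(args)`.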
def do_quick_distillation(start_epoch=-1):
    seed_init()
    train_dataloader, eval_dataloader = getdataLoader()
    xlnet_config = XLNetConfig.from_json_file(config.xlnet_config_root)
    student = XlnetCloze(xlnet_config)
    soft_labels = pickle.load(open(config.soft_label_file, "rb"))

    optimizer_grouped_parameters = get_optimizer_group(student)
    num_train_steps = int(len(train_dataloader.dataset) /
                          config.train_batch_size * config.num_train_epochs)
    optimizer = AdamW(optimizer_grouped_parameters, lr=config.xlnet_learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, 'max', verbose=True,
                                  factor=config.decay, min_lr=config.min_lr,
                                  patience=config.patience)
    load_model(start_epoch, student, optimizer)

    if config.n_gpu > 1:
        student = nn.DataParallel(student)
    student.to(config.device)
    student.train()

    ave_loss, ave_hard_loss, ave_soft_loss, ave_train_accr = get_watch_index()
    global_step = (start_epoch + 1) * num_train_steps
    for epoch in trange(start_epoch + 1, config.num_train_epochs):
        student.zero_grad()
        for batch in tqdm(train_dataloader):
            input_ids, attention_mask, position, option_ids, tags, labels = batch
            input_ids, attention_mask, position, option_ids, tags, labels = to_device(
                input_ids, attention_mask, position, option_ids, tags, labels)
            _, student_logits = student(input_ids, attention_mask, position,
                                        option_ids, tags, labels)
            teacher_probs = get_teacher_probs(soft_labels, tags).to(config.device)

            loss_hard = F.cross_entropy(student_logits, labels, reduction="mean")
            loss_soft, teacher_probs = cross_entropy_loss_with_temperature_v2(
                student_logits, teacher_probs, config.temperature)
            loss = config.alpha * loss_hard + \
                (1.0 - config.alpha) * config.temperature * config.temperature * loss_soft
            loss.backward()

            ave_train_accr.add(cal_accr(student_logits, labels))
            ave_loss.add((config.alpha * loss_hard +
                          (1.0 - config.alpha) * loss_soft).item())
            ave_soft_loss.add(loss_soft.item())
            ave_hard_loss.add(loss_hard.item())

            optimizer.step()
            optimizer.zero_grad()

            # ipdb.set_trace()
            show_watch_index(global_step,
                             ave_teacher_accr=cal_accr(logits=teacher_probs,
                                                       labels=labels))
            if (global_step + 1) % config.show_loss_step == 0:
                now_lrs = show_lr(optimizer)
                show_watch_index(global_step, ave_hard_loss=ave_hard_loss,
                                 now_lrs=now_lrs, ave_soft_loss=ave_soft_loss,
                                 ave_loss=ave_loss)
            if global_step <= num_train_steps * config.warmup_proportion:
                warmup_adajust(num_train_steps, global_step, optimizer)
            global_step += 1

        eval_accr = eval(student, eval_dataloader)
        show_watch_index(epoch, eval_accr=eval_accr, ave_train_accr=ave_train_accr)
        scheduler.step(eval_accr)
        save_model(epoch, student, optimizer)
def main():
    print('--start--')

    # Folder Paths
    log_dir = './files/training_logs/'

    # Hyper parameters
    num_features = 5
    classes = ['Dead', 'Alive: Wrong Direction', 'Alive: Right Direction']
    num_classes = len(classes)
    epochs = 10
    batch_size = 128
    learning_rate = 0.01

    # Load Data
    x_train, y_train = Data.generate_data(80000, num_features, num_classes)
    x_valid, y_valid = Data.generate_data(16000, num_features, num_classes)

    # Build model
    model = Model.build_model(num_features, num_classes, learning_rate)

    # View model summary
    model.summary()

    # Check memory needed during the training process (not accurate)
    Model.get_model_memory_usage(batch_size, model)

    # Get optimizer name
    opt_name = model.optimizer.__class__.__name__

    # Get folder name
    hparam_str = make_hparam_string(opt_name, learning_rate, batch_size, epochs)
    log_dir += hparam_str
    output_dir = log_dir + 'model/'

    # Create folder
    prepare_dir(output_dir)

    # Train the model
    train(model, x_train, y_train, x_valid, y_valid, batch_size, epochs, log_dir)

    # Evaluate the model
    evaluate(model, classes, x_valid, y_valid, output_dir)

    # Save the model
    Model.save_model(model, classes, output_dir)

    # Test on game
    # test_in_game(model, 1000, False, True, 200)

    # Visualize
    # plt.show()

    print('--end--')
def train():
    raw_data = pd.read_csv(train_data_path)
    data, labels = preprocess(raw_data)
    train_data, test_data, train_labels, test_labels = split_data(data, labels)
    trained_model = train_model(digit_recognition_model(),
                                train_data, train_labels, 15)
    result = eval(trained_model, test_data, test_labels)
    print('model accuracy:', result)
    save_model(model_path, trained_model)
def main():
    ###########################################################################
    # Load data
    ###########################################################################
    d = util.Dictionary()
    if args.task == "train":
        logging.info("Reading train...")
        trncorpus = util.read_corpus(args.ftrn, d, True)
        d.freeze()  # no new word types allowed
        vocab_size = d.size()
        # save dict
        d.save_dict(fprefix + ".dict")
        logging.info("Reading dev...")
        devcorpus = util.read_corpus(args.fdev, d, False)
    elif args.task == "test":
        logging.info("Reading test...")
        d.load_dict(args.fdct)
        d.freeze()
        vocab_size = d.size()
        # load test corpus
        tstcorpus = util.read_corpus(args.ftst, d, False)

    ###########################################################################
    # Build the model
    ###########################################################################
    if args.task == "train":
        model_fname = fprefix + ".model"
        pretrained_model = None
        if args.fmod:
            # load pre-trained model
            pretrained_model = model.load_model(args.fmod, vocab_size,
                                                args.nclass, args.inputdim,
                                                args.hiddendim, args.nlayer,
                                                args.droprate)
            logging.info("Successfully loaded pretrained model.")
        trained = model.train(trncorpus, devcorpus, vocab_size, args.nclass,
                              args.inputdim, args.hiddendim, args.nlayer,
                              args.trainer, args.lr, args.droprate, args.niter,
                              args.logfreq, args.verbose, model_fname,
                              pretrained_model)
        dev_accuracy = model.evaluate(trained, devcorpus.docs)
        logging.info("Final Accuracy on dev: %s", dev_accuracy)
        model.save_model(trained, model_fname)
    else:
        trained_model = model.load_model(args.fmod, vocab_size, args.nclass,
                                         args.inputdim, args.hiddendim,
                                         args.nlayer, args.droprate)
        tst_accuracy = model.evaluate(trained_model, tstcorpus.docs)
        logging.info("Final Accuracy on test: %s", tst_accuracy)
def train(config: TrainingConfig) -> TrainingResults:
    """
    Train a new model based on the `config`, save it to disk, and return the
    training statistics.
    """
    train_dataset, validation_dataset, labels = get_datasets(
        config.data_dir, config.image_size, config.batch_size)
    model = get_model(config.image_size, len(labels))
    history = model.fit(
        train_dataset,
        validation_data=validation_dataset,
        epochs=config.epochs
    ).history
    save_model(config.model_path, model, labels)
    return TrainingResults(history['accuracy'], history['val_accuracy'],
                           history['loss'], history['val_loss'])
def test_save_model(self):
    """ Test model saving """
    mock_model = self.mock_model
    save_model(mock_model, self.test_model_dir)
    files = os.listdir(self.test_model_dir)
    if files:
        test_ext = os.path.splitext(files[0])[1]

    # Saves a model
    self.assertTrue(files, "no model saved")
    self.assertEqual(".h5", test_ext, "model not saved as '.h5'")
def main(_run, _log):
    args = argparse.Namespace(**_run.config)
    args = post_config_hook(args, _run)

    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    root = "./datasets"

    model = load_model(args)
    model = model.to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)  # TODO: LARS

    train_sampler = None
    train_dataset = torchvision.datasets.STL10(
        root, split="unlabeled", download=True, transform=TransformsSimCLR()
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        drop_last=True,
        num_workers=args.workers,
        sampler=train_sampler,
    )

    tb_dir = os.path.join(args.out_dir, _run.experiment_info["name"])
    os.makedirs(tb_dir)
    writer = SummaryWriter(log_dir=tb_dir)

    mask = mask_correlated_samples(args)
    criterion = NT_Xent(args.batch_size, args.temperature, mask, args.device)

    args.global_step = 0
    args.current_epoch = 0
    for epoch in range(args.start_epoch, args.epochs):
        loss_epoch = train(args, train_loader, model, criterion, optimizer, writer)
        writer.add_scalar("Loss/train", loss_epoch / len(train_loader), epoch)

        if epoch % 10 == 0:
            save_model(args, model, optimizer)

        print(
            f"Epoch [{epoch}/{args.epochs}]\t Loss: {loss_epoch / len(train_loader)}"
        )
        args.current_epoch += 1

    ## end training
    save_model(args, model, optimizer)
def main(disp_text=True):
    if config.fresh_model:
        config.all_losses = []
        save_model(make_model())
        model = load_model()
        if disp_text:
            print('created model.', end=' ')
    else:
        model = load_model()
        if not model:
            save_model(make_model())
            model = load_model()
            if disp_text:
                print('created model.', end=' ')
        else:
            if disp_text:
                print('loaded model.', end=' ')

    data = load_data()
    data, data_dev = split_data(data)

    data = [d for i, d in enumerate(data) if i in [8, 10, 13, 14]]
    print()
    seq_lens = [len(d) for d in data]
    print(f'seq lens: {seq_lens}')
    min_seq_len = min(seq_lens)
    print(f'min seq len: {min_seq_len}')
    if not config.max_seq_len or config.max_seq_len > min_seq_len:
        config.max_seq_len = min_seq_len
    data = [d[:config.max_seq_len] for d in data]

    # from random import choice
    # from torch import randn
    # data = [[randn(config.in_size) for _ in range(choice(range(config.max_seq_len//2,config.max_seq_len)))] for _ in range(10)]
    # data_dev = []
    # for d in data: print(len(d))

    if not config.batch_size or config.batch_size >= len(data):
        config.batch_size = len(data)
    elif config.batch_size < 1:
        config.batch_size = int(len(data) * config.batch_size)

    if disp_text:
        print(f'hm data: {len(data)}, hm dev: {len(data_dev)}, '
              f'bs: {config.batch_size}, lr: {config.learning_rate}, '
              f'\ntraining started @ {now()}')

    for ep in range(config.hm_epochs):
        for i, batch in enumerate(batchify_data(data)):
            train_on(model, batch)

    return model
def test_load_model(self):
    """ Test model loading """
    model = self.mock_model
    loaded_model = None
    save_model(model, self.test_model_dir)
    files = os.listdir(self.test_model_dir)
    if files:
        model_name = files[0]  # hardcoded: take the first model from model_dir
        loaded_model = load_model(self.test_model_dir, model_name)

    # loaded model exists
    self.assertTrue(loaded_model, "no model loaded")
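# The two tests above only pin down the helpers' contracts: `save_model`
# writes a single `.h5` file into the given directory, and `load_model` takes
# the directory plus a file name. A minimal Keras-based sketch consistent with
# that contract follows; the default file name is an assumption.
import os
from tensorflow import keras

def save_model(model, model_dir, name="model.h5"):
    """Hypothetical helper: serializes a Keras model to `<model_dir>/<name>`."""
    os.makedirs(model_dir, exist_ok=True)
    model.save(os.path.join(model_dir, name))

def load_model(model_dir, model_name):
    """Hypothetical helper: loads a Keras model saved by `save_model`."""
    return keras.models.load_model(os.path.join(model_dir, model_name))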
def celebrities_face_train(name, deploy=False):
    """
    Get the celebrities_face dataset and train a model on it.
    After training, the model and the log file are saved.

    :param name: name of the model, used for file names
    :param deploy: if True, training uses the train set and test set;
        otherwise it uses the train set and validation set. Default False.
    """
    batch_size = 32
    epoch = 100
    lr = 0.001
    smoothing = 0.1

    dset = dataset.get_dataset(
        './celebrities_face',
        [0.5893, 0.4750, 0.4330],
        [0.2573, 0.2273, 0.2134],
    )
    num_classes = len(dset.classes)
    hyperparam_dict = {
        'epoch': epoch,
        'lr': lr,
        'smoothing': smoothing,
        'num_classes': num_classes
    }
    meters = [utils.AverageMeter(), utils.AverageMeter(), utils.AverageMeter()]

    train_set, test_set, train_labels = dataset.train_test_set_split(
        dset, 'celebrities_face')
    if deploy:
        train_loader, valid_loader = dataset.train_valid_loader_split(
            train_set, train_labels, batch_size, valid_size=0.1)
    else:
        train_loader = torch.utils.data.DataLoader(train_set,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   num_workers=2)
        valid_loader = torch.utils.data.DataLoader(test_set,
                                                   batch_size=32,
                                                   num_workers=2)

    train_model = model.get_inception_v3(num_classes=num_classes)
    result_log = train(train_model, train_loader, valid_loader, meters,
                       hyperparam_dict)

    model.save_model(train_model, name)
    save_log(result_log, name)
def start_train_job():
    try:
        with open(param_path, 'r') as hp:
            hyperparameters = json.loads(hp.read())

        _model = model.train_model(training_path, hyperparameters)
        model.save_model(model_path, _model)
    except Exception as ex:
        # Write out an error file. This will be returned as the failureReason
        # in the DescribeTrainingJob result.
        trc = traceback.format_exc()
        with open(os.path.join(output_path, 'failure'), 'w') as s:
            s.write('Exception during training: ' + str(ex) + '\n' + trc)
        # Printing this causes the exception to be in the training job logs, as well.
        print('Exception during training: ' + str(ex) + '\n' + trc,
              file=sys.stderr)
        # A non-zero exit code causes the training job to be marked as Failed.
        sys.exit(255)
def train_and_evaluate():
    train_data_gen, val_data_gen = load_data()
    model = create_model()
    history = model.fit_generator(
        train_data_gen,
        steps_per_epoch=(train_data_gen.samples // BATCH_SIZE),
        epochs=EPOCHS,
        validation_data=val_data_gen,
        validation_steps=(val_data_gen.samples // BATCH_SIZE)
    )
    acc = history.history["accuracy"]
    val_acc = history.history["val_accuracy"]
    plot_acc(EPOCHS, acc, val_acc)
    save_model(model)
def train(self, num_epoch, verbose=True):
    for epoch in range(1, num_epoch + 1, 1):
        loss = self._run_epoch(verbose)
    savedir = save_model(self.model, self.savedir, loss)
    return {
        'savedir': savedir,
        'throughout': self.stats.throughout,
        'running_avg': self.stats.running_avg
    }
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)

    # input files
    input_file = util.ensure_local_file(args.train_file)
    user_map, item_map, tr_sparse, test_sparse = model.create_test_and_train_sets(
        input_file)

    # train model
    output_row, output_col = model.train_model(args, tr_sparse)

    # save trained model to job directory
    model.save_model(args, user_map, item_map, output_row, output_col)

    # log results
    test_rmse = wals.get_rmse(output_row, output_col, test_sparse)
    util.write_hptuning_metric(args, test_rmse)
def train_model_VGG(x_train, y_train, x_validate, y_validate, num_classes):
    # Build, compile, and fit the model
    model = build_fully_connected(input_shape=X['train'].shape[1:],
                                  num_classes=num_classes)
    adam = optimizers.Adam(lr=0.0001)
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model_fit_history = model.fit(X['train'], Y['train'],
                                  batch_size=64,
                                  epochs=50,
                                  verbose=2,
                                  validation_data=(X['validate'], Y['validate']))
    epochs = np.argmin(model_fit_history.history['val_loss']) + 1
    print(f'Stop training at {epochs} epochs')
    # plots for loss and accuracy of the model after training
    plots_loss_accuracy_from_training(model_fit_history)

    # Merge training and validation data
    X_train = np.concatenate([x_train, x_validate])  # inputs: train + validation
    Y_train = np.concatenate([y_train, y_validate])  # targets: train + validation

    # Randomly shuffle X and Y
    shuffle_index = np.random.permutation(len(X_train))
    X_train = X_train[shuffle_index]
    Y_train = Y_train[shuffle_index]

    model = build_fully_connected(input_shape=X_train.shape[1:],
                                  num_classes=num_classes)
    model.compile(optimizer=adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print('Train with Training dataset + Validation dataset as input.')
    # retrain on training + validation data for the best epoch count found above
    model_fit_history = model.fit(X_train, Y_train,
                                  batch_size=64,
                                  epochs=epochs,
                                  verbose=0)
    save_model(model, path_save_model_vgg, path_save_weight_vgg)
def train(num_epochs):
    model_to_device(model)
    params = [p for p in model.parameters()]
    trainable = [p for p in params if p.requires_grad]
    print(f"{len(trainable)} of {len(params)} model parameters are trainable.")
    optimizer = torch.optim.SGD(params=trainable, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

    for epoch in range(num_epochs):
        print(f"Beginning epoch {epoch + 1} of {num_epochs}.")
        model.train()
        start = math.floor(time())
        with ForwardPassCounter(max=len(data_loader)) as counter:
            epoch_loss = 0
            for t_imgs, t_annotations in data_loader:
                t_imgs, t_annotations = input_to_device(t_imgs, t_annotations)
                loss_dict = model(t_imgs, t_annotations)
                losses = sum(loss for loss in loss_dict.values())
                optimizer.zero_grad()
                losses.backward()
                optimizer.step()
                epoch_loss += losses
                counter.next()
            print(epoch_loss)
        print(f"Epoch duration: {math.floor(time()) - start} seconds.")
        print(f"Overwriting ./checkpoints/rcnn_{session_id}.pt with newest weights...",
              end="")
        save_model(model, session_id)
        print("Done.")
def checkpoint2model(checkpoint_path: str, model_dir: str):
    """
    Given a checkpoint file, generates a model file that can be loaded by the
    run_TEDD1104.py script.

    Input:
    - checkpoint_path: path of the checkpoint file (checkpoint.pt)
    - model_dir: directory where the model is going to be saved
      (model.bin and model_hyperparameters.json)

    Output:
    """
    if not os.path.exists(model_dir):
        print(f"{model_dir} does not exist. We will create it.")
        os.makedirs(model_dir)

    print_message(f"Loading checkpoint: {checkpoint_path}")
    (
        tedd1104_model,
        _,
        _,
        _,
        running_loss,
        total_batches,
        total_training_examples,
        acc_dev,
        epoch,
        fp16,
        _,
    ) = model.load_checkpoint(path=checkpoint_path,
                              device=model.torch.device("cpu"))

    print(f">>>>>> Checkpoint info <<<<<<\n"
          f"Running loss: {running_loss / total_batches}\n"
          f"Num epochs: {epoch + 1}\n"
          f"Total training examples: {total_training_examples}\n"
          f"Acc dev set: {round(acc_dev * 100, 2)}\n"
          f"FP16: {fp16}\n")

    print_message(f"Saving model in {model_dir}")
    model.save_model(model=tedd1104_model, save_dir=model_dir, fp16=fp16)
    print_message("Done!")
def main(args):
    # process input file
    input_file = util.ensure_local_file(args['train_files'][0])
    user_map, item_map, tr_sparse, test_sparse = model.create_test_and_train_sets(
        args, input_file, args['data_type'])

    # train model
    output_row, output_col = model.train_model(args, tr_sparse)

    # save trained model to job directory
    model.save_model(args, user_map, item_map, output_row, output_col)

    # log results
    train_rmse = wals.get_rmse(output_row, output_col, tr_sparse)
    test_rmse = wals.get_rmse(output_row, output_col, test_sparse)

    if args['hypertune']:
        # write test_rmse metric for hyperparam tuning
        util.write_hptuning_metric(args, test_rmse)

    tf.logging.info('train RMSE = %.2f' % train_rmse)
    tf.logging.info('test RMSE = %.2f' % test_rmse)
        filters=filters,
        feats_to_cache=feats_to_cache,
        testing=False,
        feat_score_weight=0.5,
        local_search_width=32,
        local_search_step=2,
        processing_time_ratio=2.0,
        adapt_improve=True,
        use_best_data=True,
        use_all_data=False,
        testing_dir='/tmp',
        n_thumbs=6,
        startend_clip=0.025)

    clip_finder = clip_finder.ClipFinder(
        None,
        scenedetect.detectors.ContentDetector(30.0),
        model.features.ObjectActionGenerator(),
        valence_weight=1.0,
        action_weight=0.25,
        custom_weight=0.5,
        processing_time_ratio=0.7,
        startend_clip=0.1,
        cross_scene_boundary=True,
        min_scene_piece=15)

    mod = model.Model(None, vid_searcher=video_searcher, clip_finder=clip_finder)
    model.save_model(mod, options.output)
def remove_disk_cache(obj):
    '''Recursively removes disk caches from the object.'''
    for name, val in obj.__dict__.items():
        if isinstance(val, features.DiskCachedFeatures):
            obj.__dict__[name] = (
                features.MemCachedFeatures.create_shared_cache(
                    val.feature_generator))
        else:
            try:
                remove_disk_cache(val)
            except AttributeError:
                pass
    return obj


if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option('--output', '-o', default='neon.model',
                      help='File to output the model definition')
    parser.add_option('--input', '-i', default='neon.model',
                      help='File to input the model definition')

    options, args = parser.parse_args()

    model.save_model(
        remove_disk_cache(model.load_model(options.input)),
        options.output)
    parser.add_argument(
        '--projectId',
        help='ID (not name) of your project',
        required=True
    )
    parser.add_argument(
        '--job-dir',
        help='output directory for model, automatically provided by gcloud',
        required=True
    )

    args = parser.parse_args()
    arguments = args.__dict__

    model.PROJECT = arguments['projectId']
    model.KEYDIR = 'trainer'

    estimator, rmse = model.train_and_evaluate(arguments['frac'],
                                               arguments['maxDepth'],
                                               arguments['numTrees'])
    loc = model.save_model(estimator, arguments['job_dir'], 'babyweight')
    print("Saved model to {}".format(loc))

    # this is for hyperparameter tuning
    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='rmse',
        metric_value=rmse,
        global_step=0)
    # done