def infer(hparams):
    # Build Models and Data
    # ------------------------------------------
    _, infer_model = model_utils.build_model(hparams)
    manager = training_manager.TrainingManager(
        name=hparams.task_names[MAIN_MODEL_INDEX],
        logdir=hparams.manager_logdir)

    if hparams.ckpt_file is not None:
        ckpt_file = hparams.ckpt_file
        print("Using Specified CKPT from %s" % ckpt_file)
    else:
        ckpt_file = manager.best_checkpoint
        print("Using Manager CKPT from %s" % ckpt_file)

    if ckpt_file is None:
        raise ValueError("`ckpt_file` is None")

    tf.logging.info("Running Evaluation")
    # Skip optimizer slot variables (Adam moments) when restoring for inference.
    infer_model.initialize_or_restore_session(
        ckpt_file=ckpt_file,
        var_filter_fn=lambda name: "Adam" not in name)
    infer_model.initialize_data_iterator()
    infer_model.inference(model_idx=MAIN_MODEL_INDEX)
def main():
    parser = argparse.ArgumentParser(
        description='Run the chatbot on a test file or interactively.')
    parser.add_argument('-c', dest='config')
    parser.add_argument('-f', dest='test_file')
    parser.add_argument('-e', dest='epoch', type=int)
    args = parser.parse_args()

    config.parse(args.config)
    config.cuda = False
    vocab = load_vocabulary()
    model = build_model(len(vocab.word2index), load_ckpt=True,
                        ckpt_epoch=args.epoch)
    config.use_cuda = False
    model.cpu()
    bot = BotAgent(model, vocab)

    if args.test_file is not None:
        with open(args.test_file) as file:
            question_list = []
            for line in file:
                question_list.append(line.rstrip('\n'))
        for question in question_list:
            print('> %s' % question)
            print('bot: %s' % bot.response(question))
    else:
        while True:
            user_input = input('me: ')
            if user_input.strip() == '':
                continue
            print('%s: %s' % ('bot', bot.response(user_input)))
def trainMTL(hparams):
    # Build Models and Data
    # ------------------------------------------
    # with misc_utils.suppress_stdout():
    train_MTL_model, val_MTL_model = model_utils.build_model(hparams)

    # building training monitor
    # ------------------------------------------
    # early stop on the **target** task
    eval_task = hparams.tasks[hparams.eval_model_index]
    manager = training_manager.TrainingManager(
        name=eval_task.name,
        logdir=hparams.manager_logdir,
        stopping_fn=eval_task.manager_stopping_fn(
            tolerance=EARLY_STOP_TOLERANCE),
        updating_fn=eval_task.manager_updating_fn(),
        load_when_possible=False)

    scores_dict = _train(
        hparams=hparams,
        manager=manager,
        train_MTL_model=train_MTL_model,
        val_MTL_model=val_MTL_model)

    # log the results for easier inspection
    with open(hparams.train_logfile, "a") as f:
        for tag, score in scores_dict.items():
            f.write("%s: %.3f\t" % (tag, score))
        f.write("\n")

    print("FINISHED")
def train_val(model, base_model):
    train_gen = sample_gen(file_id_mapping_train)
    # print gen(train_gen, batch_size).next()
    test_gen = sample_gen(file_id_mapping_test)

    checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    early = EarlyStopping(monitor="val_loss", mode="min", patience=5)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
    callbacks_list = [checkpoint, early, reduce_lr]

    history = model.fit_generator(
        gen(train_gen, batch_size),
        validation_data=gen(test_gen, batch_size),
        epochs=60, verbose=1, workers=4, use_multiprocessing=True,
        callbacks=callbacks_list, steps_per_epoch=500, validation_steps=30)

    # model.compile(loss=identity_loss, optimizer=SGD(0.000001))
    # history = model.fit_generator(gen(train_gen, batch_size),
    #                               validation_data=gen(test_gen, batch_size),
    #                               epochs=60, verbose=1, workers=4,
    #                               use_multiprocessing=True,
    #                               callbacks=callbacks_list,
    #                               steps_per_epoch=500, validation_steps=30)
    # return

    # Hard-example mining rounds: rebuild the model, reload the previous
    # round's weights, and retrain on progressively harder triplets.
    file_name = file_path
    for i in range(1, 10):
        train_file_distance = gen_distance(base_model, file_id_mapping_train, i)
        test_file_distance = gen_distance(base_model, file_id_mapping_test, i)
        train_gen = hard_sample_gen(train_file_distance)
        test_gen = hard_sample_gen(test_file_distance)

        model, base_model = build_model()
        model = multi_gpu_model(model, gpus=4)
        model.compile(loss=identity_loss, optimizer=Adam(0.000001))
        model.load_weights(file_name)

        file_name = 'hard_{}.h5'.format(i)
        checkpoint = ModelCheckpoint(file_name, monitor='val_loss', verbose=1,
                                     save_best_only=True, mode='min')
        early = EarlyStopping(monitor="val_loss", mode="min", patience=15)
        history = model.fit_generator(
            gen(train_gen, batch_size),
            validation_data=gen(test_gen, batch_size),
            epochs=60, verbose=1, workers=4, use_multiprocessing=True,
            steps_per_epoch=500, validation_steps=30,
            callbacks=[checkpoint, early])
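# The compile calls above use an `identity_loss` that is not defined in this
# snippet. A minimal sketch of one common implementation, assuming the
# network's output tensor is itself the loss to minimize and y_true is a
# dummy placeholder (an assumption, not the author's confirmed code):
from keras import backend as K

def identity_loss(y_true, y_pred):
    # Ignore the dummy labels; minimize the model's own loss output.
    return K.mean(y_pred - 0 * y_true)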
def run_single_experiment(cfg, diffinit, seed, replace_index):
    t0 = time()
    # how we convert the cfg into a path and such is defined in ExperimentIdentifier
    exp = ExperimentIdentifier(seed=seed, replace_index=replace_index,
                               diffinit=diffinit)
    exp.init_from_cfg(cfg)
    exp.ensure_directory_exists(verbose=True)
    path_stub = exp.path_stub()
    print('Running experiment with path', path_stub)

    # load data
    x_train, y_train, x_vali, y_vali, x_test, y_test = load_data(
        options=cfg['data'], replace_index=replace_index)

    # define model
    init_path = get_model_init_path(cfg, diffinit)
    model = build_model(**cfg['model'], init_path=init_path)

    # prep model for training
    prep_for_training(
        model, seed=seed,
        optimizer_settings=cfg['training']['optimization_algorithm'],
        task_type=cfg['model']['task_type'])

    # now train
    train_model(model, cfg['training'], cfg['logging'],
                x_train, y_train, x_vali, y_vali,
                path_stub=path_stub)

    # clean up
    del model
    clear_session()
    print('Finished after', time() - t0, 'seconds')
def __init__(self, model_type, input_shape, model_args=None, fp16=False,
             iterations=200, result_file=None, warmup=5):
    if model_args is None:
        model_args = {}
    self.model_type = model_type
    self.model = build_model(model_type, **model_args)
    self.iterations = iterations
    self.warmup = warmup
    self.input_shape = input_shape
    self.result_file = result_file
    if len(input_shape) == 5:
        # 2d model
        self.num_segments = input_shape[1]
    elif len(input_shape) == 6:
        # 3d model
        self.num_segments = input_shape[3]
    else:
        raise ValueError(f"Invalid input shape {input_shape}")
    self.fp16 = fp16
    self.random_inputs = self.model.build_random_inputs(fp16, input_shape)
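# A hedged sketch of a driver method that might sit on this benchmark class:
# a few untimed warm-up passes, then `iterations` timed forward passes. The
# `run` name and the assumption that the model is callable on
# `self.random_inputs` are illustrative, not taken from the original source.
import time

def run(self):
    """Benchmark driver: warm up, then time `iterations` forward passes."""
    for _ in range(self.warmup):
        self.model(self.random_inputs)   # warm-up passes, not timed
    start = time.time()
    for _ in range(self.iterations):
        self.model(self.random_inputs)   # timed passes
    elapsed = time.time() - start
    return self.iterations / elapsed     # throughput in iterations/sec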
def main():
    vocab = load_vocabulary()
    model = build_model(len(vocab.word2index), load_ckpt=True,
                        ckpt_epoch=ckpt_epoch)
    bot = BotAgent(model, vocab)
    while True:
        user_input = raw_input('me: ')
        if user_input.strip() == '':
            continue
        print('%s: %s' % (BOT_NAME, bot.response(user_input)))
def main():
    # Get command line arguments
    args = get_command_line_args()
    use_gpu = torch.cuda.is_available() and args.gpu
    print("Data directory: {}".format(args.data_dir))
    if use_gpu:
        print("Training on GPU.")
    else:
        print("Training on CPU.")
    print("Architecture: {}".format(args.arch))
    if args.save_dir:
        print("Checkpoint save directory: {}".format(args.save_dir))
    print("Learning rate: {}".format(args.learning_rate))
    print("Hidden units: {}".format(args.hidden_units))
    print("Epochs: {}".format(args.epochs))

    # Get data loaders
    dataloaders, class_to_idx = model_utils.get_loaders(args.data_dir)
    for key, value in dataloaders.items():
        print("{} data loader retrieved".format(key))

    # Build the model
    model, optimizer, criterion = model_utils.build_model(
        args.arch, args.hidden_units, args.learning_rate)
    model.class_to_idx = class_to_idx

    # Check if a GPU is available and move tensors
    if use_gpu:
        print("GPU is available. Moving tensors.")
        model.cuda()
        criterion.cuda()

    # Train the model
    model_utils.train_model(model, args.epochs, criterion, optimizer,
                            dataloaders['training'],
                            dataloaders['validation'], use_gpu)

    # Save the checkpoint
    if args.save_dir:
        if not os.path.exists(args.save_dir):
            os.makedirs(args.save_dir)
        save_path = os.path.join(args.save_dir, args.arch + '_checkpoint.pth')
    else:
        save_path = args.arch + '_checkpoint.pth'
    print("Will save checkpoint to {}".format(save_path))
    save(args.arch, args.learning_rate, args.hidden_units, args.epochs,
         save_path, model, optimizer)
    print("Checkpoint saved")

    # Validate the accuracy
    test_loss, accuracy = model_utils.validate(model, criterion,
                                               dataloaders['testing'], use_gpu)
    print("Test Loss: {:.3f}".format(test_loss))
    print("Test Acc.: {:.3f}".format(accuracy))
def select_proper_checkpoint():
    for epoch in arange(MIN_EPOCH, MAX_EPOCH + STEP_SIZE, STEP_SIZE):
        vocab = load_vocabulary()
        model = build_model(len(vocab.word2index), load_checkpoint=True,
                            checkpoint_epoch=epoch, print_module=False)
        data_set = build_data_loader(batch_size=BATCH_SIZE)
        test_loss = model_evaluate(model, data_set)
        print('EPOCH %d Test PPL: %.4f' % (epoch, math.exp(test_loss)))
def vp3d_model(self):
    clip_df = interpolate(self.clip_df, interpolate_feet=False)
    clip_df = delete_nans(clip_df)
    multiplier = round(800 / 224, 2)
    clip_df = rescale_keypoints(clip_df, multiplier)
    actions, poses = fetch_keypoints(clip_df)
    classes = 8
    chk_filename = os.path.join(CHECKPATH, "Recipe-2-epoch-19.pth")
    model = build_model(chk_filename, in_joints, in_dims, out_joints,
                        filter_widths, True, channels, embedding_len, classes)
    pretrained = torch.load('../../virtual_trainer/Virtual_trainer/checkpoint/combinedlearning2-5.pth')
    model.load_state_dict(pretrained['model_state_dict'])
    with torch.no_grad():
        model.eval()
        if torch.cuda.is_available():
            model = model.cuda()
            # poses = poses.cuda()
        try:
            poses = np.concatenate(poses)
        except ValueError:
            self.prediction = "No human detected"
            return self
        poses = np.pad(poses, ((54, 0), (0, 0), (0, 0)), 'edge')
        poses = torch.Tensor(np.expand_dims(poses, axis=0)).cuda()
        # print(f'Poses shape: {poses.shape}')
        embeds, preds = model(poses)
        kp_3d = model.transform.get_kp()
        n_frames = kp_3d.shape[1]
        kp_3d *= np.array([1, -1, 1])
        kp_3d = kp_3d.reshape(-1)
        # print(f'Preds shape:{preds.shape}')
        # print(preds)
        softmax = torch.nn.Softmax(1)
        pred = softmax(preds)
        pred = pred.detach().cpu().numpy().squeeze()
        print(pred)
        preds = np.argmax(pred, axis=1)
        print(preds)
        values, counts = np.unique(preds, return_counts=True)
        # print(values)
        # print(counts)
        ind = np.argmax(counts)
        print(EXC_DICT[values[ind]])
        # msgbox(f'Predicted exercise: {EXC_DICT[values[ind]]}', 'Result')
        self.prediction = EXC_DICT[values[ind]]
        print(self.prediction)
    return kp_3d, n_frames
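# For reference, the majority vote above works like this: if the per-frame
# argmax predictions were [2, 2, 5, 2], np.unique(..., return_counts=True)
# returns values=[2, 5] with counts=[3, 1], and argmax(counts) selects
# class 2, i.e. the most frequent exercise label across frames.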
def train():
    dataset = build_DataLoader()
    vocabulary_list = sorted(dataset.vocabulary.word2index.items(),
                             key=lambda x: x[1])
    save_vocabulary(vocabulary_list)
    vocab_size = dataset.get_vocabulary_size()
    model = build_model(vocab_size, load_ckpt=True)
    print(model)
    optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    criterion = nn.CrossEntropyLoss()

    start = time.time()
    total_batch = len(dataset)
    ckpts = get_ckpts()
    iter_idx = 0 if len(ckpts) == 0 else max(ckpts)
    print_loss_total = 0.0
    milestones = init_milestone(total_batch)
    n_iters = milestones[-1]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=milestones,
                                               gamma=0.5)
    scheduler.step(iter_idx)
    print('Start Training. total: %s iterations' % n_iters)

    while iter_idx < n_iters:
        iter_idx += 1
        scheduler.step()
        input_group, target_group = dataset.random_batch()
        # zero gradients
        optimizer.zero_grad()
        # run seq2seq
        _, loss = model(criterion, input_group, target_group,
                        teacher_forcing_ratio=1)
        print_loss_total += loss.item()
        loss.backward()
        clip_grad_norm(model.parameters(), config.clip)
        # update parameters
        optimizer.step()

        if iter_idx % config.print_every == 0:
            test_loss = model_evaluate(model, criterion, dataset)
            print_summary(start, iter_idx, n_iters,
                          math.exp(print_loss_total / config.print_every),
                          optimizer.param_groups[0]['lr'])
            print('Test loss: %.4f' % (math.exp(test_loss)))
            print_loss_total = 0.0
            # hot_update_lr(optimizer)
        if iter_idx % config.save_every == 0:
            save_model(model, iter_idx)
            # break
    save_model(model, iter_idx)
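# `init_milestone` is not shown in this snippet. A plausible sketch, assuming
# it returns increasing iteration counts at which MultiStepLR halves the
# learning rate, with the last milestone doubling as the total iteration
# budget (the decay schedule here is illustrative, not the author's):
def init_milestone(total_batch, decays=5, epochs_per_decay=2):
    # e.g. with total_batch=100: [200, 400, 600, 800, 1000]
    return [total_batch * epochs_per_decay * (i + 1) for i in range(decays)]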
def main():
    vocab = load_vocabulary()
    model = build_model(len(vocab.word2index), load_ckpt=True,
                        ckpt_epoch=ckpt_epoch)
    bot = BotAgent(model, vocab)
    # IRIS.initialise()
    while True:
        user_input = raw_input('me: ')
        if user_input.strip() == '':
            continue
        Iris_resp = IRIS.main(user_input, 2)
        if Iris_resp != '---$---' and len(Iris_resp.split()) <= 10:
            print('%s: %s' % ("Iris", Iris_resp))
        else:
            print('%s: %s' % ("Seq", bot.response(user_input)))
def main():
    vocab = load_vocabulary()
    model = build_model(len(vocab.word2index), load_ckpt=True,
                        ckpt_epoch=ckpt_epoch)
    bot = BotAgent(model, vocab)
    while True:
        user_input = raw_input('me: ')
        if user_input.strip() == '':
            continue
        response = bot.response(user_input)
        print('%s: %s' % (BOT_NAME, response))
        # speak the response with the platform's text-to-speech tool
        curr_sys = platform.system()
        if curr_sys == 'Linux':
            os.system('echo %s | festival --tts' % response)
        elif curr_sys == 'Darwin':
            os.system('say %s' % response)
def load_checkpoint(filepath):
    checkpoint = torch.load(filepath)
    learning_rate = checkpoint['learning_rate']
    hidden_units = checkpoint['hidden']
    class_to_idx = checkpoint['class_to_idx']
    model_type = checkpoint['model_type']
    output_size = len(class_to_idx)
    model = build_model(class_to_idx, model_type, hidden_units, output_size)
    model.load_state_dict(checkpoint['state_dict'])
    return model, learning_rate, hidden_units, class_to_idx
def load_checkpoint(checkpoint):
    '''Load the checkpoint file and rebuild the model.'''
    state = torch.load(checkpoint)
    arch = state['arch']
    lr = float(state['learning_rate'])
    hidden_units = int(state['hidden_units'])
    model, optimizer, criterion = \
        model_utils.build_model(arch, hidden_units, lr)
    model.class_to_idx = state['class_to_idx']
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    return model
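# The loader above implies a writer that stored these exact keys. A minimal
# sketch of that counterpart (the function name and signature are assumed,
# since the original save routine is not part of this snippet):
import torch

def save_checkpoint(path, model, optimizer, arch, learning_rate, hidden_units):
    state = {
        'arch': arch,
        'learning_rate': learning_rate,
        'hidden_units': hidden_units,
        'class_to_idx': model.class_to_idx,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    torch.save(state, path)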
def train():
    data_set = build_data_loader(batch_size=BATCH_SIZE)
    vocabulary_list = sorted(data_set.vocabulary.word2index.items(),
                             key=lambda x: x[1])
    save_vocabulary(vocabulary_list)
    vocab_size = data_set.get_vocabulary_size()
    model = build_model(vocab_size)
    model_optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    start = time.time()
    data_set_len = len(data_set)
    epoch = 0
    print_loss_total = 0.0
    print('Start Training.')
    while epoch < N_EPOCHS:
        epoch += 1
        input_group, target_group = data_set.random_batch()
        # zero gradients
        model_optimizer.zero_grad()
        # run seq2seq
        all_decoder_outputs = model(input_group, target_group,
                                    teacher_forcing_ratio=1)
        target_var, target_lens = target_group
        # loss calculation and back-propagation
        loss = masked_cross_entropy(
            all_decoder_outputs.transpose(0, 1).contiguous(),
            target_var.transpose(0, 1).contiguous(),
            target_lens)
        print_loss_total += loss.data
        loss.backward()
        clip_grad_norm_(model.parameters(), CLIP)
        # update parameters
        model_optimizer.step()

        if epoch % PRINT_EVERY == 0:
            test_loss = model_evaluate(model, data_set)
            print_summary(start, epoch,
                          math.exp(print_loss_total / PRINT_EVERY))
            print('Test PPL: %.4f' % math.exp(test_loss))
            print_loss_total = 0.0
        if epoch % SAVE_EVERY == 0:
            save_model(model, epoch)
            # break
    save_model(model, epoch)
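# `masked_cross_entropy` is assumed above but not defined in this snippet.
# A minimal sketch matching the call signature (logits: batch x max_len x
# vocab, target: batch x max_len, plus per-sequence lengths); this is one
# common implementation, not necessarily the author's:
import torch
import torch.nn.functional as F

def masked_cross_entropy(logits, target, lengths):
    log_probs = F.log_softmax(logits, dim=-1)
    # negative log-likelihood of each gold token
    nll = -log_probs.gather(2, target.unsqueeze(2)).squeeze(2)
    # mask positions beyond each sequence's true length
    max_len = target.size(1)
    mask = (torch.arange(max_len, device=target.device)[None, :]
            < torch.as_tensor(lengths, device=target.device)[:, None])
    return (nll * mask.float()).sum() / mask.float().sum()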
def __init__(self, checkpoint_dir, checkpoint_addr, log_csv_addr,
             tensorboard_logs, train_keys_path, path_to_data, val_frac,
             final_model_path, mode, threshold_path, img_path, restore):
    print("\nTRAINING FOR {} MODE -----\n".format(mode))
    timesteps = 5
    data_loader = helper(path_to_data, val_frac=val_frac,
                         timesteps=timesteps, shift=2)
    build_keys = {
        'input_shape': (timesteps, data_loader.X_train.shape[2]),
        'mode': mode,
        'timesteps': timesteps
    }
    self.train_generator, self.val_generator = data_loader.generator_train_val(
        train_keys_path=train_keys_path, restore=restore,
        mode=build_keys['mode'])
    self.model = build_model(build_keys)
    self.num_batches_train = data_loader.num_batches_train
    self.num_batches_val = data_loader.num_batches_val
    self.checkpoint_dir = checkpoint_dir
    self.checkpoint_addr = checkpoint_addr
    self.log_csv_addr = log_csv_addr
    self.tensorboard_logs = tensorboard_logs
    self.final_model_path = final_model_path
    self.img_path = img_path
    self.neg_data_yielder_val = data_loader.single_yielder(
        data_loader.X_val_y0, batch_size=32)
    self.pos_data_yielder_val = data_loader.single_yielder(
        data_loader.X_val_y1, batch_size=32)
    print(data_loader.X_val_y1.shape)
    self.neg_data_yielder_train = data_loader.single_yielder(
        data_loader.X_train_y0, batch_size=32)
    self.pos_data_yielder_train = data_loader.single_yielder(
        data_loader.X_train_y1, batch_size=32)
    self.threshold_path = threshold_path
def run_train_all(self):
    self.logger.info("training on all data...")
    train_ds = datasets.SpectrogramDataset(
        self.df,
        self.data_dir,
        sample_rate=self.config.sample_rate,
        composer=self.train_composer,
        secondary_label=self.config.secondary_label
    )
    train_dl = torch.utils.data.DataLoader(
        train_ds, shuffle=True, **self.config.dataloader
    )
    model = model_utils.build_model(
        self.config.model.name,
        n_class=self.n_class,
        in_chans=self.config.model.in_chans,
        pretrained=self.config.model.pretrained,
    )
    if self.config.multi and self.config.gpu:
        self.logger.info("Using parallel gpu")
        model = nn.DataParallel(model)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), float(self.config.learning_rate))
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 10)
    if self.config.mixup:
        self.logger.info("use mixup")
    model_utils.train_model(
        epoch=self.epoch,
        model=model,
        train_loader=train_dl,
        val_loader=None,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=criterion,
        device=self.device,
        threshold=self.config.threshold,
        best_model_path=None,
        logger=self.logger,
        mixup=self.config.mixup,
    )
    model_utils.save_pytorch_model(model, self.save_dir / "all_model.pth")
    self.logger.info(f'save model to {self.save_dir / "all_model.pth"}')
def main():
    # -------------------------------
    # Multi-GPU training checks
    if config["multi_GPUs_training"] and config["num_GPUs"] < 2:
        print("Error: multi-GPU training requires at least two GPUs")
    if config["multi_GPUs_training"]:
        if config["batch_size"] < config["num_GPUs"]:
            config["batch_size"] = config["num_GPUs"]
        if config["validation_batch_size"] < config["num_GPUs"]:
            config["validation_batch_size"] = config["num_GPUs"]
        # make sure the batch sizes are divisible by num_GPUs
        batch_per_gpu = config["batch_size"] // config["num_GPUs"]
        config["batch_size"] = config["num_GPUs"] * batch_per_gpu
        batch_per_gpu = config["validation_batch_size"] // config["num_GPUs"]
        config["validation_batch_size"] = config["num_GPUs"] * batch_per_gpu
        print("Do multi-GPUs training")
        print("batch size: %d" % (config["batch_size"]))
        print("validation batch size: %d" % (config["validation_batch_size"]))
    print("Training data path: %s" % (config["datasets_path"]))

    # -------------------------------
    # get all dataset file names
    data_files = fetch_data_files(
        config["datasets_path"],
        config["training_modalities"] + config["output_modalities"])
    print("num of datasets %d" % (len(data_files)))
    # target domain files
    targetdata_files = fetch_data_files(config["targetdata_path"],
                                        config["target_modalities"])

    # -------------------------------
    # Create data generators for training and validation; they can load data
    # batch by batch quickly using multiple workers and buffers.
    training_list, validation_list = get_validation_split(
        data_files, config["training_file"], config["validation_file"],
        data_split=config["validation_split"])
    # Make sure the number of training and validation cases is divisible by
    # the batch size when doing multi-GPU training.
    if config["multi_GPUs_training"]:
        num_training_list = len(training_list) // config["batch_size"] * config["batch_size"]
        num_validation_list = len(validation_list) // config["batch_size"] * config["batch_size"]
        training_list = training_list[0:num_training_list]
        validation_list = validation_list[0:num_validation_list]
    training_generator = DataGenerator(data_files, training_list,
                                       batch_size=config["batch_size"],
                                       shuffle=True,
                                       input_shape=config["image_shape"],
                                       targetdata_files=targetdata_files)
    validation_generator = DataGenerator(data_files, validation_list,
                                         batch_size=config["batch_size"],
                                         shuffle=False,
                                         input_shape=config["image_shape"],
                                         targetdata_files=targetdata_files)
    print("num of training cases %d" % (len(training_list)))
    print("num of validation cases %d" % (len(validation_list)))

    # -------------------------------
    # Build the neural network
    print("Build model")
    unet, unet_t = unet_model_3d(input_shape=config["input_shape"],
                                 pool_size=config["pool_size"],
                                 deconvolution=config["deconvolution"],
                                 depth=config["layer_depth"],
                                 n_base_filters=config["n_base_filters"],
                                 kernel=config["conv_kernel"],
                                 batch_normalization=config["batch_normalization"],
                                 activation_name=config["activation"])
    # unet_t is used to save the weights of the model used for the target domain
    # save the model structure
    # save_model(unet_t, config["model_file"])
    print("unet summary")
    print(unet.summary())

    # ------------------------------
    # Compile the model. Change the optimizer to suit your needs.
    print("Compile models")
    # optimizer = optimizers.RMSprop(lr=config["initial_learning_rate"], rho=0.9)
    # optimizer = optimizers.SGD(lr=config["initial_learning_rate"], decay=1e-6, momentum=0.9, nesterov=True)
    # optimizer = optimizers.Adagrad(lr=config["initial_learning_rate"], epsilon=None, decay=0.0)
    optimizer = optimizers.Adam(lr=config["initial_learning_rate"],
                                beta_1=0.9, beta_2=0.999, epsilon=None,
                                decay=0.0, amsgrad=False)
    model, parallel_model = build_model(
        unet,
        multi_gpus_training=config["multi_GPUs_training"],
        num_gpus=config["num_GPUs"],
        optimizer=optimizer,
        loss=config["loss"],
        loss_weight=config["loss_weight"],
        metrics=config["metrics"])

    # -------------------------------
    # run training
    train_model(save_model=unet_t,
                model=model,
                parallel_model=parallel_model,
                model_file=config["model_file"],
                weight_file=config["model_weight_file"],
                training_generator=training_generator,
                validation_generator=validation_generator,
                steps_per_epoch=len(training_list) // config["batch_size"],
                validation_steps=len(validation_list) // config["validation_batch_size"],
                initial_learning_rate=config["initial_learning_rate"],
                learning_rate_drop=config["learning_rate_drop"],
                learning_rate_patience=config["patience"],
                early_stopping_patience=config["early_stop"],
                n_epochs=config["n_epochs"],
                n_workers=config["n_workers"],
                use_multiprocessing=config["use_multiprocessing"],
                max_queue_size=config["max_queue_size"])
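# Worked example of the batch rounding above: with num_GPUs = 3 and a
# requested batch_size of 10, batch_per_gpu = 10 // 3 = 3, so the effective
# batch_size becomes 3 * 3 = 9, which splits evenly across the GPUs.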
train, test = train_test_split(data, test_size=0.1, shuffle=True)
# u'正常' means "normal"; files whose names contain it are normal samples.
file_id_mapping_train = {
    f: 'normal' if u'正常' in f else 'defect' for f in train
}
file_id_mapping_test = {f: 'normal' if u'正常' in f else 'defect' for f in test}
file_id_mapping_all = {f: 'normal' if u'正常' in f else 'defect' for f in data}
train_gen = sample_gen(file_id_mapping_train)
# print gen(train_gen, batch_size).next()
test_gen = sample_gen(file_id_mapping_test)
all_gen = sample_gen(file_id_mapping_all)

model = build_model()
model = multi_gpu_model(model, gpus=3)
model.compile(loss=identity_loss, optimizer=Adam(0.000001))

# checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1,
#                              save_best_only=True, mode='min')
# early = EarlyStopping(monitor="val_loss", mode="min", patience=5)
# reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
# callbacks_list = [checkpoint, early, reduce_lr]
# history = model.fit_generator(gen(train_gen, batch_size),
#                               validation_data=gen(test_gen, batch_size),
#                               epochs=60, verbose=1, workers=4,
#                               use_multiprocessing=True,
#                               callbacks=callbacks_list,
#                               steps_per_epoch=500, validation_steps=30)
# model.compile(loss=identity_loss, optimizer=SGD(0.000001))
def run_train_cv(self):
    oof_preds = np.zeros((len(self.df), self.n_class))
    best_val_loss = 0
    for i_fold, (trn_idx, val_idx) in enumerate(self.fold_indices):
        self.logger.info("-" * 10)
        self.logger.info(f"fold: {i_fold}")
        train_df = self.df.iloc[trn_idx].reset_index(drop=True)
        val_df = self.df.iloc[val_idx].reset_index(drop=True)
        # concat nocall df
        # val_df = pd.concat([val_df, self.nocall_df]).reset_index()
        train_ds = datasets.SpectrogramDataset(
            train_df,
            self.data_dir,
            sample_rate=self.config.sample_rate,
            composer=self.train_composer,
            secondary_label=self.secondary_label,
        )
        valid_ds = datasets.SpectrogramDataset(
            val_df,
            self.data_dir,
            sample_rate=self.config.sample_rate,
            composer=self.val_composer,
            secondary_label=self.secondary_label
        )
        train_dl = torch.utils.data.DataLoader(
            train_ds, shuffle=True, **self.config.dataloader
        )
        # reduce batch size to avoid a cudnn error
        valid_dl = torch.utils.data.DataLoader(
            valid_ds,
            shuffle=False,
            num_workers=self.config.dataloader.num_workers,
            batch_size=int(self.config.dataloader.batch_size / 2),
            pin_memory=self.config.dataloader.pin_memory,
        )
        model = model_utils.build_model(
            self.config.model.name,
            n_class=self.n_class,
            in_chans=self.config.model.in_chans,
            pretrained=self.config.model.pretrained,
        )
        if self.config.multi and self.config.gpu:
            self.logger.info("Using parallel gpu")
            model = nn.DataParallel(model)
        # criterion = nn.BCELoss()
        criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.Adam(model.parameters(), float(self.config.learning_rate))
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 10)
        best_model_path = self.save_dir / f"best_model_fold{i_fold}.pth"
        if self.config.mixup:
            self.logger.info("use mixup")
        best_val_loss += model_utils.train_model(
            epoch=self.epoch,
            model=model,
            train_loader=train_dl,
            val_loader=valid_dl,
            optimizer=optimizer,
            scheduler=scheduler,
            criterion=criterion,
            device=self.device,
            threshold=self.config.threshold,
            best_model_path=best_model_path,
            logger=self.logger,
            mixup=self.config.mixup,
        )
        model = model_utils.load_pytorch_model(
            model_name=self.config.model.name,
            path=best_model_path,
            n_class=self.n_class,
            in_chans=self.config.model.in_chans,
        )
        preds = model_utils.predict(
            model, valid_dl, self.n_class, self.device, sigmoid=True
        )
        oof_preds[val_idx, :] = preds
    # oof_score = self.metrics(self.y, oof_preds)
    best_val_loss /= len(self.fold_indices)
    return oof_preds, best_val_loss
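# Note on the loss/prediction pairing above: nn.BCEWithLogitsLoss consumes
# raw logits, so model_utils.predict is called with sigmoid=True to turn the
# logits into probabilities before they are written into oof_preds.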
# label mapping
with open('cat_to_name.json', 'r') as f:
    cat_to_name = json.load(f)

## building the model
# hyperparameters
hidden_units = int(args['hidden_units'])
output_size = len(class_to_idx)
dropout = 0.5
learning_rate = 0.002
model_type = args['arch']

# create the model
model = build_model(class_to_idx, model_type, hidden_units, output_size,
                    dropout)

# define criterion and optimizer
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.classifier.parameters(), lr=learning_rate,
                      momentum=0.9)

# train the network
epochs = int(args['epochs'])
print_every = 40
steps = 0

## change to cuda
device = 'cuda' if args['device'] == 'gpu' else 'cpu'
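# The training loop that consumes these settings is not part of this snippet;
# it would move the model and each batch to the chosen device, for example
# (illustrative, not the original code):
# model.to(device)
# for images, labels in dataloader:
#     images, labels = images.to(device), labels.to(device)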
file_id_mapping_train = {
    k: v for k, v in zip(train.images.values, train.classes.values)
}
file_id_mapping_test = {
    k: v for k, v in zip(test.images.values, test.classes.values)
}
file_id_mapping_all = {
    k: v for k, v in zip(data.images.values, data.classes.values)
}
all_gen = sample_gen(file_id_mapping_all)
model, base_model = build_model()
model = multi_gpu_model(model, gpus=4)
model.compile(loss=identity_loss, optimizer=Adam(0.000001))


def compute_distance(file_vectors, file, files, multiple, sample_num, reverse):
    # distance from `file` to every candidate in `files`
    distances = [(f, euclidean(file_vectors[file], file_vectors[f]))
                 for f in files]
    # shrink the sample as mining rounds progress, but keep at least 2
    sample_num = max(int(len(distances) * 0.8 ** multiple), 2)
    distances = sorted(distances, key=lambda d: d[1],
                       reverse=reverse)[:sample_num]
    return [d[0] for d in distances]
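# Tiny illustration of compute_distance with toy vectors (names and values
# are made up for demonstration):
# vectors = {'a': [0.0, 0.0], 'b': [1.0, 0.0], 'c': [5.0, 0.0]}
# compute_distance(vectors, 'a', ['b', 'c'], multiple=1, sample_num=2,
#                  reverse=False)   # -> ['b', 'c'], nearest candidate first
# With reverse=True the farthest candidates come first; in standard triplet
# mining, nearest negatives and farthest positives are the "hard" examples.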
def vp3d_recipe2(self):
    clip_df = interpolate(self.clip_df, interpolate_feet=False)
    clip_df = delete_nans(clip_df)
    multiplier = round(800 / 224, 2)
    clip_df = rescale_keypoints(clip_df, multiplier)
    actions, poses = fetch_keypoints(clip_df)
    classes = 8
    chk_filename = os.path.join(DATAPOINT, 'BaseModels', 'epoch_45.bin')
    pretrained_weights = torch.load(chk_filename,
                                    map_location=lambda storage, loc: storage)
    model = NaiveBaselineModel(in_joints, in_dims, out_joints, filter_widths,
                               pretrained_weights, embedding_len, classes,
                               causal=True, dropout=0.25, channels=channels)
    receptive_field = model.base_model.receptive_field()
    pad = (receptive_field - 1)
    causal_shift = pad
    chk_filename = os.path.join(CHECKPATH, "Recipe-2-epoch-19.pth")
    checkp = torch.load('/home/artursil/Documents/virtual_trainer/Virtual_trainer/checkpoint/Recipe-2-epoch-19.pth')
    # checkp = torch.load('/home/artursil/Documents/virtual_trainer/Virtual_trainer/checkpoint/model-6.pth')
    model.load_state_dict(checkp['model_state_dict'])
    model_rank = SimpleRegression([128, 64, 32])
    chk_filename = os.path.join(CHECKPATH,
                                "regressor-simple-regressor-grouped-512-750.pth")
    model_rank.load_state_dict(torch.load(chk_filename)['model_state_dict'])
    with torch.no_grad():
        model.eval()
        if torch.cuda.is_available():
            model = model.cuda()
            model_rank = model_rank.cuda()
            # poses = poses.cuda()
        try:
            poses = np.concatenate(poses)
        except ValueError:
            self.prediction = "No human detected"
            return self
        poses = np.pad(poses, ((54, 0), (0, 0), (0, 0)), 'edge')
        poses = torch.Tensor(np.expand_dims(poses, axis=0)).cuda()
        # print(f'Poses shape: {poses.shape}')
        # embeds, preds = model(poses)
        preds = model(poses)
        softmax = torch.nn.Softmax(1)
        pred = softmax(preds)
        pred = pred.detach().cpu().numpy().squeeze()
        print(pred)
        preds = np.argmax(pred, axis=0)
        print(preds)
        values, counts = np.unique(preds, return_counts=True)
        print(values)
        print(counts)
        ind = np.argmax(counts)
        print(values[ind])
        print(EXC_DICT[values[ind]])
        self.prediction = EXC_DICT[values[ind]]

    ######
    chk_filename = os.path.join(CHECKPATH, "Recipe-2-epoch-19.pth")
    model = build_model(chk_filename, in_joints, in_dims, out_joints,
                        filter_widths, True, channels, embedding_len, classes)
    with torch.no_grad():
        model.eval()
        if torch.cuda.is_available():
            model = model.cuda()
            model_rank = model_rank.cuda()
            # poses = poses.cuda()
        embeds, preds = model(poses)
        softmax = torch.nn.Softmax(1)
        pred = softmax(preds)
        pred = pred.detach().cpu().numpy().squeeze()
        print(pred)
        preds = np.argmax(pred, axis=1)
        print(preds)
        values, counts = np.unique(preds, return_counts=True)
        print(values)
        print(counts)
        ind = np.argmax(counts)
        print(values[ind])
        print(EXC_DICT[values[ind]])
        self.prediction = EXC_DICT[values[ind]]
    ######
    # ratings = model_rank(embeds).detach().cpu().numpy()
    # self.rating = np.mean(ratings)
    return self
                    help='mini-batch size')
parser.add_argument('--epochs', type=int, default=100,
                    help='number of iterations for training')
parser.add_argument('--lags', type=int, default=48)
args = parser.parse_args()

event_info = args.event_info
date_time = args.date_time
model_type = args.model
lags = args.lags
metrics = defaultdict(list)

model = build_model(lags, model=model_type, summary=True,
                    date_time=args.date_time,
                    combined_model=args.location_aware,
                    event_info=event_info)

if args.location_aware:
    X_train, y_train = load_combined_data(lags=lags, split="train",
                                          date_time=date_time,
                                          event_info=event_info)
    X_val, y_val = load_combined_data(lags=lags, split="val",
                                      date_time=date_time,
                                      event_info=event_info)
    weights_model = join(
        exp_logs_root,
        "{}_{}_{}_weights.best.hdf5".format(lags, model_type, "combined"))
    help=f'Sizes of hidden layers in model classifier. Can pass multiple '
         f'arguments. Default: {" ".join([str(_) for _ in def_hidden_units])}.')
parser.add_argument(
    '--output_units', nargs='?', default=def_output_units, type=int,
    help=f'Size of output layer, or number of prediction classes. '
         f'Default is {def_output_units}.')
parser.add_argument(
    '--epochs', nargs='?', default=def_epochs, type=int,
    help=f'Number of training epochs to run. Default is {def_epochs}.')
parser.add_argument('--gpu', action='store_true',
                    help='Pass this flag to use GPU if available.')
args = parser.parse_args()
print(args)

loaders = build_data_loaders(args.data_dir)
model = build_model(args.arch, args.hidden_units, args.output_units)
best_model = train(model, args.epochs, args.learning_rate, args.gpu, loaders)

now = datetime.datetime.strftime(datetime.datetime.now(), '%Y%m%dT%H%M%S')
save_checkpoint(f'{args.save_dir}/checkpoint-{args.arch}-{now}.pth',
                best_model, args.arch)