def __init__(self, loader):
    """Cache the loader's image collections and prepare the threading
    primitives used by the superpixel workers.

    @param loader: Loader instance, used for image retrieval and further
        parameters
    """
    self.loader = loader
    self.log = Logger(verbose=True)

    # Local aliases for the loader's image collections.
    self.image_array_list = loader.image_array_list
    self.image_list = loader.image_list
    self.image_path_list = loader.image_path_list
    self.seq_reading = loader.seq_reading

    # One 256x256 result slot per image, filled by worker threads.
    self.sp_thread = np.zeros((len(self.image_list), 256, 256))

    # Coordination primitives: the semaphore admits one worker at a time.
    self.sp_thread_lock = threading.Lock()
    self.max_thread = threading.Semaphore(1)
    self.log_lock = threading.Lock()

    # Result containers, populated later.
    self.sp_array = np.array([])
    self.sp_labeled = np.array([])
    self.sp_labels = []
def main():
    """Entry point: configure logging and the GPU, build the dataset and the
    training-time augmentation pipeline."""
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    pin_memory = True if use_gpu else False

    # Mirror stdout into a log file (separate files for train vs. test).
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("========\nArgs:{}\n========".format(args))

    if use_gpu:
        # BUG FIX: message typo "usimg" -> "using".
        print("Currently using GPU {}".format(args.gpu_devices))
        # BUG FIX: the CUDA variable is CUDA_VISIBLE_DEVICES (plural); the
        # singular name was silently ignored, so device masking never worked.
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)  # fix the random seed
    else:
        print("Currently using CPU")

    # Keyword spellings ("spilt") match the data_manager API and are kept.
    dataset = data_manager.init_img_dataset(
        root=args.root,
        name=args.dataset,
        spilt_id=args.spilt_id,
        cuhk03_labeled=args.cuhk03_labeled,
        cuhk03_classic_spilt=args.cuhk03_classic_spilt,
    )

    # dataloader & augmentation train query gallery
    # BUG FIX: the transform container is conventionally T.Compose (capital
    # C); T.compose raises AttributeError.
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
def str2bool(value):
    """Parse a command-line boolean ('true'/'t'/'yes'/'1', case-insensitive).

    argparse's `type=bool` treats ANY non-empty string — including "False" —
    as True, so the text must be interpreted explicitly.
    """
    if isinstance(value, bool):
        return value
    return value.lower() in ('true', 't', 'yes', '1')


def main():
    """Parse hyperparameters and dispatch to training or evaluation."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', type=str, default='train', help='train or eval')
    parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
    parser.add_argument('--epochs', type=int, default=10, help='number of training epochs')
    parser.add_argument('--batch_size', type=int, default=64, help='number of examples to process in a batch')
    parser.add_argument('--num_classes', type=int, default=6, help='number of target classes')
    parser.add_argument('--max_norm', type=float, default=5.0, help='max norm of gradient')
    # BUG FIX: was `type=bool`, which made "--embed_trainable False" parse
    # as True; str2bool parses the flag text correctly.
    parser.add_argument('--embed_trainable', type=str2bool, default=True, help='finetune pre-trained embeddings')
    parser.add_argument('--kernel_sizes', nargs='+', type=int, default=[2, 3, 4], help='kernel sizes for the convolution layer')
    # NOTE: the default is a torch.device while user-supplied values stay
    # strings; both are accepted by `.to(...)` downstream.
    parser.add_argument('--device', type=str, default=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'))
    parser.add_argument('--p', type=float, default=0.5, help='dropout rate')
    parser.add_argument('--c_out', type=int, default=32, help='output channel size of the convolution layer')
    args = parser.parse_args()

    if args.mode == 'train':
        sys.stdout = Logger(TRAIN_LOG_LOC)
        print_statement('HYPERPARAMETER SETTING')
        print_flags(args)
        train(args, MODEL_LOC)
    else:
        sys.stdout = Logger(TEST_LOG_LOC)
        print_statement('HYPERPARAMETER SETTING')
        print_flags(args)
        test(args, MODEL_LOC, LABEL_JSON_LOC)
def preprocess_pipeline(args):
    """Run the two offline preprocessing stages, skipping any stage whose
    output directory already exists.

    Stage 0 converts the raw .mat files (sample level); stage 1 slices the
    converted data into fixed windows (segment level).
    """
    # [STEP 0] load the .mat files (sample-level)
    if os.path.exists(args.path_raw):
        print(paint("[STEP 0] Files already loaded!"))
    else:
        # Redirect stdout so the stage is logged alongside its output.
        sys.stdout = Logger(os.path.join(args.path_raw, "log_raw.txt"))
        print(paint("[STEP 0] Loading the .mat files..."))
        load_mat(
            path_data=args.path_data,
            path_raw=args.path_raw,
            class_map=args.class_map,
        )

    # [STEP 1] partition the datasets (segment-level)
    w, s = args.window, args.stride
    if os.path.exists(args.path_processed):
        print(
            paint(
                f"[STEP 1] Dataset already partitioned (window,stride) = ({w},{s})!"
            ))
    else:
        sys.stdout = Logger(
            os.path.join(args.path_processed, f"log_{w}_{s}.txt"))
        print(
            paint(
                f"[STEP 1] Partitioning the dataset (window,stride) = ({w},{s})..."
            ))
        partition(
            path_raw=args.path_raw,
            path_processed=args.path_processed,
            window=w,
            stride=s,
            class_map=args.class_map,
        )
def start_test(experiment_path, epoch, num_plot_samples, timesteps,
               plot_window_size, device):
    """Reload a saved GAN experiment at a given checkpoint epoch and produce
    test plots and animations.

    @param experiment_path: directory of the training run to evaluate
    @param epoch: checkpoint epoch whose model files are loaded
    @param num_plot_samples: number of samples the plotter draws
    @param timesteps: number of timesteps used by the plotter
    @param plot_window_size: the literal string 'args' to reuse the
        environment's stored window size, otherwise a concrete size
    @param device: torch device for the test run
    """
    # BUG FIX: was `assert(type(experiment_path) == str)` — asserts are
    # stripped under `python -O`; raise explicitly instead.
    if not isinstance(experiment_path, str):
        raise TypeError("experiment_path must be a str")

    # Load the training and environment arguments, and environment
    args, env_args = load_args(experiment_path)
    env = env_dict[env_args['env_name']](device=device)

    # Set the parameters from args
    act_func_disc = act_funcs[args['act_func_disc']]
    act_func_gen = act_funcs[args['act_func_gen']]

    # Parameters for the test
    args['device'] = device
    args['do_logging'] = True
    args['experiment_name'] = 'TEST_' + str(env.name)
    args['show_plots'] = True
    # save the experiment path which is being tested
    args['original_experiment_path'] = experiment_path

    # Load the logger and the plotter
    the_logger = Logger(args)
    the_plotter = TestPlotter(args, the_logger,
                              num_plot_samples=num_plot_samples,
                              linspace_size=201, timesteps=timesteps)

    # Set plotter settings.
    # BUG FIX: was `plot_window_size is 'args'` — identity comparison with a
    # string literal is implementation-dependent; use equality.
    if plot_window_size == 'args':
        the_plotter.plot_window_size = env.plot_window_size
    else:
        args['plot_window'] = plot_window_size

    # Get the neural network models and load them
    discriminator = DiscNet(dim=env.dim, ns=args['ns'],
                            act_func=act_func_disc, hh=args['hh'],
                            device=device, psi_func=env.psi_func, TT=env.TT)
    generator = GenNet(dim=env.dim, ns=args['ns'], act_func=act_func_gen,
                       hh=args['hh'], device=device, mu=None, std=None,
                       TT=env.TT)
    disc_load_path = join(experiment_path, 'models',
                          'discriminator-epoch-' + str(epoch) + '.pth.tar')
    gen_load_path = join(experiment_path, 'models',
                         'generator-epoch-' + str(epoch) + '.pth.tar')
    # map_location puts the model on CPU regardless of where it was saved.
    disc_load = torch.load(disc_load_path,
                           map_location=lambda storage, loc: storage)
    gen_load = torch.load(gen_load_path,
                          map_location=lambda storage, loc: storage)
    discriminator.load_state_dict(disc_load['model_state_dict'])
    generator.load_state_dict(gen_load['model_state_dict'])
    generator.mu, generator.std = gen_load['gen_mu'], gen_load['gen_std']
    discriminator.eval()
    generator.eval()

    # The iteration to generate plots
    for idx in range(0, 100):
        the_plotter.make_plots(idx, generator, the_logger)
        the_plotter.make_animation(generator, 60)
        time.sleep(3)
        the_plotter.make_new_rho0()
def main():
    """Evaluate deep-ensemble checkpoints: for every architecture configured
    for the chosen dataset, accumulate per-member test log-probabilities over
    5 runs and save the ensembled log-probs of each run."""
    parser = get_parser_ens()
    args = parser.parse_args()
    # Method name comes from this script's filename: "<prefix>-<method>.py".
    args.method = os.path.basename(__file__).split('-')[1][:-3]
    if args.aug_test:
        args.method = args.method + '_augment'
    torch.backends.cudnn.benchmark = True

    # Architectures to evaluate per dataset.
    compute = {
        'CIFAR10': ['VGG16BN', 'PreResNet110', 'PreResNet164', 'WideResNet28x10'],
        'CIFAR100': ['VGG16BN', 'PreResNet110', 'PreResNet164', 'WideResNet28x10'],
        'ImageNet': ['ResNet50']
    }

    for model in compute[args.dataset]:
        args.model = model
        logger = Logger(base='./logs/')
        print('-' * 5, 'Computing results of', model, 'on', args.dataset + '.', '-' * 5)

        loaders, num_classes = get_data(args)
        targets = get_targets(loaders['test'], args)
        args.num_classes = num_classes
        # NOTE: rebinds the loop variable from the model *name* to the network.
        model = get_model(args)

        for run in range(1, 6):
            log_probs = []
            fnames = read_models(args, base=os.path.expanduser(args.models_dir),
                                 run=run if args.dataset != 'ImageNet' else -1)
            # Order checkpoints by their numeric filename suffix.
            fnames = sorted(fnames, key=lambda a: int(a.split('-')[-1].split('.')[0]))

            # Cap ensemble size at 100 members (50 for ImageNet).
            for ns in range(100)[:min(len(fnames), 100 if args.dataset != 'ImageNet' else 50)]:
                start = time.time()
                model.load_state_dict(get_sd(fnames[ns], args))
                ones_log_prob = one_sample_pred(loaders['test'], model)
                log_probs.append(ones_log_prob)
                logger.add_metrics_ts(ns, log_probs, targets, args, time_=start)
                logger.save(args)

            os.makedirs('.megacache', exist_ok=True)
            logits_pth = '.megacache/logits_%s-%s-%s-%s-%s'
            logits_pth = logits_pth % (args.dataset, args.model, args.method, ns + 1, run)
            # Ensemble by averaging member probabilities in log space.
            log_prob = logsumexp(np.dstack(log_probs), axis=2) - np.log(ns + 1)
            print('Save final logprobs to %s' % logits_pth, end='\n\n')
            np.save(logits_pth, log_prob)
def main():
    """Monte-Carlo-dropout evaluation: per try, load one random checkpoint
    and draw 100 stochastic predictions from it, saving the ensembled
    log-probabilities."""
    parser = get_parser_ens()
    args = parser.parse_args()
    # Method name comes from this script's filename: "<prefix>-<method>.py".
    args.method = os.path.basename(__file__).split('-')[1][:-3]
    torch.backends.cudnn.benchmark = True
    if args.aug_test:
        args.method = args.method + '_augment'
    print('Computing for all datasets!')

    # Architectures (with dropout) to evaluate per dataset.
    compute = {
        'CIFAR10': ['VGG16BN', 'WideResNet28x10do'],
        'CIFAR100': ['VGG16BN', 'WideResNet28x10do']
    }

    for model in compute[args.dataset]:
        args.model = model
        logger = Logger()
        print('-' * 5, 'Computing results of', model, 'on', args.dataset + '.', '-' * 5)

        loaders, num_classes = get_data(args)
        targets = get_targets(loaders['test'], args)
        fnames = read_models(args, base=os.path.expanduser(args.models_dir))
        args.num_classes = num_classes
        # NOTE: rebinds the loop variable from the model *name* to the network.
        model = get_model(args)

        for try_ in range(1, 6):
            # Pick one random checkpoint for this try.
            fnames = np.random.permutation(fnames)
            model.load_state_dict(get_sd(fnames[0], args))
            log_probs = []
            for ns in range(100):
                start = time.time()
                # Each forward pass is stochastic (dropout active), so every
                # call yields a fresh ensemble member.
                ones_log_prob = one_sample_pred(loaders['test'], model)
                log_probs.append(ones_log_prob)
                logger.add_metrics_ts(ns, log_probs, targets, args, time_=start)
                logger.save(args)

            os.makedirs('./.megacache', exist_ok=True)
            logits_pth = '.megacache/logits_%s-%s-%s-%s-%s'
            logits_pth = logits_pth % (args.dataset, args.model, args.method, ns + 1, try_)
            # Ensemble by averaging member probabilities in log space.
            log_prob = logsumexp(np.dstack(log_probs), axis=2) - np.log(ns + 1)
            print('Save final logprobs to %s' % logits_pth)
            np.save(logits_pth, log_prob)
            print('Used weights from %s' % fnames[0], end='\n\n')
def __init__(self, access_key=None, secret_key=None):
    """Store the AWS key pair, preferring keys from ``settings``.

    @param access_key: fallback AWS access key, used only when either
        settings key is unset
    @param secret_key: fallback AWS secret key (same rule)
    @raise ExpectedAWSKeys: when no complete key pair is available
    """
    # Class-named logger for this wrapper.
    self.logging = Logger(
        self.__class__.__name__
    ).get_logger()
    self.logging.debug(
        "Initiate class for opswork environments: %s" % (self.__class__.__name__)
    )
    # If either settings key is missing, fall back to the constructor
    # arguments; otherwise the settings pair wins.
    if settings.ACCESS_KEY is None or settings.SECRET_KEY is None:
        self.access_key = access_key
        self.secret_key = secret_key
    else:
        self.access_key = settings.ACCESS_KEY
        self.secret_key = settings.SECRET_KEY
    # Without a complete key pair no AWS connection can be made.
    if self.access_key is None or self.secret_key is None:
        raise ExpectedAWSKeys(
            "Please, provide a secret key and acces key aws, see: http://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSGettingStartedGuide/AWSCredentials.html"
        )
def main():
    """HAR pipeline driver: preprocess, then either train (with validation)
    and test, or run inference-only evaluation."""
    # get experiment arguments
    args, config_dataset, config_model = get_args()

    # [STEP 0 and 1] load the .mat files (sample-level) and partition the datasets (segment-level)
    preprocess_pipeline(args)

    if args.train_mode:
        # [STEP 2] create HAR datasets
        dataset = SensorDataset(**config_dataset, prefix="train")
        dataset_val = SensorDataset(**config_dataset, prefix="val")

        # [STEP 3] create HAR models
        # NOTE(review): `model` is only bound when CUDA is available, so the
        # CPU path would raise NameError below — confirm a GPU is assumed.
        if torch.cuda.is_available():
            model = create(args.model, config_model).cuda()
            torch.backends.cudnn.benchmark = True
        sys.stdout = Logger(
            os.path.join(model.path_logs, f"log_main_{args.experiment}.txt"))

        # show args
        print("##" * 50)
        print(paint(f"Experiment: {model.experiment}", "blue"))
        print(
            paint(
                f"[-] Using {torch.cuda.device_count()} GPU: {torch.cuda.is_available()}"
            ))
        print(args)
        get_info_params(model)
        get_info_layers(model)
        print("##" * 50)

        # [STEP 4] train HAR models
        model_train(model, dataset, dataset_val, args)

    # [STEP 5] evaluate HAR models
    dataset_test = SensorDataset(**config_dataset, prefix="test")
    if not args.train_mode:
        # Inference-only run: build a fresh model under the "inference" tag.
        config_model["experiment"] = "inference"
        model = create(args.model, config_model).cuda()
    model_eval(model, dataset_test, args)
def main():
    """Evaluate Bayesian-network checkpoints: load one checkpoint per
    dataset/model, draw repeated stochastic predictions, and save the
    ensembled log-probabilities for each of 5 repeats."""
    parser = get_parser_ens()
    args = parser.parse_args()
    # Method name comes from this script's filename: "<prefix>-<method>.py".
    args.method = os.path.basename(__file__).split('-')[1][:-3]
    torch.backends.cudnn.benchmark = True
    if args.aug_test:
        args.method = args.method + '_augment'
    os.makedirs('./logs', exist_ok=True)

    # Bayesian architectures to evaluate per dataset.
    compute = {
        'CIFAR10': ['BayesVGG16BN', 'BayesPreResNet110', 'BayesPreResNet164', 'BayesWideResNet28x10'],
        'CIFAR100': ['BayesVGG16BN', 'BayesPreResNet110', 'BayesPreResNet164', 'BayesWideResNet28x10'],
        'ImageNet': ['BayesResNet50']
    }

    for model in compute[args.dataset]:
        args.model = model
        logger = Logger()
        print('-'*5, 'Computing results of', model, 'on', args.dataset + '.', '-'*5)

        loaders, num_classes = get_data(args)
        targets = get_targets(loaders['test'], args)
        fnames = read_models(args, base=os.path.expanduser(args.models_dir))
        args.num_classes = num_classes
        # NOTE: rebinds the loop variable from the model *name* to the network.
        model = get_model(args)

        for run in range(1, 6):
            print('Repeat num. %s' % run)
            log_probs = []
            # Single checkpoint; stochasticity comes from the Bayesian layers.
            checkpoint = get_sd(fnames[0], args)
            model.load_state_dict(checkpoint)
            for ns in range(100 if args.dataset != 'ImageNet' else 50):
                start = time.time()
                ones_log_prob = one_sample_pred(loaders['test'], model)
                log_probs.append(ones_log_prob)
                logger.add_metrics_ts(ns, log_probs, targets, args, time_=start)
                logger.save(args)

            os.makedirs('.megacache', exist_ok=True)
            logits_pth = '.megacache/logits_%s-%s-%s-%s-%s'
            logits_pth = logits_pth % (args.dataset, args.model, args.method, ns+1, run)
            # Ensemble by averaging member probabilities in log space.
            log_prob = logsumexp(np.dstack(log_probs), axis=2) - np.log(ns+1)
            np.save(logits_pth, log_prob)
def main():
    """Evaluate a trained DSN video-summarization model on every video in an
    HDF5 dataset, then convert the result file into summaries."""
    sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))
    # NOTE(review): `use_gpu` is read but not defined in this function —
    # presumably a module-level global set elsewhere; confirm.
    if use_gpu:
        print("Currently using GPU {}".format(args.gpu))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU")

    print("Initialize dataset {}".format(args.dataset))
    dataset = h5py.File(args.dataset, 'r')
    num_videos = len(dataset.keys())
    # Every video key in the file is used for testing.
    test_keys = []
    for key in dataset.keys():
        test_keys.append(key)

    print("Load model")
    model = DSN(in_dim=args.input_dim, hid_dim=args.hidden_dim,
                num_layers=args.num_layers, cell=args.rnn_cell)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    if args.model:
        print("Loading checkpoint from '{}'".format(args.model))
        checkpoint = torch.load(args.model)
        model.load_state_dict(checkpoint)
    else:
        start_epoch = 0  # NOTE(review): never read afterwards — dead assignment

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    evaluate(model, dataset, test_keys, use_gpu)

    print("Summary")
    video2summary(os.path.join(args.save_dir, 'result.h5'), args.input, args.save_dir)
    ####
# NOTE(review): fragment of a larger training-setup script; the statements
# before `target_transform` (model/transform construction) are not visible
# in this excerpt.
target_transform = ClassLabel()
training_data = get_training_set(opt, spatial_transform, target_transform)
train_loader = torch.utils.data.DataLoader(
    training_data,
    batch_size=opt.batch_size,
    shuffle=True,
    num_workers=opt.n_threads,
    pin_memory=True)
if opt.learning_policy == '2stream':
    # Epoch-level and batch-level loggers for the 2-stream policy.
    train_logger = Logger(
        os.path.join(opt.result_path, 'train.log'),
        ['epoch', 'loss', 'loss_cls', 'loss_box', 'OBOA', 'MAE', 'MAEP', 'MAEN', 'lr'])
    train_batch_logger = Logger(
        os.path.join(opt.result_path, 'train_batch.log'),
        ['epoch', 'batch', 'iter', 'loss', 'loss_cls', 'loss_box', 'OBOA', 'MAE', 'MAEP', 'MAEN', 'lr'])
    # Policy-specific training loop, imported lazily.
    from train_2stream import train_epoch
# SGD dampening must be 0 when Nesterov momentum is enabled.
if opt.nesterov:
    dampening = 0
else:
    dampening = opt.dampening
finetune_parameters = []
# NOTE(review): fragment of a GAN training script; generator/discriminator
# construction and the rest of the training loop are outside this excerpt.
d_optimizer = optim.Adam(discriminator.parameters(), lr=float(opt.lr_d), betas=(0.5, 0.999))
g_optimizer = optim.Adam(generator.parameters(), lr=float(opt.lr_g), betas=(0.5, 0.999))
loss = nn.BCELoss()

# init fixed noise
fixed_noise = torch.randn(1, nz, 1, 1, device=gpu)

# Additive noise to stabilize Training for DCGAN
initial_additive_noise_var = 0.1
add_noise_var = 0.1

# Create Logger instance
# `make_fresh` wipes old logs unless this run continues a previous one.
logger = Logger(model_name='LRPGAN', data_name=opt.dataset, dir_name=outf,
                make_fresh=True if not opt.cont else False)
print('Created Logger')

# training
for epoch in range(opt.epochs):
    for n_batch, (batch_data, _) in enumerate(dataloader, 0):
        batch_size = batch_data.size(0)
        # Anneal the additive noise over the first quarter of training.
        add_noise_var = adjust_variance(add_noise_var, initial_additive_noise_var,
                                        opt.epochs * len(dataloader) * 1 / 4)
        ############################
        # Train Discriminator
        ###########################
        # train with real
        discriminator.zero_grad()
        real_data = batch_data.to(gpu)
def main():
    """ENAS entry point: build the controller and shared CNN with their
    optimizers and LR schedule, optionally restore a checkpoint, then run
    architecture search or fixed-architecture training."""
    global args
    np.random.seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Log file name reflects search vs. fixed-arc mode.
    if args.fixed_arc:
        sys.stdout = Logger(filename='logs/' + args.output_filename + '_fixed.log')
    else:
        sys.stdout = Logger(filename='logs/' + args.output_filename + '.log')
    print(args)

    data_loaders = load_datasets()

    controller = Controller(search_for=args.search_for,
                            search_whole_channels=True,
                            num_layers=args.child_num_layers,
                            num_branches=args.child_num_branches,
                            out_filters=args.child_out_filters,
                            lstm_size=args.controller_lstm_size,
                            lstm_num_layers=args.controller_lstm_num_layers,
                            tanh_constant=args.controller_tanh_constant,
                            temperature=None,
                            skip_target=args.controller_skip_target,
                            skip_weight=args.controller_skip_weight)
    controller = controller.cuda()

    shared_cnn = SharedCNN(num_layers=args.child_num_layers,
                           num_branches=args.child_num_branches,
                           out_filters=args.child_out_filters,
                           keep_prob=args.child_keep_prob)
    shared_cnn = shared_cnn.cuda()

    # Hyperparameters mirror the reference TF implementation:
    # https://github.com/melodyguan/enas/blob/master/src/utils.py#L218
    controller_optimizer = torch.optim.Adam(params=controller.parameters(),
                                            lr=args.controller_lr,
                                            betas=(0.0, 0.999),
                                            eps=1e-3)

    # https://github.com/melodyguan/enas/blob/master/src/utils.py#L213
    shared_cnn_optimizer = torch.optim.SGD(params=shared_cnn.parameters(),
                                           lr=args.child_lr_max,
                                           momentum=0.9,
                                           nesterov=True,
                                           weight_decay=args.child_l2_reg)

    # https://github.com/melodyguan/enas/blob/master/src/utils.py#L154
    shared_cnn_scheduler = CosineAnnealingLR(optimizer=shared_cnn_optimizer,
                                             T_max=args.child_lr_T,
                                             eta_min=args.child_lr_min)

    if args.resume:
        if os.path.isfile(args.resume):
            print("Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            # args = checkpoint['args']
            shared_cnn.load_state_dict(checkpoint['shared_cnn_state_dict'])
            controller.load_state_dict(checkpoint['controller_state_dict'])
            shared_cnn_optimizer.load_state_dict(checkpoint['shared_cnn_optimizer'])
            controller_optimizer.load_state_dict(checkpoint['controller_optimizer'])
            # Re-attach the restored optimizer to the scheduler.
            shared_cnn_scheduler.optimizer = shared_cnn_optimizer  # Not sure if this actually works
            print("Loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            raise ValueError("No checkpoint found at '{}'".format(args.resume))
    else:
        start_epoch = 0

    if not args.fixed_arc:
        # Architecture search: alternate controller / shared-CNN updates.
        train_enas(start_epoch, controller, shared_cnn, data_loaders,
                   shared_cnn_optimizer, controller_optimizer,
                   shared_cnn_scheduler)
    else:
        assert args.resume != '', 'A pretrained model should be used when training a fixed architecture.'
        train_fixed(start_epoch, controller, shared_cnn, data_loaders)
def train(self, model_dir=constant.train_config['trained_model_dir'],
          model_name=constant.predict_config['best_model_name']):
    """Train the classifier, reporting F1/precision/recall and loss on the
    training and validation sets after every epoch, and saving a weight
    checkpoint per epoch.

    @param model_dir: directory where per-epoch weight files are written
    @param model_name: kept for interface compatibility (the logger uses
        ``self.model_name``)
    """
    iteration_step = 0
    logger = Logger(self.model_name)
    start_idx_epoch = 0
    for epoch in range(start_idx_epoch, start_idx_epoch + self.num_epochs):
        print('Executing Epoch: {}'.format(epoch))
        # execute each batch
        for sample in iter(self.train_batch):
            # extract data and label
            data = sample['feature']
            label = sample['target']
            # clear gradient
            self.optimizer.zero_grad()
            # forward propagation
            batch_output = self.classifier_model.nn_model(data)
            # calculate loss
            loss = self.error(batch_output, label[:, 0, :])
            # calculate gradient and update weight
            loss.backward()
            self.optimizer.step()
            iteration_step += self.batch_size

        # Find metrics on the training and validation datasets
        eval_metric = EvaluationMetric(self.target_num_classes)
        # BUG FIX: the training loss was previously computed on the
        # validation batches; evaluate it on the training batches instead.
        training_loss = eval_metric.calculateLoss(
            self.train_batch, self.batch_size,
            self.classifier_model.nn_model, self.error)
        test_loss = eval_metric.calculateLoss(
            self.valid_batch, self.batch_size,
            self.classifier_model.nn_model, self.error)
        precision_train, recall_train, f1_train = eval_metric.calculateEvaluationMetric(
            self.train_batch, self.batch_size, self.classifier_model.nn_model)
        precision_valid, recall_valid, f1_valid = eval_metric.calculateEvaluationMetric(
            self.valid_batch, self.batch_size, self.classifier_model.nn_model)
        print('Epoch: {}, F1-Score (Training Dataset): {}, F1-Score (Validation Dataset): {}, Training Loss: {}, Validation Loss: {}'
              .format(epoch, f1_train, f1_valid, training_loss, test_loss))
        print('Precision(Training Dataset): {}, Precision(Validation Dataset): {}, Recall(Training Dataset): {}, Recall(Validation Dataset): {}'
              .format(precision_train, precision_valid, recall_train, recall_valid))
        # log the metric in graph with tensorboard
        logger.log(f1_train, f1_valid, training_loss, test_loss, iteration_step)
        # save the model weights
        model_filepath = model_dir + os.sep + 'weight_epoch-{}_loss-{}'.format(epoch, training_loss)
        torch.save(self.classifier_model.nn_model.state_dict(), model_filepath)
    logger.close()
# NOTE(review): fragment of an image-inpainting test script mixing a PyTorch
# dataloader with a TensorFlow graph; G_sample, noisy_images, BATCH_SIZE and
# the image/vector helpers are defined outside this excerpt.
# Grab just the first batch as test samples.
for n_batch, (batch, _) in enumerate(data_loader):
    print(batch.shape)
    y = batch[0:BATCH_SIZE]
    break
# print("ts shape ",test_samples.shape)
test_samples = y.permute(0, 1, 3, 2).numpy()
test_samples = images_to_vectors(test_samples)
test_samples = np.reshape(test_samples, (-1, 28, 28))
print(test_samples.shape)
# fi: corrupted images; batch_mask: mask of the kept (uncorrupted) pixels.
fi, batch_mask = noisy_images(test_samples)
test_samples_nhwc = vectors_to_images(test_samples)
test_samples = images_to_vectors(test_samples)
# Separate loggers for generated, clean, and corrupted images.
logger = Logger(model_name='test', data_name='generator_output')
logger1 = Logger(model_name='test', data_name='true_image')
logger2 = Logger(model_name='test', data_name='noisy_image')
logger1.log_images(test_samples_nhwc, 1, 1, 1, 1, '1', format='NHWC');
test_noise_reshape = images_to_vectors(fi)
test_noise_reshape = vectors_to_images(test_noise_reshape)
logger2.log_images(test_noise_reshape, 1, 1, 1, 1, '1', format='NHWC');
# batch_mask = np.ones((BATCH_SIZE,28,28),np.float32)
# batch_mask[:,9:19,9:19] = 0.0
batch_mask = images_to_vectors(batch_mask)
images = tf.placeholder(tf.float32, shape=(BATCH_SIZE, 28 * 28))
recovered_images = tf.reshape(G_sample, (BATCH_SIZE, 28 * 28))
# L1 contextual loss restricted to the unmasked region.
contextual_loss = tf.reduce_sum(
    tf.contrib.layers.flatten(
        tf.abs(tf.multiply(images, batch_mask) - tf.multiply(G_sample, batch_mask))), 1)
# NOTE(review): fragment — the first line below is the tail of a
# `d_optimizer = optim.Adam(...)` call whose beginning lies outside this
# excerpt; the training loop is also truncated at the end.
                         betas=(0.5, 0.999))
g_optimizer = optim.Adam(generator.parameters(), lr=0.0008, betas=(0.5, 0.999))
dloss = nn.CrossEntropyLoss()
gloss = nn.BCELoss()

# init fixed noise
fixed_noise = torch.randn(1, nz, 1, 1, device=gpu)

# Additive noise to stabilize Training for DCGAN
initial_additive_noise_var = 0.1
add_noise_var = 0.1

# Create Logger instance
logger = Logger(model_name='LRPGAN', data_name=opt.dataset, dir_name=outf)
print('Created Logger')

# training
for epoch in range(opt.epochs):
    for n_batch, (batch_data, _) in enumerate(dataloader, 0):
        batch_size = batch_data.size(0)
        # Anneal the additive noise over the first half of training.
        add_noise_var = adjust_variance(add_noise_var, initial_additive_noise_var,
                                        opt.epochs * len(dataloader) * 1 / 2)
        ############################
        # Train Discriminator
        ###########################
        # train with real
        discriminator.zero_grad()
class KeysAWS(object):
    """Holds an AWS access/secret key pair and lazily opens (and caches) the
    ec2 / opsworks / iam / vpc connections used by the opsworks tooling.

    BUG FIX: previously every property access created a brand-new boto
    connection, so ``__del__`` opened four *fresh* connections just to close
    them while the connections actually handed out were never closed. The
    connections are now created once on first access and reused.
    """

    # Cached connection objects, created on first property access.
    _ec2_conn = None
    _opsworks_conn = None
    _iam_conn = None
    _vpc_conn = None

    def __init__(self, access_key=None, secret_key=None):
        """Resolve the key pair, preferring ``settings`` over the arguments.

        @raise ExpectedAWSKeys: when no complete key pair is available
        """
        self.logging = Logger(
            self.__class__.__name__
        ).get_logger()
        self.logging.debug(
            "Initiate class for opswork environments: %s" % (self.__class__.__name__)
        )
        # If either settings key is missing, fall back to the constructor
        # arguments; otherwise the settings pair wins.
        if settings.ACCESS_KEY is None or settings.SECRET_KEY is None:
            self.access_key = access_key
            self.secret_key = secret_key
        else:
            self.access_key = settings.ACCESS_KEY
            self.secret_key = settings.SECRET_KEY
        if self.access_key is None or self.secret_key is None:
            raise ExpectedAWSKeys(
                "Please, provide a secret key and acces key aws, see: http://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSGettingStartedGuide/AWSCredentials.html"
            )

    def __name__(self):
        return "{}-{}".format(self.access_key, self.secret_key)

    @property
    def _vpc_connection(self):
        # Lazily create and cache the VPC connection.
        if self._vpc_conn is None:
            self._vpc_conn = vpc.connect_to_region(
                aws_access_key_id=self.access_key,
                aws_secret_access_key=self.secret_key,
                region_name=settings.REGION
            )
            self.logging.debug(
                "The connection with vpc was been succesfully"
            )
        return self._vpc_conn

    @property
    def _ec2_connection(self):
        # Lazily create and cache the EC2 connection.
        if self._ec2_conn is None:
            self._ec2_conn = ec2.connect_to_region(
                aws_access_key_id=self.access_key,
                aws_secret_access_key=self.secret_key,
                region_name=settings.REGION
            )
            self.logging.debug(
                "The connection with ec2 was been succesfully"
            )
        return self._ec2_conn

    @property
    def _iam_connection(self):
        # Lazily create and cache the IAM connection (IAM is region-less).
        if self._iam_conn is None:
            self._iam_conn = iam.connection.IAMConnection(
                aws_access_key_id=self.access_key,
                aws_secret_access_key=self.secret_key
            )
            self.logging.debug(
                "The connection with iam was been succesfully"
            )
        return self._iam_conn

    @property
    def _opsworks_conection(self):
        # Lazily create and cache the OpsWorks connection.
        # NOTE: the region is intentionally fixed to us-east-1, as before.
        if self._opsworks_conn is None:
            self._opsworks_conn = opsworks.connect_to_region(
                aws_access_key_id=self.access_key,
                aws_secret_access_key=self.secret_key,
                region_name='us-east-1'
            )
            self.logging.debug(
                "The connection with opsworks was been succesfully"
            )
        return self._opsworks_conn

    def __del__(self):
        # Close only the connections that were actually opened; guard
        # against a partially-initialised instance.
        for conn in (self._ec2_conn, self._opsworks_conn,
                     self._iam_conn, self._vpc_conn):
            if conn is not None:
                conn.close()
def train():
    """SSDG training loop: three source domains each split into real/fake
    dataloaders, single-side adversarial learning on the real features, an
    unbalanced triplet loss on domain labels, and per-"epoch" evaluation on
    the target domain with model selection by HTER."""
    mkdirs(config.checkpoint_path, config.best_model_path, config.logs)
    # load data
    src1_train_dataloader_fake, src1_train_dataloader_real, \
    src2_train_dataloader_fake, src2_train_dataloader_real, \
    src3_train_dataloader_fake, src3_train_dataloader_real, \
    tgt_valid_dataloader = get_dataset(
        config.src1_data, config.src1_train_num_frames,
        config.src2_data, config.src2_train_num_frames,
        config.src3_data, config.src3_train_num_frames,
        config.tgt_data, config.tgt_test_num_frames, config.batch_size)

    # Best-so-far validation statistics.
    best_model_ACC = 0.0
    best_model_HTER = 1.0
    best_model_ACER = 1.0
    best_model_AUC = 0.0
    # 0:loss, 1:top-1, 2:EER, 3:HTER, 4:ACER, 5:AUC, 6:threshold
    valid_args = [np.inf, 0, 0, 0, 0, 0, 0, 0]
    loss_classifier = AverageMeter()
    classifer_top1 = AverageMeter()

    net = DG_model(config.model).to(device)
    ad_net_real = Discriminator().to(device)
    # NOTE(review): ad_net_fake is created but never trained below.
    ad_net_fake = Discriminator().to(device)

    log = Logger()
    log.open(config.logs + config.tgt_data + '_log_SSDG.txt', mode='a')
    log.write(
        "\n----------------------------------------------- [START %s] %s\n\n"
        % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 51))
    print("Norm_flag: ", config.norm_flag)
    log.write('** start training target model! **\n')
    log.write(
        '--------|------------- VALID -------------|--- classifier ---|------ Current Best ------|--------------|\n'
    )
    log.write(
        ' iter | loss top-1 HTER AUC | loss top-1 | top-1 HTER AUC | time |\n'
    )
    log.write(
        '-------------------------------------------------------------------------------------------------------|\n'
    )
    start = timer()
    # Loss functions: softmax for classification; non-hardest triplet on
    # domain labels with margin 0.1.
    criterion = {
        'softmax': nn.CrossEntropyLoss().cuda(),
        'triplet': HardTripletLoss(margin=0.1, hardest=False).cuda()
    }
    optimizer_dict = [
        {
            "params": filter(lambda p: p.requires_grad, net.parameters()),
            "lr": config.init_lr
        },
        {
            "params": filter(lambda p: p.requires_grad, ad_net_real.parameters()),
            "lr": config.init_lr
        },
    ]
    optimizer = optim.SGD(optimizer_dict,
                          lr=config.init_lr,
                          momentum=config.momentum,
                          weight_decay=config.weight_decay)
    # Remember the initial LR of every param group for LR scheduling.
    init_param_lr = []
    for param_group in optimizer.param_groups:
        init_param_lr.append(param_group["lr"])

    # An "epoch" is a fixed number of iterations, not a dataset pass.
    iter_per_epoch = 10

    # One restartable iterator per dataloader; re-created when exhausted.
    src1_train_iter_real = iter(src1_train_dataloader_real)
    src1_iter_per_epoch_real = len(src1_train_iter_real)
    src2_train_iter_real = iter(src2_train_dataloader_real)
    src2_iter_per_epoch_real = len(src2_train_iter_real)
    src3_train_iter_real = iter(src3_train_dataloader_real)
    src3_iter_per_epoch_real = len(src3_train_iter_real)
    src1_train_iter_fake = iter(src1_train_dataloader_fake)
    src1_iter_per_epoch_fake = len(src1_train_iter_fake)
    src2_train_iter_fake = iter(src2_train_dataloader_fake)
    src2_iter_per_epoch_fake = len(src2_train_iter_fake)
    src3_train_iter_fake = iter(src3_train_dataloader_fake)
    src3_iter_per_epoch_fake = len(src3_train_iter_fake)

    max_iter = config.max_iter
    epoch = 1
    if (len(config.gpus) > 1):
        net = torch.nn.DataParallel(net).cuda()

    for iter_num in range(max_iter + 1):
        # Restart any exhausted source iterator.
        if (iter_num % src1_iter_per_epoch_real == 0):
            src1_train_iter_real = iter(src1_train_dataloader_real)
        if (iter_num % src2_iter_per_epoch_real == 0):
            src2_train_iter_real = iter(src2_train_dataloader_real)
        if (iter_num % src3_iter_per_epoch_real == 0):
            src3_train_iter_real = iter(src3_train_dataloader_real)
        if (iter_num % src1_iter_per_epoch_fake == 0):
            src1_train_iter_fake = iter(src1_train_dataloader_fake)
        if (iter_num % src2_iter_per_epoch_fake == 0):
            src2_train_iter_fake = iter(src2_train_dataloader_fake)
        if (iter_num % src3_iter_per_epoch_fake == 0):
            src3_train_iter_fake = iter(src3_train_dataloader_fake)
        if (iter_num != 0 and iter_num % iter_per_epoch == 0):
            epoch = epoch + 1
        # Snapshot the current per-group LRs for logging.
        param_lr_tmp = []
        for param_group in optimizer.param_groups:
            param_lr_tmp.append(param_group["lr"])

        net.train(True)
        ad_net_real.train(True)
        optimizer.zero_grad()
        adjust_learning_rate(optimizer, epoch, init_param_lr,
                             config.lr_epoch_1, config.lr_epoch_2)
        ######### data prepare #########
        # NOTE(review): `.next()` is the Python-2-style iterator call — it
        # relies on the (old) DataLoader iterator exposing a `next` method;
        # newer torch versions require `next(iterator)`.
        src1_img_real, src1_label_real = src1_train_iter_real.next()
        src1_img_real = src1_img_real.cuda()
        src1_label_real = src1_label_real.cuda()
        input1_real_shape = src1_img_real.shape[0]

        src2_img_real, src2_label_real = src2_train_iter_real.next()
        src2_img_real = src2_img_real.cuda()
        src2_label_real = src2_label_real.cuda()
        input2_real_shape = src2_img_real.shape[0]

        src3_img_real, src3_label_real = src3_train_iter_real.next()
        src3_img_real = src3_img_real.cuda()
        src3_label_real = src3_label_real.cuda()
        input3_real_shape = src3_img_real.shape[0]

        src1_img_fake, src1_label_fake = src1_train_iter_fake.next()
        src1_img_fake = src1_img_fake.cuda()
        src1_label_fake = src1_label_fake.cuda()
        input1_fake_shape = src1_img_fake.shape[0]

        src2_img_fake, src2_label_fake = src2_train_iter_fake.next()
        src2_img_fake = src2_img_fake.cuda()
        src2_label_fake = src2_label_fake.cuda()
        input2_fake_shape = src2_img_fake.shape[0]

        src3_img_fake, src3_label_fake = src3_train_iter_fake.next()
        src3_img_fake = src3_img_fake.cuda()
        src3_label_fake = src3_label_fake.cuda()
        input3_fake_shape = src3_img_fake.shape[0]

        # Batch layout: [s1 real | s1 fake | s2 real | s2 fake | s3 real | s3 fake]
        input_data = torch.cat([
            src1_img_real, src1_img_fake, src2_img_real, src2_img_fake,
            src3_img_real, src3_img_fake
        ], dim=0)
        source_label = torch.cat([
            src1_label_real, src1_label_fake, src2_label_real,
            src2_label_fake, src3_label_real, src3_label_fake
        ], dim=0)
        ######### forward #########
        classifier_label_out, feature = net(input_data, config.norm_flag)

        ######### single side adversarial learning #########
        input1_shape = input1_real_shape + input1_fake_shape
        input2_shape = input2_real_shape + input2_fake_shape
        # Slice the real-sample features back out of the concatenated batch;
        # only real features are fed to the domain discriminator.
        feature_real_1 = feature.narrow(0, 0, input1_real_shape)
        feature_real_2 = feature.narrow(0, input1_shape, input2_real_shape)
        feature_real_3 = feature.narrow(0, input1_shape + input2_shape,
                                        input3_real_shape)
        feature_real = torch.cat(
            [feature_real_1, feature_real_2, feature_real_3], dim=0)
        discriminator_out_real = ad_net_real(feature_real)

        ######### unbalanced triplet loss #########
        # All real samples share domain label 0; fake samples get one label
        # per source domain (1, 2, 3).
        real_domain_label_1 = torch.LongTensor(input1_real_shape, 1).fill_(0).cuda()
        real_domain_label_2 = torch.LongTensor(input2_real_shape, 1).fill_(0).cuda()
        real_domain_label_3 = torch.LongTensor(input3_real_shape, 1).fill_(0).cuda()
        fake_domain_label_1 = torch.LongTensor(input1_fake_shape, 1).fill_(1).cuda()
        fake_domain_label_2 = torch.LongTensor(input2_fake_shape, 1).fill_(2).cuda()
        fake_domain_label_3 = torch.LongTensor(input3_fake_shape, 1).fill_(3).cuda()
        source_domain_label = torch.cat([
            real_domain_label_1, fake_domain_label_1,
            real_domain_label_2, fake_domain_label_2,
            real_domain_label_3, fake_domain_label_3
        ], dim=0).view(-1)
        triplet = criterion["triplet"](feature, source_domain_label)

        ######### cross-entropy loss #########
        real_shape_list = []
        real_shape_list.append(input1_real_shape)
        real_shape_list.append(input2_real_shape)
        real_shape_list.append(input3_real_shape)
        real_adloss = Real_AdLoss(discriminator_out_real, criterion["softmax"],
                                  real_shape_list)
        cls_loss = criterion["softmax"](classifier_label_out.narrow(
            0, 0, input_data.size(0)), source_label)

        ######### backward #########
        total_loss = cls_loss + config.lambda_triplet * triplet + config.lambda_adreal * real_adloss
        total_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        loss_classifier.update(cls_loss.item())
        acc = accuracy(classifier_label_out.narrow(0, 0, input_data.size(0)),
                       source_label, topk=(1, ))
        classifer_top1.update(acc[0])

        # Progress line, overwritten in place.
        print('\r', end='', flush=True)
        print(
            ' %4.1f | %5.3f %6.3f %6.3f %6.3f | %6.3f %6.3f | %6.3f %6.3f %6.3f | %s'
            % ((iter_num + 1) / iter_per_epoch, valid_args[0], valid_args[6],
               valid_args[3] * 100, valid_args[4] * 100, loss_classifier.avg,
               classifer_top1.avg, float(best_model_ACC),
               float(best_model_HTER * 100), float(best_model_AUC * 100),
               time_to_str(timer() - start, 'min')),
            end='',
            flush=True)

        if (iter_num != 0 and (iter_num + 1) % iter_per_epoch == 0):
            # 0:loss, 1:top-1, 2:EER, 3:HTER, 4:AUC, 5:threshold, 6:ACC_threshold
            # NOTE(review): this index map differs from the one used when
            # valid_args was initialised above — confirm which is current.
            # NOTE(review): `eval` here shadows the builtin; it is the
            # project's evaluation routine.
            valid_args = eval(tgt_valid_dataloader, net, config.norm_flag)
            # judge model according to HTER
            is_best = valid_args[3] <= best_model_HTER
            best_model_HTER = min(valid_args[3], best_model_HTER)
            threshold = valid_args[5]
            if (valid_args[3] <= best_model_HTER):
                best_model_ACC = valid_args[6]
                best_model_AUC = valid_args[4]
            save_list = [
                epoch, valid_args, best_model_HTER, best_model_ACC,
                best_model_ACER, threshold
            ]
            save_checkpoint(save_list, is_best, net, config.gpus,
                            config.checkpoint_path, config.best_model_path)
            print('\r', end='', flush=True)
            log.write(
                ' %4.1f | %5.3f %6.3f %6.3f %6.3f | %6.3f %6.3f | %6.3f %6.3f %6.3f | %s %s'
                % ((iter_num + 1) / iter_per_epoch, valid_args[0],
                   valid_args[6], valid_args[3] * 100, valid_args[4] * 100,
                   loss_classifier.avg, classifer_top1.avg,
                   float(best_model_ACC), float(best_model_HTER * 100),
                   float(best_model_AUC * 100),
                   time_to_str(timer() - start, 'min'), param_lr_tmp[0]))
            log.write('\n')
            time.sleep(0.01)
# NOTE(review): fragment of a validation script — the first line below is
# the tail of a spatial-transform Compose(...) whose beginning (and the
# model construction) lies outside this excerpt.
    Normalize([114.7748, 107.7354, 99.475], [1, 1, 1])
])
temporal_transform = TemporalCenterCrop(opt.sample_duration, opt.downsample)
# temporal_transform = TemporalBeginCrop(opt.sample_duration)
# temporal_transform = TemporalEndCrop(opt.sample_duration)
target_transform = ClassLabel()
opt.n_val_samples = 1
validation_data = get_validation_set(opt, spatial_transform,
                                     temporal_transform, target_transform)
data_loader = torch.utils.data.DataLoader(validation_data,
                                          batch_size=8,
                                          shuffle=False,
                                          num_workers=1,
                                          pin_memory=True)
# Per-epoch validation metrics logger.
val_logger = Logger(os.path.join(opt.result_path, 'val.log'),
                    ['epoch', 'loss', 'acc'])
if opt.resume_path:
    print('loading checkpoint {}'.format(opt.resume_path))
    checkpoint = torch.load(opt.resume_path)
    opt.begin_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
recorder = []
print('run')
model.eval()
batch_time = AverageMeter()
top1 = AverageMeter()
def main():
    """Entry point for the fine-tuning stage: create the experiment
    directories, build datasets/loaders, set up a VNet with SGD, load
    pretrained weights (or resume a checkpoint), then run the epoch loop
    of train / validate / LR decay."""
    args = parse_args()
    # This script is always the fine-tuning phase, never pre-training.
    args.pretrain = False
    root_path = 'exps/exp_{}'.format(args.exp)
    if not os.path.exists(root_path):
        # os.mkdir (not makedirs): assumes the parent 'exps/' dir already
        # exists — TODO confirm.
        os.mkdir(root_path)
        os.mkdir(os.path.join(root_path, "log"))
        os.mkdir(os.path.join(root_path, "model"))
    base_lr = args.lr  # base learning rate
    train_dataset, val_dataset = build_dataset(args.dataset, args.data_root,
                                               args.train_list)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers,
                                               pin_memory=True)
    # Validation runs one volume at a time.
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             num_workers=args.num_workers,
                                             pin_memory=True)
    model = VNet(args.n_channels, args.n_classes).cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=0.0005)
    #scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.7)
    # Wrap in DataParallel BEFORE loading weights: checkpoint keys carry the
    # 'module.' prefix — presumably saved from a DataParallel model; verify.
    model = torch.nn.DataParallel(model)
    model.train()
    if args.resume is None:
        # Fresh run: initialise from a pre-trained checkpoint (required).
        assert os.path.exists(args.load_path)
        state_dict = model.state_dict()
        print("Loading weights...")
        pretrain_state_dict = torch.load(args.load_path,
                                         map_location="cpu")['state_dict']
        # Drop checkpoint entries the current model does not have.
        # NOTE(review): load_state_dict is strict by default, so model keys
        # missing from the checkpoint would still raise — presumably the
        # checkpoint covers every model key; confirm.
        for k in list(pretrain_state_dict.keys()):
            if k not in state_dict:
                del pretrain_state_dict[k]
        model.load_state_dict(pretrain_state_dict)
        print("Loaded weights")
    else:
        # Resume both model and optimizer state from an interrupted run.
        print("Resuming from {}".format(args.resume))
        checkpoint = torch.load(args.resume, map_location="cpu")
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        model.load_state_dict(checkpoint['state_dict'])
    logger = Logger(root_path)
    saver = Saver(root_path)
    for epoch in range(args.start_epoch, args.epochs):
        train(model, train_loader, optimizer, logger, args, epoch)
        validate(model, val_loader, optimizer, logger, saver, args, epoch)
        adjust_learning_rate(args, optimizer, epoch)
# Hyper Parameters BATCH_SIZE = 32 BATCH_SIZE_TEST = 20 LR = 0.001 # learning rate NUM_WORKERS = 12 NUM_CLASSES = 4 LOG_FILE_NAME = './logs/log_' + time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime()) + '.log' lr_steps = [30, 60, 90, 120] np.random.seed(42) DATA_PATH = '/home/ubuntu5/wxp/datasets/acne4/VOCdevkit2007/VOC2007/JPEGImages_300' log = Logger() log.open(LOG_FILE_NAME, mode="a") def criterion(lesions_num): if lesions_num <= 5: return 0 elif lesions_num <= 20: return 1 elif lesions_num <= 50: return 2 else: return 3 def trainval_test(cross_val_index, sigma, lam):
class OpsWorkSetup(object):
    """Provision an ElasticSearch cluster on AWS OpsWorks: create the stack,
    layer and instances, and manage the security-group / VPC rules around
    them.

    Credentials come from ``etc.settings`` when configured there, otherwise
    from the constructor arguments.  (Python 2 / boto code.)
    """

    def __init__(self, access_key=None, secret_key=None):
        # access_key / secret_key: AWS credentials, only used when
        # settings.ACCESS_KEY / settings.SECRET_KEY are not set.
        self.logging = Logger(
            self.__class__.__name__
        ).get_logger()
        self.logging.debug(
            "Initiate class for opswork environments: %s" % (
                self.__class__.__name__
            )
        )
        # Settings win over the constructor arguments when both are present.
        if not settings.ACCESS_KEY or not settings.SECRET_KEY:
            self.access_key = access_key
            self.secret_key = secret_key
        else:
            self.access_key = settings.ACCESS_KEY
            self.secret_key = settings.SECRET_KEY
        if self.access_key is None or self.secret_key is None:
            raise ExpectedAWSKeys(
                "Please, provide a secret key and acces key aws, see: http://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSGettingStartedGuide/AWSCredentials.html"
            )

    @property
    def conn(self):
        # Fresh OpsWorks connection on every access.
        # NOTE(review): region is hard-coded to 'us-east-1' here while the
        # ec2/vpc connections below use settings.REGION — confirm intended.
        _conn = opsworks.connect_to_region(
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name='us-east-1'
        )
        self.logging.debug(
            "The connection with opsworks was been succesfully"
        )
        return _conn

    @property
    def security_groups(self):
        # EC2 connection, used for security-group management.
        _security_groups = ec2.connect_to_region(
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=settings.REGION
        )
        self.logging.debug(
            "The connection with ec2 was been succesfully"
        )
        return _security_groups

    @property
    def describe_subnets(self):
        # VPC connection, used to enumerate subnets.
        _describe_subnets = vpc.connect_to_region(
            aws_access_key_id=self.access_key,
            aws_secret_access_key=self.secret_key,
            region_name=settings.REGION
        )
        self.logging.debug(
            "The connection with vpc was been succesfully"
        )
        return _describe_subnets

    def create_stack(self):
        """ create stack for modeling environment """
        # Random 8-char suffix keeps stack names unique across runs.
        stack_name = 'ElasticSearchStack-{}'.format(str(uuid.uuid4())[:8])
        # NOTE(review): the `is None` checks are redundant — `not x` already
        # covers None.
        if (not settings.DEFAULT_INSTANCE_PROFILE_ARN or
                settings.DEFAULT_INSTANCE_PROFILE_ARN is None or
                not settings.SERVICE_ROLE_ARN or
                settings.SERVICE_ROLE_ARN is None):
            raise ExpectedAWSRoles("Please, provide the correct services roles, see http://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create.html and check README.md about the access required for this roles.")
        self.stack = self.conn.create_stack(
            name=stack_name,
            region=settings.REGION,
            default_availability_zone=settings.AVAILABLE_ZONE,
            custom_json="{}".format(settings.CUSTOM_JSON_CHEF),
            use_custom_cookbooks=True,
            hostname_theme='Europe_Cities',
            use_opsworks_security_groups=True,
            custom_cookbooks_source={"Type": "git",
                                     "Url": settings.REPOSITORY_URL},
            default_instance_profile_arn=settings.DEFAULT_INSTANCE_PROFILE_ARN,
            service_role_arn=settings.SERVICE_ROLE_ARN,
            default_ssh_key_name=settings.SSH_KEY_NAME_DEFAULT,
            default_os=settings.DEFAULT_OS,
            configuration_manager=settings.CONFIGURATION_MANAGER
        )
        self.logging.debug(
            "The stack: {!r} has been created with successfull".format(
                stack_name
            )
        )
        return self.stack

    def create_security_group(self, network_policies=[]):
        """ get security groups and modeling template for security groups:
        :network_policies (list) e.g:
            [{ 'protocol': 'http', 'from_port': '80',
               'to_port': '80', 'cidr_ip': '172.0.0.1/16' },
             { 'protocol': 'tcp', 'from_port': '9201',
               'to_port': '9201', 'cidr_ip': '172.16.0.1/16' }]
        ** Observation ** Don't accepet rules with 0.0.0.0/0"""
        # NOTE(review): mutable default argument ([]) — harmless while every
        # caller passes a list, but should be `None` plus a local default.
        security_groups = self.security_groups.get_all_security_groups()
        for sg_attributes in security_groups:
            # Only the default OpsWorks security group is managed here.
            if u"AWS-OpsWorks-Default-Server" == sg_attributes.name:
                for new_rule in network_policies:
                    print new_rule  # NOTE(review): leftover debug print (Python 2 statement)
                    try:
                        sg_attributes.authorize(
                            new_rule['protocol'],
                            new_rule['from_port'],
                            new_rule['to_port'],
                            new_rule['cidr_ip']
                        )
                        self.logging.info(
                            "The new rule: {!r}, {!r}, {!r}, {!r} has been created on security group: {!r}".format(
                                new_rule['protocol'],
                                new_rule['from_port'],
                                new_rule['to_port'],
                                new_rule['cidr_ip'],
                                sg_attributes.name
                            )
                        )
                    except EC2ResponseError:
                        # Re-runs are expected: duplicate rules are skipped.
                        self.logging.info(
                            "Specified Rule already exists...skipped"
                        )
                        pass
                # If put rule with "0.0.0.0/0" will be deleted.
                # I decided put this code here, just to force no to have world rule for anywhere
                for rule in sg_attributes.rules:
                    for grant in rule.grants:
                        if u"0.0.0.0/0" == grant.cidr_ip:
                            sg_attributes.revoke(
                                rule.ip_protocol,
                                rule.from_port,
                                rule.to_port,
                                grant.cidr_ip
                            )
                            self.logging.info(
                                "The rule: {!r}, {!r}, {!r}, {!r} has been deleted on security group: {!r}.".format(
                                    rule.ip_protocol,
                                    rule.from_port,
                                    rule.to_port,
                                    grant.cidr_ip,
                                    sg_attributes.name
                                )
                            )

    def vpc_data_network(self, protocol='tcp', cidr_ips=[]):
        """ This method is just for get and management vpc informcations:
        :protocol (string):
        :cidr_ips (list): e.g:
            [{ 'protocol': 'http', 'from_port': '80',
               'to_port': '80', 'cidr_ip': '172.0.0.1/16' },
             { 'protocol': 'tcp', 'from_port': '9200',
               'to_port': '9200', 'cidr_ip': '172.1.0.1/16' }]"""
        # NOTE(review): mutable default argument — the append below mutates
        # the shared default list, so discovered CIDRs leak across calls.
        network_policies = []
        if not cidr_ips:
            # Get default subnets on defauilt VPC (my case)
            subnets = self.describe_subnets.get_all_subnets()
            for subnet in subnets:
                cidr_ips.append(subnet.cidr_block)
            # NOTE(review): assumes at least two subnets exist —
            # cidr_ips[1] raises IndexError otherwise.
            network_policies = [{
                'protocol': 'tcp',
                'from_port': 9300,
                'to_port': 9300,
                'cidr_ip': [cidr_ips[0], cidr_ips[1]]
            }, {
                'protocol': 'tcp',
                'from_port': 9201,
                'to_port': 9201,
                'cidr_ip': [cidr_ips[0], cidr_ips[1]]
            }, {
                'protocol': 'tcp',
                'from_port': 80,
                'to_port': 80,
                'cidr_ip': [cidr_ips[0], cidr_ips[1]]
            }, {
                'protocol': 'tcp',
                'from_port': 443,
                'to_port': 443,
                'cidr_ip': [cidr_ips[0], cidr_ips[1]]
            }]
        else:
            # Caller supplied explicit rules: normalise them field-by-field.
            for cidr_ip in cidr_ips:
                network_policies.append({
                    'protocol': cidr_ip['protocol'],
                    'from_port': cidr_ip['from_port'],
                    'to_port': cidr_ip['to_port'],
                    'cidr_ip': cidr_ip['cidr_ip']
                })
        if not network_policies:
            raise ExpectedSubnetsAndVPC("Well, in this case, it's necessary to create one VPC and two subnets for this region")
        self.logging.debug("will be created network policies and adjusted with parameters: {}".format(
            network_policies
        )
        )
        self.create_security_group(network_policies=network_policies)

    def create_layer(self, new_stack=False, stack_id=None):
        """ The method is just for create layer:
        :new_stack (booblean):
        :stack_id (string):"""
        layer_name = 'ElasticSearchLayer-{}'.format(str(uuid.uuid4())[:8])
        if new_stack and stack_id is None:
            new_stack_id = self.create_stack()['StackId']
        # An explicit stack_id always wins over a freshly-created stack.
        if stack_id:
            new_stack_id = stack_id
        # NOTE(review): when new_stack is False and stack_id is None,
        # new_stack_id is never bound and self.stack may not exist — the
        # method relies on callers providing one of the two.
        self.stack['StackId'] = new_stack_id
        self.layer_created = self.conn.create_layer(
            stack_id=self.stack['StackId'],
            type='custom',
            name=layer_name,
            volume_configurations=settings.EBS_VOLUM,
            shortname='elasticsearchlayer',
            custom_instance_profile_arn=settings.DEFAULT_INSTANCE_PROFILE_ARN,
            auto_assign_elastic_ips=True,
            custom_recipes=settings.RECIPES
        )
        self.logging.debug(
            "The layer: {!r} has been created with successfull".format(
                layer_name
            )
        )
        # Open the default ES/HTTP ports for the discovered subnets.
        self.vpc_data_network()
        return self.layer_created

    def create_instances(self, number_instances=3, subnets_list=[],
                         new_layer=True, new_stack=True, stack_id=None,
                         layer_id=[], cidr_ips=[], **kwargs):
        """The method is just for create instances:
        :number_instances (int): Number of the instances you want create
        :subnets_list (list): list with the subnets for input your instances,
            example: [ 172.0.0.1/16, 172.1.0.1/16 ]
        :new_layer (boolean): If you want create a new layer before or input
            in specific layer, expected LayerId
        :new_stack (boolean): if you want create a new stack before or input
            in specific stack, expected StackId
        :layer_id (list): If new_layer is False, i need a list with layer
            ids, example: [ 'foor', 'bar' ]
        :cidr_ips (list): Set the ips list with arbitrary cidr_ips
        :**kwargs (dict): dict with another increments for boto.opsworks
            method
        """
        # NOTE(review): three mutable default arguments here ([]), same
        # caveat as above.
        if new_layer and not layer_id:
            new_layer_id = [self.create_layer(new_stack=new_stack)['LayerId']]
        if layer_id:
            new_layer_id = layer_id
        if not new_stack:
            self.stack = {
                'StackId': stack_id
            }
        if subnets_list:
            # One subnet per instance, matched by position.
            if len(subnets_list) != number_instances:
                raise ParameterProblems("number instances and subnets_list needed the same lenght.")
        else:
            subnets_list=None
        for loop in range(0, number_instances):
            if subnets_list:
                new_subnets_list = subnets_list[loop]
            else:
                new_subnets_list = None
            instance_created = self.conn.create_instance(
                stack_id=self.stack['StackId'],
                layer_ids=new_layer_id,
                root_device_type='ebs',
                instance_type=settings.INSTANCE_TYPE,
                subnet_id=new_subnets_list,
                **kwargs
            )
            self.logging.debug(
                "The {!r} instance(s) has been created with successfull: stack_id: {!r}, layer_id: {!r}, instance_type: {!r}, subnets: {!r}".format(
                    number_instances,
                    self.stack['StackId'],
                    new_layer_id,
                    settings.INSTANCE_TYPE,
                    new_subnets_list
                )
            )
            # Instances are started immediately after creation.
            self.conn.start_instance(instance_created['InstanceId'])
        if cidr_ips:
            # Open the four service ports (http/https/ES) for each extra CIDR.
            rules=[]
            for cidr_ip in cidr_ips:
                rules.append({
                    'protocol': 'tcp',
                    'from_port': 80,
                    'to_port': 80,
                    'cidr_ip': cidr_ip
                })
                rules.append({
                    'protocol': 'tcp',
                    'from_port': 9201,
                    'to_port': 9201,
                    'cidr_ip': cidr_ip
                })
                rules.append({
                    'protocol': 'tcp',
                    'from_port': 443,
                    'to_port': 443,
                    'cidr_ip': cidr_ip
                })
                rules.append({
                    'protocol': 'tcp',
                    'from_port': 9300,
                    'to_port': 9300,
                    'cidr_ip': cidr_ip
                })
            self.vpc_data_network(cidr_ips=rules)

    def managament_instance(self, instance_id, action='stop'):
        """ This class, is just for management instances (stop, start etc...)
        :instance_id (string):
        :action (string) - specific strings expected, are options: 'stop',
            'start', 'delete'
        """
        # NOTE(review): method name typo ("managament") kept — callers
        # elsewhere depend on it.
        status = None
        if action == 'stop':
            status = self.conn.stop_instance(instance_id)
        if action == 'start':
            status = self.conn.start_instance(instance_id)
        if action == 'delete':
            status = self.conn.delete_instance(instance_id=instance_id,
                                               delete_elastic_ip=True,
                                               delete_volumns=True)
        if not status:
            raise UnrecognizedComand("Plase, try again with: 'stop', 'start', or 'delete' command.")
        return status
learning_rate=0.001, batch_size=args.batch_size_vae, epochs=args.epochs_vae) config_temporal = dict(model_name='LSTM', dynamic_latent_dim=args.dynamic_latent_dim, learning_rate=0.001, batch_size=args.batch_size_lstm, epochs=args.epochs_lstm) experiment_dir.mkdir( parents=True, exist_ok=True) # Create experiment dir if it doesn't exist yet experiment_dir.joinpath('logs').mkdir( parents=True, exist_ok=True) # Create logs dir if it doesn't exist yet runId = datetime.datetime.now().isoformat() sys.stdout = Logger(f'{experiment_dir}/logs/run_{runId}.txt' ) # Redirect print to log file as well as the terminal # __ __ ______ # \ \ / /\ | ____| # \ \ / / \ | |__ # \ \/ / /\ \ | __| # \ / ____ \| |____ # \/_/ \_\______| # SETUP VAE spatial_model = models.Image32x32_VAE(latent_dim=config_spatial['latent_dim'], learn_prior=False) kvae_dataset = ImageSequencesDataset(NPZ_ImageSequencesLoader( spatial_data_filepath.absolute()).get_as_tensor(), return_sequences=False)
def main():
    """Entry point for the LR-finder run: parse CLI args, prepare output
    directories, open the fold-specific log, dump every argument to it,
    then hand off to ``run``."""
    import warnings
    warnings.filterwarnings("ignore")

    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = f'{args.gpu}'
    utils.prepare_train_directories(args)

    logger = Logger()
    log_path = args.log_dir + '/' + args.model_name + f'/fold_{args.fold}' + '/findlr_log.txt'
    # Append mode so repeated runs accumulate in the same file.
    logger.open(log_path, mode='a')

    banner = '*' * 30
    logger.write(banner)
    logger.write('\n')
    logger.write('Logging arguments!!\n')
    logger.write(banner)
    logger.write('\n')
    # Record every argument (sorted by name) for reproducibility.
    for name, value in sorted(vars(args).items()):
        logger.write(f'{name}: {value}\n')
    logger.write(banner)
    logger.write('\n')

    run(args, logger)
    print('success!')
import re
from etc import settings
from utils.utils import Logger, APP_ROOT
from lib.exceptions import UnknowCIDRRange
from lib.opswork_setup import OpsWorkSetup
from lib.iam import AWSPolicies

__author__ = "Rondineli G. de Araujo"
__copyright__ = "Copyright (C) 2015 Rondineli G. Araujo"
__version__ = "0.0.1"

# Module-level logger for the OpsWorks CLI entry point.
# (Message text left untouched — "Lunch" sic, it is runtime output.)
logging = Logger("OpsWorks Setup").get_logger()
logging.debug("Lunch opsworks setup with elasticSearch Cluster")


def call(args, parse):
    """CLI hook: copy credentials/roles given on the command line into the
    module-wide settings before the OpsWorks setup runs.

    @param args: parsed argument namespace; access_key, secret_key,
        service_role_arn and instance_arn_role are read here.
    @param parse: presumably the parser object itself — unused in this
        excerpt, TODO confirm against the caller.
    """
    if args.access_key:
        settings.ACCESS_KEY = args.access_key
    if args.secret_key:
        settings.SECRET_KEY = args.secret_key
    if args.service_role_arn:
        settings.SERVICE_ROLE_ARN = args.service_role_arn
    if args.instance_arn_role:
        settings.DEFAULT_INSTANCE_PROFILE_ARN = args.instance_arn_role
def main():
    """Contrastive pre-training entry point (optionally distributed).

    Builds the dataset/loaders, a VNet student plus an EMA copy, wires up
    DDP when more than one GPU is listed in ``args.gpu``, picks the MoCo
    variant matching ``args.sampling``, and runs the train/validate loop.
    """
    args = parse_args()
    # Negative "turnon" means we are still in the pre-training phase.
    args.pretrain = args.turnon < 0
    print("Using GPU: {}".format(args.local_rank))

    # Only rank 0 creates the experiment directory tree.
    root_path = 'exps/exp_{}'.format(args.exp)
    if args.local_rank == 0 and not os.path.exists(root_path):
        os.mkdir(root_path)
        os.mkdir(os.path.join(root_path, "log"))
        os.mkdir(os.path.join(root_path, "model"))

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    train_dataset, val_dataset = build_dataset(args.dataset, args.data_root,
                                               args.train_list,
                                               sampling=args.sampling)

    # One process per GPU listed in --gpu.
    args.world_size = len(args.gpu.split(","))
    if args.world_size > 1:
        os.environ['MASTER_PORT'] = args.port
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group('nccl')
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset,
            num_replicas=args.world_size,
            rank=args.local_rank)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        # A sampler and shuffle=True are mutually exclusive in DataLoader.
        shuffle=(train_sampler is None),
        sampler=train_sampler,
        num_workers=args.num_workers,
        pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             num_workers=args.num_workers,
                                             pin_memory=True)

    model = VNet(args.n_channels, args.n_classes, input_size=64,
                 pretrain=True).cuda(args.local_rank)
    model_ema = VNet(args.n_channels, args.n_classes, input_size=64,
                     pretrain=True).cuda(args.local_rank)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=0.0005)
    if args.world_size > 1:
        model = DDP(model, device_ids=[args.local_rank],
                    output_device=args.local_rank,
                    find_unused_parameters=True)
        model_ema = DDP(model_ema, device_ids=[args.local_rank],
                        output_device=args.local_rank,
                        find_unused_parameters=True)
    model.train()
    # The EMA model starts as an exact copy of the student.
    model_ema.load_state_dict(model.state_dict())
    print("Loaded weights")

    logger = Logger(root_path)
    saver = Saver(root_path, save_freq=args.save_freq)

    # Memory-bank / contrastive head, chosen by the sampling strategy.
    if args.sampling == 'default':
        contrast = RGBMoCo(128, K=4096,
                           T=args.temperature).cuda(args.local_rank)
    elif args.sampling == 'layerwise':
        contrast = RGBMoCoNew(128, K=4096,
                              T=args.temperature).cuda(args.local_rank)
    else:
        raise ValueError("unsupported sampling method")
    criterion = torch.nn.CrossEntropyLoss()

    for epoch in range(args.start_epoch, args.epochs):
        # Fix: set_epoch() exists only on the DistributedSampler; the
        # original called it unconditionally, crashing single-GPU runs
        # where train_sampler is None.
        if train_sampler is not None:
            train_sampler.set_epoch(epoch)
        train(model, model_ema, train_loader, optimizer, logger, saver, args,
              epoch, contrast, criterion)
        validate(model_ema, val_loader, optimizer, logger, saver, args, epoch)
        adjust_learning_rate(args, optimizer, epoch)
model_name = 'efficientnet-b1' writeFile = '/data1/cby/temp/output_my_aug/logs/' + model_name store_name = '/data1/cby/temp/output_my_aug/weights/' + model_name if not os.path.isdir(store_name): os.makedirs(store_name) model_path = None # model_path = '/data1/cby/temp/output_my_aug/weights/efficientnet-b1/efn-b1_LS_9_loss_0.1610.pth' model = get_efficientnet(model_name=model_name) if model_path is not None: # model = torch.load(model_path) model.load_state_dict(torch.load(model_path, map_location='cpu')) print('Model found in {}'.format(model_path)) else: print('No model found, initializing random model.') model = model.cuda(device_id) train_logger = Logger(model_name=writeFile, header=['epoch', 'loss', 'acc', 'lr']) # criterion = nn.CrossEntropyLoss() criterion = LabelSmoothing(smoothing=0.05).cuda(device_id) # optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9) # optimizer = optim.Adam(model.parameters(), lr=lr) optimizer = optim.AdamW(model.parameters(), lr=lr) scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.9) is_train = True if is_train: xdl = DeeperForensicsDatasetNew(real_npys=train_real_paths_npy, fake_npys=train_fake_paths_npy, is_one_hot=True, transforms=None) train_loader = DataLoader(xdl,
return pow((1 - ((epoch) / args['n_epochs'])), 0.9) scheduler = torch.optim.lr_scheduler.LambdaLR( optimizer, lr_lambda=lambda_, ) # clustering cluster = Cluster() # Visualizer visualizer = Visualizer(('image', 'pred', 'sigma', 'seed')) # Logger logger = Logger(('train', 'val', 'iou'), 'loss') # resume start_epoch = 0 best_iou = 0 if args['resume_path'] is not None and os.path.exists(args['resume_path']): print('Resuming model from {}'.format(args['resume_path'])) state = torch.load(args['resume_path']) start_epoch = state['epoch'] + 1 best_iou = state['best_iou'] model.load_state_dict(state['model_state_dict'], strict=True) optimizer.load_state_dict(state['optim_state_dict']) logger.data = state['logger_data'] def train(epoch):
def main(args=None):
    """Evaluation-only entry point for video re-ID: build query/gallery
    loaders, restore a trained model from ``args.model_path`` and run
    ``atest``.

    @param args: parsed argument namespace.  NOTE(review): the ``None``
        default is unusable — ``args.seed`` below raises AttributeError;
        callers presumably always pass a namespace.
    """
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    # torch.cuda.set_device(0)
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    # Tee stdout into the run's log file.
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, args.log_train))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, args.log_test))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_dataset(name=args.dataset)

    # ImageNet normalisation statistics.
    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = False

    # 'dense' sampling: presumably every clip of a tracklet is evaluated —
    # TODO confirm against VideoDataset.
    queryloader = DataLoader(
        VideoDataset(dataset.query, data_name=args.dataset,
                     seq_len=args.seq_len, sample='dense',
                     transform=transform_test, type="test"),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )
    galleryloader = DataLoader(
        VideoDataset(dataset.gallery, data_name=args.dataset,
                     seq_len=args.seq_len, sample='dense',
                     transform=transform_test, type="test"),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing models: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              final_dim=args.feat_dim)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    # Weights are loaded BEFORE the DataParallel wrap, so checkpoint keys
    # carry no 'module.' prefix.
    checkpoint = torch.load(args.model_path)
    model.load_state_dict(checkpoint['state_dict'])

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        atest(model, queryloader, galleryloader, use_gpu)
        return
def super_parsing(argv): ''' @param method: SuperPixel Method to test Possible Methods are: "fh", "qs", "slic" @param params: Params Array for given SuperPixels method implements the superParsing Pipeline ''' #handling Arguments try: opts, args = getopt.getopt(argv, "h", ["help"]) except: usage() sys.exit(2) for opt, arg in opts: if opt in ("-h", "--help"): usage() sys.exit(1) if len(args)>0: if args[0]=='fh': #Graph Based method = 'fh' elif args[0]=='qs': #Quick Shift if len(args)==4: method = args[0] else: usage() sys.exit(2) elif args[0]=='slic': #SLIC if len(args)==3: method = args[0] else: usage() sys.exit(2) elif args[0]=='gt': #Ground Truth if len(args)==1: method = args[0] else: usage() sys.exit(2) elif args[0]=='sal': #Saliency if len(args)==2: method = args[0] else: usage() sys.exit(2) elif args[0]=='grid': #GRID if len(args)==2: method = args[0] else: usage() sys.exit(2) else: method = 'fh' #Setting up Environment log = Logger() sfe = segmentFeatureExtractor() using_ubuntu = False #setting paths #Ubuntu Paths if using_ubuntu: home_base = base_ubuntu home_folder = home_base + sub_experiments #Debian Paths else: home_base = base_gnome home_folder = home_base + sub_experiments seg_method = {} ############## Segmentation Methods ################ if method == 'qs': #################### Quick Shift ################### seg_method['method'] = "Quick_Shift" seg_method['ratio'] = float(args[1])#0.05 seg_method['kernelsize'] = int(args[2])#2 seg_method['maxdist'] = int(args[3])#48 if method == 'slic': #################### SLIC ######################## seg_method['method'] = "SLIC" seg_method['region_size'] = int(args[1])#60 seg_method['regularizer'] = int(args[2])#100 if method == 'gt': #################### Ground Truth ################## seg_method['method'] = "Ground_Truth" if method == 'sal': #################### Saliency ####################### seg_method['method'] = "Saliency" seg_method['k'] = int(args[1]) if method == 'grid': #################### GRID 
########################### seg_method['method'] = 'GRID' seg_method['k'] = int(args[1]) elif method == 'fh': ####################### FH ########################## seg_method['method'] = "SP" seg_method['desc'] = "Desc" seg_method['k'] = "k200" #"SP_Desc_k200" ############################ PATH SETTINGS ################################# seg_folder = seg_method['method'] seg_params = [seg_method[key] for key in np.sort(np.array(seg_method.keys())) if key!='method'] for i in range(len(seg_params)): seg_folder = seg_folder + "_"+str(seg_params[i]) if not os.path.exists(home_folder + seg_folder): os.makedirs(home_folder + seg_folder) home_images = home_base + "Images" home_label_sets = (home_folder + seg_folder+"/GeoLabels", home_folder + seg_folder+"/SemanticLabels") label_sets = ("GeoLabels", "SemanticLabels") home_sp_label_folder = (home_label_sets[0] + "/SP_Desc_k200", home_label_sets[1] + "/SP_Desc_k200") # home_sp_label_folder = (home_folder + "SPGeoLabels", # home_folder + "SPSemanticLabels") home_data = home_folder + seg_folder + "/Data" home_descriptors = home_data + "/Descriptors" home_descriptors_global = home_descriptors + "/Global" descriptors_global_folder = np.array(["coHist", "colorGist", "SpatialPyrDenseScaled", "Textons/mr_filter", "Textons/sift_textons"]) global_descriptors = np.array(['coHist', 'colorGist', 'SpatialPyrDenseScaled', 'Textons']) home_descriptors_segment = home_descriptors + "/SP_Desc_k200"# + "/" + seg_folder if not os.path.exists(home_descriptors_segment): os.makedirs(home_descriptors_segment) home_super_pixels = home_descriptors_segment + "/super_pixels" if not os.path.exists(home_super_pixels): os.makedirs(home_super_pixels) save_path = home_folder + seg_folder + "/Python/" if not os.path.exists(save_path): os.makedirs(save_path) test_set_file_path = home_folder + "TestSet1.txt" if method == 'gt': seg_method['label_folder'] = home_label_sets[1] if method == 'sal': seg_method['saliency_folder'] = home_folder + seg_folder + 
'/saliency' ############################################################################ ################# Sequential Reading ####################################### #variable to setup a sequential reading of mat files (saving ram power) seq_reading = True #loading Images loader = Loader.Loader.DataLoader(save_path,save_mode = False, seq_reading=seq_reading) loader.load_images(home_images) #loading/Calculate SuperPixels from/as mat files loader.load_super_pixel(home_super_pixels, seg_method)
def main(args=None):
    """Train a video re-ID model: label-smoothed cross-entropy + triplet +
    regular losses, Adam with a warmup multi-step LR schedule, periodic
    late-stage evaluation and best-checkpoint saving.

    @param args: parsed argument namespace.  NOTE(review): the ``None``
        default is unusable — ``args.seed`` below raises AttributeError;
        callers presumably always pass a namespace.
    """
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    # torch.cuda.set_device(0)
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    # Tee stdout into the run's log file.
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, args.log_train))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, args.log_test))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_dataset(name=args.dataset)

    # Tensor-space augmentation (normalise + random erasing) ...
    transform_train = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        T.RandomErasing(),
    ])
    # ... and PIL-space resize/translation passed separately as transform2;
    # presumably applied before transform_train by VideoDataset — confirm.
    transform_train2 = T.Compose([
        T.Resize((args.height, args.width)),
        T.Random2DTranslation(args.height, args.width),
    ])
    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = False

    # P*K batching: RandomIdentitySampler keeps num_instances tracklets
    # per identity in each batch (needed by the triplet loss).
    trainloader = DataLoader(
        VideoDataset(dataset.train, data_name=args.dataset,
                     seq_len=args.seq_len, sample='random',
                     transform=transform_train, transform2=transform_train2,
                     type="train"),
        sampler=RandomIdentitySampler(dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=True,
    )
    queryloader = DataLoader(
        VideoDataset(dataset.query, data_name=args.dataset,
                     seq_len=args.seq_len, sample='dense',
                     transform=transform_test, type="test"),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )
    galleryloader = DataLoader(
        VideoDataset(dataset.gallery, data_name=args.dataset,
                     seq_len=args.seq_len, sample='dense',
                     transform=transform_test, type="test"),
        batch_size=args.test_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=pin_memory,
        drop_last=False,
    )

    print("Initializing models: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.num_train_pids,
                              final_dim=args.feat_dim)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    crossEntropyLoss = CrossEntropyLabelSmooth(
        num_classes=dataset.num_train_pids, use_gpu=use_gpu)
    tripletLoss = TripletLoss(margin=args.margin)
    regularLoss = RegularLoss(use_gpu=use_gpu)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = WarmupMultiStepLR(optimizer, args.stepsize, args.gamma,
                                  args.warmup_factor, args.warmup_items,
                                  args.warmup_method)
    start_epoch = args.start_epoch

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        atest(model, queryloader, galleryloader, use_gpu)
        return

    start_time = time.time()
    best_rank1 = -np.inf
    for epoch in range(start_epoch, args.max_epoch):
        print("==> Epoch {}/{}".format(epoch + 1, args.max_epoch))
        train(model, crossEntropyLoss, tripletLoss, regularLoss, optimizer,
              trainloader, use_gpu)
        # if args.stepsize > 0:
        scheduler.step()
        # NOTE(review): evaluation only starts after a hard-coded 200-epoch
        # floor, then repeats every eval_step epochs — with max_epoch < 200
        # no checkpoint is ever written; confirm intended.
        if (epoch + 1) >= 200 and (epoch + 1) % args.eval_step == 0:
            print("==> Test")
            rank1 = atest(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
            # Save the raw (non-DataParallel) weights.
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
            }, is_best, osp.join(
                args.save_dir,
                args.model_name + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
def str2bool(v):
    """Parse a CLI boolean.

    Fixes the classic argparse ``type=bool`` bug: ``bool('False')`` is
    ``True`` because any non-empty string is truthy, so ``--flag False``
    silently enabled the flag.  Accepts common spellings of true/false.

    @raises argparse.ArgumentTypeError: on an unrecognised value.
    """
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('boolean value expected, got %r' % (v,))


def main():
    """CLI entry point for rationale-based classification.

    Parses hyperparameters, redirects stdout to the train/test log, prints
    the configuration, and dispatches to ``train`` or ``test``.
    """
    # Load parameters.
    parser = argparse.ArgumentParser()
    parser.add_argument('--classifier', type=str, default='TextCNN',
                        help='classifier to use "LSTM/TextCNN"')
    # Boolean flags use str2bool instead of bool (see above).
    parser.add_argument('--pretrained', type=str2bool, default=False,
                        help='finetune pre-trained classifier')
    parser.add_argument('--mode', type=str, default='train',
                        help='train or eval')
    parser.add_argument('--epochs', type=int, default=50,
                        help='number of training epochs')
    parser.add_argument('--batch_size', type=int, default=64,
                        help='number of examples to process in a batch')
    parser.add_argument('--max_norm', type=float, default=5.0,
                        help='max norm of gradient')
    parser.add_argument('--embed_trainable', type=str2bool, default=True,
                        help='finetune pre-trained embeddings')
    parser.add_argument(
        '--device', type=str,
        default=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'))
    # rationale specific parameters.
    parser.add_argument('--lr_enc', type=float, default=1e-3,
                        help='learning rate for the encoder')
    parser.add_argument('--lr_gen', type=float, default=1e-3,
                        help='learning rate for the generator')
    parser.add_argument(
        '--num_hidden_rationale', type=int, default=64,
        help='number of hidden units for the PreGenerator LSTM for rationale')
    parser.add_argument(
        '--lstm_layer_rationale', type=int, default=2,
        help='number of layers for the PreGenerator LSTM for rationale')
    parser.add_argument(
        '--lstm_bidirectional_rationale', type=str2bool, default=True,
        help='bi-direction for the PreGenerator LSTM for rationale')
    parser.add_argument('--lambda_1', type=float, default=1e-2,
                        help='regularizer of the length of selected words')
    parser.add_argument('--lambda_2', type=float, default=1e-3,
                        help='regularizer of the local coherency of words')
    parser.add_argument(
        '--agg_mode', type=str, default='fc',
        help='aggregation mode chosen after the pregenerator LSTM layer')
    # LSTM specific parameters.
    parser.add_argument('--num_hidden', type=int, default=256,
                        help='number of hidden units in the LSTM classifier')
    parser.add_argument('--lstm_layer', type=int, default=2,
                        help='number of layers of lstm')
    parser.add_argument('--lstm_bidirectional', type=str2bool, default=True,
                        help='bi-direction of lstm')
    # TextCNN specific parameters.
    parser.add_argument('--num_classes', type=int, default=6,
                        help='number of target classes')
    parser.add_argument('--kernel_sizes', nargs='+', type=int,
                        default=[2, 3, 4],
                        help='kernel sizes for the convolution layer')
    parser.add_argument('--p', type=float, default=0.5,
                        help='dropout rate')
    parser.add_argument('--c_out', type=int, default=32,
                        help='output channel size of the convolution layer')
    args = parser.parse_args()

    # Create log object.
    if args.mode == 'train':
        sys.stdout = Logger(TRAIN_LOG_LOC)
        print_statement('HYPERPARAMETER SETTING')
        print_flags(args)
        train(args, GEN_MODEL_LOC, LSTM_MODEL_LOC, TCN_MODEL_LOC,
              LABEL_JSON_LOC)
    else:
        sys.stdout = Logger(TEST_LOG_LOC)
        print_statement('HYPERPARAMETER SETTING')
        print_flags(args)
        test(args, GEN_MODEL_LOC, LSTM_MODEL_LOC, TCN_MODEL_LOC,
             LABEL_JSON_LOC)
def main(env, args):
    """Top-level training driver for a DQN or PPO agent.

    Builds the model(s) and optimizer selected by ``args.alg`` / ``args.noise``,
    optionally resumes from a checkpoint, warms up the DQN replay buffer, then
    runs the episode loop: train, checkpoint, log, and periodically evaluate
    until ``args.n_frames`` frames have been processed.

    @param env: the (wrapped) gym-style environment; ``env.step``/``env.reset``
        and ``env.was_real_done`` are used (the latter suggests an
        EpisodicLifeEnv-style wrapper — see the comment at the warmup loop)
    @param args: parsed argument namespace; NOTE this function mutates it
        heavily, attaching meters, loggers, the replay buffer, and loop state,
        and the train/test helpers return it back updated
    """
    # Initiate args useful for training
    start_episode = 0
    args.current_frame = 0
    args.eval_start = 0
    args.test_num = 0
    args.test_time = False
    args.best_avg_return = -1  # sentinel: any real average return beats this

    # Make checkpoint path if there is none
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Instantiate metric tracking for progress bar
    # (AverageMeter is a project helper; presumably tracks val/avg — verify)
    # TODO: Add any other metrics we need to track
    # TODO: Still need to keep track of time for progress bar
    args.rewards = AverageMeter()
    args.returns = AverageMeter()
    args.episode_lengths = AverageMeter()
    args.losses = AverageMeter()

    # Model & experiences
    print("==> creating model '{}' with '{}' noise".format(
        args.alg, args.noise))
    if args.alg == 'dqn':
        # Convert the epsilon-greedy exploration rate into the model's
        # initial threshold parameter.
        args.epsilon_greed_init = args.epsilon_greed
        args.initial_threshold = -math.log(
            1 - args.epsilon_greed + args.epsilon_greed / args.action_dim)
        model = DQN(action_space=env.action_space,
                    noise=args.noise,
                    initial_threshold=args.initial_threshold)
        # Target network starts as an exact copy of the online network.
        target_model = DQN(action_space=env.action_space,
                           noise=args.noise,
                           initial_threshold=args.initial_threshold)
        target_model.load_state_dict(model.state_dict())
        args.memory = ReplayBuffer(args.replay_memory, args.use_cuda)
    else:
        model = PPO(action_space=env.action_space,
                    noise=args.noise,
                    clip_epsilon=args.clip_epsilon)
        # TODO: Instantiate RolloutStorage
        # rollouts = RolloutStorage(args.horizon, arg.processes?,...)

    # House models on GPU if needed
    if args.use_cuda:
        model.cuda()
        if args.alg == 'dqn':
            target_model.cuda()

    # Criterions and optimizers.
    # Optimizer choice depends on both the algorithm and the noise type.
    value_criterion = nn.functional.mse_loss
    if args.alg == 'dqn':
        if args.noise == 'adaptive':
            optimizer = optim.Adam(model.parameters(), lr=1e-4)
        else:
            optimizer = optim.RMSprop(model.parameters(), lr=2.5e-4,
                                      momentum=0.95, alpha=0.95, eps=1e-2)
    else:
        # PPO uses its own surrogate objective for the policy; only defined
        # on this branch (trainPPO below is the only consumer).
        policy_criterion = model.surrogate_loss
        # TODO: revisit the choices here.
        # Might be best to just go with defaults from PPO paper
        if args.noise == 'learned':
            optimizer = optim.RMSprop(model.parameters(), lr=2.5e-4,
                                      momentum=0.95, alpha=0.95, eps=1e-2)
        else:
            optimizer = optim.Adam(model.parameters(), lr=3e-4)

    # Resume
    # Unload status, meters, and previous state_dicts from checkpoint
    print("==> resuming from '{}' at frame {}".format(
        args.resume, args.start_frame)
        if args.resume else
        "==> starting from scratch at frame {}".format(args.start_frame))
    title = '{}-{}'.format(args.noise, args.env_id)
    if args.resume:
        # Load checkpoint.
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        # Checkpoints are written next to the file we resumed from.
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        start_episode = checkpoint['episode'] + 1
        args.current_frame = checkpoint['frame'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        if args.alg == 'dqn':
            target_model.load_state_dict(checkpoint['target_state_dict'])
        # NOTE(review): returns/episode_lengths are restored here but
        # rewards/losses are not — confirm that is intentional (they are
        # reset per-episode below anyway).
        args.returns = checkpoint['returns']
        args.best_avg_return = checkpoint['best_avg_return']
        args.episode_lengths = checkpoint['episode_lengths']
        optimizer.load_state_dict(checkpoint['optimizer'])
        # Re-open the existing logs in append mode (resume=True).
        args.logger = Logger(os.path.join(args.checkpoint,
                                          '{}-log.txt'.format(title)),
                             title=title, resume=True)
        args.test_logger = Logger(os.path.join(
            args.checkpoint, 'eval-{}-log.txt'.format(title)),
            title=title, resume=True)
    else:
        # Fresh logs: write the column headers once.
        args.logger = Logger(os.path.join(args.checkpoint,
                                          '{}-log.txt'.format(title)),
                             title=title)
        args.logger.set_names(
            ['Episode', 'Frame', 'EpLen', 'AvgLoss', 'Return'])
        args.test_logger = Logger(os.path.join(
            args.checkpoint, 'eval-{}-log.txt'.format(title)), title=title)
        args.test_logger.set_names(['Frame', 'EpLen', 'Return'])

    # We need at least one experience in the replay buffer for DQN:
    # fill it with uniformly random actions before training starts.
    if args.alg == 'dqn':
        true_warmup = min(args.memory_warmup, args.replay_memory)
        print("==> filling replay buffer with {} transition(s)".format(
            true_warmup))
        state = env.reset()
        for i in range(true_warmup):
            action = random.randrange(args.action_dim)
            successor, reward, done, _ = env.step(action)
            args.memory.add(state, action, reward, successor, done)
            state = successor if not done else env.reset()
        # Need next reset to be a true reset (due to EpisodicLifeEnv)
        env.was_real_done = True

    # Initialize bars
    args.bar = Bar('Training', max=args.n_frames)
    print("==> beginning training for {} frames".format(args.n_frames))

    # Main episode loop; runs until the frame budget is exhausted (break at
    # the bottom once args.current_frame > args.n_frames).
    for episode in itertools.count(start_episode):
        # Train model for one episode; the helpers hand back every object
        # they may have mutated, including args.
        if args.alg == 'dqn':
            env, model, target_model, optimizer, args = trainDQN(
                env, model, target_model, optimizer, value_criterion, args)
        else:
            env, model, optimizer, args = trainPPO(env, model, optimizer,
                                                   value_criterion,
                                                   policy_criterion, args)

        # Checkpoint model to disk; flag the best-so-far average return.
        is_best = args.returns.avg > args.best_avg_return
        if is_best:
            args.best_avg_return = args.returns.avg
        save_checkpoint(
            {
                'episode': episode,
                'frame': args.current_frame,
                'state_dict': model.state_dict(),
                'target_state_dict': target_model.state_dict()
                if args.alg == 'dqn' else None,  # PPO has no target net
                'rewards': args.rewards,
                'returns': args.returns,
                'best_avg_return': args.best_avg_return,
                'episode_lengths': args.episode_lengths,
                'losses': args.losses,
                'optimizer': optimizer.state_dict()
            }, is_best, title)

        # Log metrics (episode, frame, episode length, average loss, return)
        args.logger.append([
            episode, args.current_frame, args.episode_lengths.val,
            args.losses.avg, args.returns.val
        ])

        # Reset frame-level meters
        args.losses.reset()
        args.rewards.reset()

        # Handle testing: args.test_time is raised elsewhere (presumably by
        # the train helpers) every eval period.
        if args.test_time:
            # For testing only
            print("==> evaluating agent for {} frames at frame {}".format(
                args.eval_period, args.current_frame))
            args.eval_start = args.current_frame
            args.testing_frame = args.current_frame
            args.test_bar = Bar('Testing', max=args.eval_period)
            args.test_rewards = AverageMeter()
            args.test_returns = AverageMeter()
            args.test_episode_lengths = AverageMeter()
            # Main testing loop: evaluate for eval_period frames.
            while args.testing_frame - args.eval_start < args.eval_period:
                if args.alg == 'dqn':
                    env, args = testDQN(env, model, args)
                else:
                    env, args = testPPO(env, model, args)
                args.test_logger.append([
                    args.testing_frame - args.eval_start,
                    args.test_episode_lengths.val, args.test_returns.val
                ])
                args.test_episode_lengths.reset()
                args.test_rewards.reset()
                # For testing only:
                #break
            # Need next reset to be a true reset
            env.was_real_done = True
            # Need to turn off testing for next episode
            args.test_time = False
            args.test_num += 1
            args.test_bar.finish()

        # Stop once the frame budget is spent.
        if args.current_frame > args.n_frames:
            break
        # For testing only:
        # if episode >= 100:
        #     break
        #print('episode: {}'.format(episode))

    # TODO: Handle cleanup
    args.bar.finish()
    args.logger.close()
    args.test_logger.close()
    args.logger.plot()
    env.close()