def main(extra_flags):
    """Validate leftover flags, then build and run the CNN trainer.

    Args:
        extra_flags: Sequence of leftover command-line tokens. The first
            element is skipped (presumably the program name -- confirm with
            the caller); any further element is treated as an unknown flag.

    Raises:
        ValueError: If anything beyond the first element is present.
    """
    # The sequence must hold at least the leading element.
    assert len(extra_flags) >= 1
    unknown_flags = extra_flags[1:]
    if unknown_flags:
        raise ValueError('Received unknown flags: %s' % unknown_flags)

    # Materialise the run parameters from FLAGS and persist them.
    params = parameters.make_params_from_flags()
    deploy.setup_env(params)
    parameters.save_params(params, params.train_dir)

    # Report the TensorFlow major.minor version.
    tf_ver = deploy.tensorflow_version_tuple()
    deploy.log_fn('TensorFlow: %i.%i' % (tf_ver[0], tf_ver[1]))

    # Build the dataset and the model that consumes it.
    dataset = datasets.create_dataset(
        params.data_dir, params.data_name, params.data_subset)
    model = models.create_model(params.model, dataset)
    set_model_params(model, params)

    # Hand everything to the trainer and run it.
    cnn_trainer = deploy.TrainerCNN(dataset, model, params)
    cnn_trainer.print_info()
    cnn_trainer.run()
def play(args, game_config):
    """Play one episode using an epsilon-greedy policy over the model's output.

    A new action is chosen only on every ``args.frame_skip``-th step; the
    previously chosen action is repeated in between. On those same steps the
    observation buffer is shifted by one slot and the newest preprocessed
    frame is written to slot 0.

    Args:
        args: Object exposing ``frame_skip`` and ``render``.
        game_config: Mapping with the keys ``'state_shape'``,
            ``'enviroment'``, ``'preprocessing'`` and ``'actions'``.
    """
    frame_shape = game_config['state_shape']
    environment = game_config['enviroment']
    to_frame = game_config['preprocessing']
    action_set = game_config['actions']

    # Build the action-value model.
    q_model = create_model(args, game_config)

    explore_prob = 0.05
    frame_stack = np.zeros(frame_shape, dtype=np.int8)
    tick = 0
    finished = False

    while not finished:
        if (tick % args.frame_skip) == 0:
            # Pick a new action: random with probability explore_prob,
            # otherwise the argmax of the model's prediction.
            if np.random.rand() < explore_prob:
                choice = np.random.randint(low=0, high=len(action_set))
            else:
                scores = q_model.predict(np.array([frame_stack]), verbose=0)
                choice = scores.argmax()

        frame, _reward, finished, _info = environment.step(action_set[choice])

        if (tick % args.frame_skip) == 0:
            # Shift older frames back one slot, newest frame goes first.
            frame_stack[1:] = frame_stack[:-1]
            frame_stack[0] = to_frame(frame)

        tick += 1
        if args.render:
            environment.render()
def main():
    """Fit the word model for one epoch and write its weights to disk.

    Loads ``./X_words.jbl`` and ``./y_words.jbl``, builds the model from the
    second and third dimensions of the input array, trains it, and saves the
    weights to ``word_model.h5`` (overwriting any existing file).
    """
    inputs = joblib.load('./X_words.jbl')
    targets = joblib.load('./y_words.jbl')
    print('loaded data')

    dim_1 = inputs.shape[1]
    dim_2 = inputs.shape[2]
    net = models.create_model(dim_1, dim_2)
    print('model compiled')
    print(net.summary())

    net.fit(inputs, targets, batch_size=128, nb_epoch=1)
    net.save_weights('word_model.h5', overwrite=True)
def test_create_dynamic_models(self):
    """A dynamically created model exposes the requested name, title and fields."""
    model_name = "users_again"
    model_title = "Users"
    field_specs = [
        {"id": "name", "title": "Name", "type": "char"},
        {"id": "paycheck", "title": "Payment", "type": "int"},
        {"id": "date_joined", "title": "Job start date", "type": "date"},
    ]

    built_fields = create_fields(field_specs)
    dynamic_model = create_model(model_name, model_title, built_fields)
    meta = dynamic_model._meta

    self.assertEqual(dynamic_model.__name__, model_name)
    self.assertEqual(meta.verbose_name, model_title)
    # Three declared fields are expected to yield four model fields.
    self.assertEqual(len(meta.fields), 4)
def create_inference_graph(wanted_words, sample_rate, clip_duration_ms,
                           clip_stride_ms, window_size_ms, window_stride_ms,
                           dct_coefficient_count, model_architecture):
    """Build the audio model graph together with its inference endpoints.

    Adds a string placeholder named ``wav_data`` as the input, decodes it,
    derives a spectrogram and MFCC fingerprint, feeds the flattened
    fingerprint to the model, and adds a softmax output named
    ``labels_softmax``.

    Args:
        wanted_words: Comma-separated list of the words to recognize.
        sample_rate: Samples per second of the input audio.
        clip_duration_ms: Length of the analyzed audio clip.
        clip_stride_ms: How often recognition runs; passed to the model as a
            runtime setting.
        window_size_ms: Duration of each time slice used for frequency
            estimation.
        window_stride_ms: Distance between successive time slices.
        dct_coefficient_count: Number of frequency bands to analyze.
        model_architecture: Name of the kind of model to generate.
    """
    labels = input_data.prepare_words_list(wanted_words.split(','))
    settings = models.prepare_model_settings(
        len(labels), sample_rate, clip_duration_ms, window_size_ms,
        window_stride_ms, dct_coefficient_count)

    # Graph input: the raw bytes of a WAV file.
    wav_bytes = tf.placeholder(tf.string, [], name='wav_data')
    decoded = contrib_audio.decode_wav(
        wav_bytes,
        desired_channels=1,
        desired_samples=settings['desired_samples'],
        name='decoded_sample_data')
    spectrogram = contrib_audio.audio_spectrogram(
        decoded.audio,
        window_size=settings['window_size_samples'],
        stride=settings['window_stride_samples'],
        magnitude_squared=True)
    mfcc_features = contrib_audio.mfcc(
        spectrogram,
        decoded.sample_rate,
        dct_coefficient_count=dct_coefficient_count)

    # Flatten the fingerprint into one row per example.
    freq_bins = settings['dct_coefficient_count']
    time_steps = settings['spectrogram_length']
    flat_fingerprint = tf.reshape(mfcc_features, [-1, time_steps * freq_bins])

    logits = models.create_model(
        flat_fingerprint,
        settings,
        model_architecture,
        is_training=False,
        runtime_settings={'clip_stride_ms': clip_stride_ms})

    # Graph output used for inference.
    tf.nn.softmax(logits, name='labels_softmax')
def init():
    """Load a saved MNIST model checkpoint and evaluate it on the test split.

    NOTE(review): ``dir`` is not defined in this function; it is presumably a
    module-level path that shadows the builtin -- confirm.
    """
    lr = 0.01
    num_classes = 10  # digits

    # Only the test split is needed here.
    _, _, test_images, test_labels = mnist.load_data(one_hot=True)
    test_images = test_images.reshape([-1, 28, 28, 1])

    # Build the network for testing and restore the stored checkpoint.
    input_shape = [None, 28, 28, 1]
    net = models.create_model(lr, input_shape, num_classes, dir)
    net.load(dir + "/checkpoints/step-17200")

    evaluate_model_accuracy(net, test_images, test_labels)
def load(self, checkpoint_path, model_name='tacotron'):
    """Build the synthesis graph and restore its weights from a checkpoint.

    Sets ``self.model``, ``self.wav_output`` and ``self.session``.

    Args:
        checkpoint_path: Path handed to ``tf.train.Saver.restore``.
        model_name: Name passed to ``create_model``.
    """
    print('Constructing model: %s' % model_name)
    text_ids = tf.placeholder(tf.int32, [1, None], 'inputs')
    text_lengths = tf.placeholder(tf.int32, [1], 'input_lengths')

    with tf.variable_scope('model'):
        self.model = create_model(model_name, hparams)
        self.model.initialize(text_ids, text_lengths)
        # Use the first entry of the model's linear outputs.
        first_linear = self.model.linear_outputs[0]
        self.wav_output = audio.inv_spectrogram_tensorflow(first_linear)

    print('Loading checkpoint: %s' % checkpoint_path)
    self.session = tf.Session()
    self.session.run(tf.global_variables_initializer())
    tf.train.Saver().restore(self.session, checkpoint_path)
def init():
    """Train the MNIST CNN for 20 epochs and save the trained model.

    NOTE(review): ``dir`` is not defined in this function; it is presumably a
    module-level path that shadows the builtin -- confirm.
    """
    keep_rate = 0.8
    lr = 0.01
    # Unique run id based on the current wall-clock second.
    run_name = 'mnist_cnn_' + str(int(time.time()))

    train_images, train_labels, test_images, test_labels = mnist.load_data(
        one_hot=True)
    image_shape = [-1, 28, 28, 1]
    train_images = train_images.reshape(image_shape)
    test_images = test_images.reshape(image_shape)

    # Build the network for training.
    net = models.create_model(lr, [None, 28, 28, 1], 10, dir, drop=keep_rate)

    held_out = ({'input': test_images}, {'target': test_labels})
    net.fit({'input': train_images}, {'target': train_labels},
            n_epoch=20,
            validation_set=held_out,
            show_metric=True,
            run_id=run_name)
    net.save(dir + "/saveMnist_trained")
def train(args=None):
    """Train the patch model, checkpointing and dumping debug images per epoch.

    Args:
        args: Namespace exposing ``epochs``, ``image_folder``, ``batch_size``
            and ``checkpoint_directory``. The ``None`` default is kept for
            signature compatibility but is rejected explicitly.

    Raises:
        ValueError: If ``args`` is ``None``.

    NOTE(review): the original source had lost its indentation. This version
    assumes checkpointing and the debug-image dump happen once per epoch,
    after the batch loop -- confirm against the upstream file.
    """
    if args is None:
        # Previously this surfaced later as an opaque AttributeError.
        raise ValueError('train() requires an args namespace')

    model = create_model()
    print('Created model...')

    # Create output directories once, up front, instead of on every pass.
    if not os.path.exists(args.checkpoint_directory):
        os.makedirs(args.checkpoint_directory)
    if not os.path.exists('debug'):
        os.makedirs('debug')

    iterations = 0        # samples seen so far (running count)
    total_iterations = 0  # samples seen at the end of the last finished epoch
    checkpoint_num = 0    # index used to keep each epoch's debug images apart

    for epoch in range(args.epochs):
        print('Training epoch', epoch)
        print('total iterations', total_iterations)

        val_x = val_y = None
        for iteration, (train_x, train_y) in enumerate(
                generate_data(args.image_folder, max_patches=0.05)):
            val_x, val_y = next(
                generate_data(args.image_folder, max_patches=0.001))
            print('epoch: {0}\titeration: {1}'.format(epoch, iterations))
            model.fit(train_x, train_y,
                      validation_data=(val_x, val_y),
                      batch_size=args.batch_size,
                      nb_epoch=1,
                      show_accuracy=True)
            iterations += len(train_x)

        # Fix: this counter used to stay at 0 forever, so every checkpoint was
        # written to the same "0_model.h5py" file.
        total_iterations = iterations
        checkpoint_filepath = os.path.join(
            args.checkpoint_directory,
            '{}_model.h5py'.format(total_iterations))

        # Saving model
        if epoch % 3 == 0:
            model.save_weights(checkpoint_filepath, overwrite=True)

        # Save some images to see how well the model is training.
        if val_x is None:
            # The generator produced no batches; nothing to visualise.
            continue
        pred_y = model.predict(val_x)
        saved = 0
        for i, (orig, real, pred) in enumerate(zip(val_x, val_y, pred_y)):
            in_patch = np.rollaxis(orig, axis=0, start=3)
            imsave('debug/{0}_{1}_real.jpg'.format(i, checkpoint_num),
                   vec2img(real))
            imsave('debug/{0}_{1}_pred.jpg'.format(i, checkpoint_num),
                   vec2img(pred))
            imsave('debug/{0}_{1}_input.jpg'.format(i, checkpoint_num),
                   in_patch)
            saved = i + 1
        print(saved, 'images saved for debugging')
        # Fix: advance the index so the next epoch does not overwrite these.
        checkpoint_num += 1
return new_mask if __name__ == '__main__': opt_val = TestOptions().parse() # 加载验证数据集 opt_val.dataset_mode = 'single' opt_val.batch_size = 1 dataset_val = create_dataset(opt_val) dataset_val_size = len(dataset_val) print('The number of valling images = %d' % dataset_val_size) # 创建验证模型 model_val = create_model(opt_val) model_val.eval() # 从这一轮保存的权重恢复 model_val.setup(opt_val) metrics = RunningScore(opt_val.num_classes) metrics.reset() for i, data in enumerate(dataset_val): model_val.set_input(data) model_val.forward() gt = data["B_label"].numpy().squeeze() # [H, W] output = model_val.pre # [N, C, H, W] output = nn.functional.softmax(output, dim=1) # [N, C, H, W] output = nn.functional.upsample( output, (1024, 2048), mode='bilinear', align_corners=True).cpu().data[0].numpy() # [C, H, W]
tofile=True) logger = logging.getLogger('base') logger.info(option.dict2str(opt_P)) logger.info(option.dict2str(opt_C)) #### Create test dataset and dataloader test_loaders = [] for phase, dataset_opt in sorted(opt_P['datasets'].items()): test_set = create_dataset(dataset_opt) test_loader = create_dataloader(test_set, dataset_opt) logger.info('Number of test images in [{:s}]: {:d}'.format( dataset_opt['name'], len(test_set))) test_loaders.append(test_loader) # load pretrained model by default model_F = create_model(opt_F) model_P = create_model(opt_P) model_C = create_model(opt_C) for test_loader in test_loaders: test_set_name = test_loader.dataset.opt['name'] #path opt[''] logger.info('\nTesting [{:s}]...'.format(test_set_name)) test_start_time = time.time() dataset_dir = os.path.join(opt_P['path']['results_root'], test_set_name) util.mkdir(dataset_dir) test_results = OrderedDict() test_results['psnr'] = [] test_results['ssim'] = [] test_results['psnr_y'] = [] test_results['ssim_y'] = []
def main():
    """Train a super-resolution model described by a JSON option file.

    Steps: parse ``-opt``; prepare experiment folders and loggers; seed the
    RNGs; build the train/val loaders and the model; then run the training
    loop with periodic logging, validation (MSE/RMSE/PSNR per SR output) and
    checkpointing. The final model is saved under the tag ``'latest'``.

    Raises:
        NotImplementedError: If a resume state is configured (resuming is not
            implemented) or an unknown dataset phase is present.

    NOTE(review): the original source had lost its indentation; nesting was
    reconstructed from statement semantics. In particular the
    ``else: tmp_hr = gt_img`` branch is attached to ``if 'x' in sr_k`` --
    confirm against the upstream file.
    """
    # parse command line arguments
    parser = argparse.ArgumentParser(description="PyTorch LapSRN")
    opt_p = 'experiments/001_Train_SR-RRDB-3d_SynomagD_scale4.json'
    parser.add_argument('-opt', default=opt_p, type=str, required=False,
                        help='Path to option JSON file.')
    config = option.parse(parser.parse_args().opt, True,
                          is_tensorboard_available)
    config = option.dict_to_nonedict(config)
    run_config = config['run_config']
    optim_config = config['optim_config']
    data_config = config['data_config']

    # train from scratch OR resume training
    if run_config['path']['resume_state']:  # resuming training
        resume_state = torch.load(run_config['path']['resume_state'])
    else:  # training from scratch
        resume_state = None
        # rename old folder if exists
        util.mkdir_and_rename(run_config['path']['experiments_root'])
        util.mkdirs((path for key, path in run_config['path'].items()
                     if not key == 'experiments_root'
                     and 'pretrain_model' not in key
                     and 'resume' not in key))

    # config loggers. Before it, the log will not work
    util.setup_logger(None, run_config['path']['log'], 'train',
                      level=logging.INFO, screen=True)
    util.setup_logger('val', run_config['path']['log'], 'val',
                      level=logging.INFO)
    logger = logging.getLogger('base')
    logger.info(option.dict2str(config))

    if resume_state:
        # TODO: resuming is not implemented yet (check_resume needs updating).
        raise NotImplementedError

    # TensorBoard is used only when enabled and the run id is not a debug run.
    use_tb = bool(run_config['use_tb_logger']
                  and 'debug' not in run_config['id'])
    tb_logger = None
    if use_tb:
        tb_dir = os.path.join(run_config['path']['root'], 'tb_logger',
                              run_config['id'])
        util.mkdir_and_rename(tb_dir)  # rename old folder if exists
        tb_logger = SummaryWriter(log_dir=tb_dir)

    # set random seed
    logger.info("===> Set seed")
    seed = run_config['manual_seed']
    if seed is None:
        seed = random.randint(1, 10000)
        logger.info("=> Random seed: {}".format(seed))
    else:
        # The manual seed is parsed as a hexadecimal string.
        seed = int(seed, 16)
        logger.info("=> Manual seed: {}".format(seed))
    # Fix: the seed used to be re-parsed from 'manual_seed' here, which threw
    # away the random seed and raised TypeError when no manual seed was set.
    util.set_random_seed(seed)

    # Fix: was spelled "benckmark", which silently set an unused attribute.
    torch.backends.cudnn.benchmark = True

    logger.info("===> Loading datasets")
    # Pre-bind so a missing phase is caught by the assert, not a NameError.
    train_loader = None
    val_loader = None
    for phase, dataset_opt in data_config.items():
        if phase == 'train':
            train_set = create_dataset(dataset_opt)
            train_size = int(
                math.ceil(len(train_set) / dataset_opt['batch_size']))
            logger.info('Number of train images: {:,d}, iters: {:,d}'.format(
                len(train_set), train_size))
            total_iters = int(optim_config['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            if 'debug' in run_config['id']:
                total_epochs = 10
            logger.info('Total epochs needed: {:d} for iters {:,d}'.format(
                total_epochs, total_iters))
            train_loader = create_dataloader(train_set, dataset_opt)
        elif phase == 'val':
            val_set = create_dataset(dataset_opt)
            val_loader = create_dataloader(val_set, dataset_opt)
            logger.info('Number of val images in [{:s}]: {:d}'.format(
                dataset_opt['name'], len(val_set)))
        else:
            raise NotImplementedError(
                'Phase [{:s}] is not recognized.'.format(phase))
    assert train_loader is not None

    logger.info("===> Building model")
    model = create_model(config)
    # TODO: saving the model graph to TensorBoard is not working yet.

    # resume training
    if resume_state:
        start_epoch = resume_state['epoch']
        current_step = resume_state['iter']
        model.resume_training(resume_state)  # handle optimizers and schedulers
    else:
        current_step = 0
        start_epoch = 0

    logger.info('Start training from epoch: {:d}, iter: {:d}'.format(
        start_epoch, current_step))

    # Despite the name, this tracks the lowest average RMSE per SR output.
    best_psnr = OrderedDict([])
    # Starts True, so the very first step writes a checkpoint.
    is_newBest = True
    val_data_format = None

    for epoch in range(start_epoch, total_epochs):
        for _, train_data in enumerate(train_loader):
            current_step += 1
            if current_step > total_iters:
                break

            # update learning rate
            model.update_learning_rate()

            # training
            model.feed_data(train_data)
            model.optimize_parameters(current_step)

            # log
            if current_step % run_config['logger']['print_freq'] == 0:
                logs = model.get_current_log()
                message = '<epoch:{:3d}, iter:{:8,d}, lr:{:.3e}> '.format(
                    epoch, current_step, model.get_current_learning_rate())
                for k, v in logs.items():
                    if v.val is None:
                        # Nothing to report (or plot) for this entry yet.
                        continue
                    message += '{:s}: {:.4e} '.format(k, v.val)
                    if use_tb:
                        tb_logger.add_scalar('Train/running/{}'.format(k),
                                             v.val, current_step)
                logger.info(message)

            # validation (skipped when no 'val' dataset was configured)
            if (current_step % optim_config['val_freq'] == 0
                    and val_loader is not None):
                val_data_format = data_config['val']['data_format']
                avg_metric = OrderedDict([])
                idx = 0
                total_images = 0
                img_dir = os.path.join(run_config['path']['val_images'])
                util.mkdir(img_dir)

                for val_data in val_loader:
                    idx += 1
                    model.feed_data(val_data)
                    model.test()

                    visuals = model.get_current_visuals()
                    visuals['hz'] = visuals['hz'].numpy()

                    # Convert every SR / LR visual to a float32 array; 4-D
                    # results get a leading axis so they iterate like batches.
                    sr_imgs = OrderedDict([])
                    lr_imgs = OrderedDict([])
                    for k in visuals.keys():
                        if 'SR' in k:
                            sr_imgs[k] = util.tensor2img(
                                visuals[k], min_max=None, out_type=np.float32,
                                as_grid=False, data_format=val_data_format)
                            if sr_imgs[k].ndim == 4:
                                sr_imgs[k] = sr_imgs[k][np.newaxis, :, :, :, :]
                        if 'LR' in k:
                            lr_imgs[k] = util.tensor2img(
                                visuals[k], min_max=None, out_type=np.float32,
                                as_grid=False, data_format=val_data_format)
                            if lr_imgs[k].ndim == 4:
                                lr_imgs[k] = lr_imgs[k][np.newaxis, :, :, :, :]

                    gt_img = util.tensor2img(
                        visuals['HR'], min_max=None, out_type=np.float32,
                        as_grid=False, data_format=val_data_format)
                    if gt_img.ndim == 4:
                        gt_img = gt_img[np.newaxis, :, :, :, :]

                    # calculate PSNR
                    for sr_k in sr_imgs.keys():
                        if 'x' in sr_k:
                            # Keys containing 'x' are compared against the LR
                            # visual whose key matches; find that key.
                            for lr_k in lr_imgs.keys():
                                if sr_k.replace('SR', '') in lr_k:
                                    tmp_hr = lr_imgs[lr_k]
                                    break
                        else:
                            tmp_hr = gt_img

                        for sr_vol, lr_vol in zip(sr_imgs[sr_k], tmp_hr):
                            mse, rmse, psnr = util.calculate_mse_rmse_psnr(
                                sr_vol, lr_vol)
                            if sr_k in avg_metric:
                                avg_metric[sr_k]['mse'] += mse
                                avg_metric[sr_k]['rmse'] += rmse
                                avg_metric[sr_k]['psnr'] += psnr
                            else:
                                avg_metric[sr_k] = OrderedDict([])
                                avg_metric[sr_k]['mse'] = mse
                                avg_metric[sr_k]['rmse'] = rmse
                                avg_metric[sr_k]['psnr'] = psnr

                    # Save every 40th SR image for reference.
                    for img_num in range(len(visuals['hz'])):
                        if total_images % 40 == 0:
                            img_name = "{0:d}_{1:s}_{2:d}.png".format(
                                total_images,
                                str(Quantity(visuals['hz'][img_num], 'hz')),
                                current_step)
                            save_img_path = os.path.join(img_dir, img_name)
                            util.showAndSaveSlice(
                                sr_imgs, lr_imgs, gt_img, save_img_path,
                                scale=config['model_config']['scale'],
                                index=img_num,
                                data_format=val_data_format,
                                data_mean=data_config['val']['data_mean'],
                                data_std=data_config['val']['data_std'])
                        total_images += 1

                log_str = '# Validation #'
                log_str2 = '<epoch:{:3d}, iter:{:8,d}>'.format(
                    epoch, current_step)
                for k in avg_metric.keys():
                    for metric_k in avg_metric[k]:
                        # Average over the number of validation batches.
                        avg_metric[k][metric_k] = avg_metric[k][metric_k] / idx
                        if 'rmse' in metric_k:
                            if k not in best_psnr:
                                best_psnr[k] = 10e6
                            if avg_metric[k][metric_k] < best_psnr[k]:
                                is_newBest = True
                                best_psnr[k] = avg_metric[k][metric_k]
                                log_str += '\tBEST'
                        log_str += ' {}-{}: {:.4e} * {}'.format(
                            k, metric_k, avg_metric[k][metric_k], idx)
                        log_str2 += ' {}-{}: {:.4e} * {}'.format(
                            k, metric_k, avg_metric[k][metric_k], idx)
                        if use_tb:
                            tb_logger.add_scalar(
                                'val/{}_{}'.format(k, metric_k),
                                avg_metric[k][metric_k], current_step)

                # log
                logger.info(log_str)
                logger_val = logging.getLogger('val')  # validation logger
                logger_val.info(log_str2)

            # save models and training states
            if (current_step % run_config['logger']['save_checkpoint_freq'] == 0
                    or is_newBest):
                logger.info('Saving models and training states.')
                model.save(current_step)
                model.save_training_state(epoch, current_step)
                is_newBest = False

        # End of epoch: push the averaged training logs, then reset them.
        logs = model.get_current_log()
        for k, v in logs.items():
            if use_tb and v.avg is not None:
                tb_logger.add_scalar('Train/{}'.format(k), v.avg, current_step)
        model.reset_log()

    logger.info('Saving the final model.')
    model.save('latest')
    logger.info('End of training.')
name='fingerprint_input') # fingerprint_input_raw = tf.placeholder(tf.float32, [None, fingerprint_size], name='fingerprint_input_raw') # ground_truth_input = tf.placeholder(tf.float32, [None, label_count], name='groundtruth_input') ########################### bnnmodel = rebuild_model_final.paras_bnn(start_checkpoint) logits,lconv1,lconv1bn,ldsc1,ldsc1bn,ldsc1pw,ldsc1pwbn,ldsc2,ldsc2bn,ldsc2pw,ldsc2pwbn,ldsc3,ldsc3bn,ldsc3pw,ldsc3pwbn,\ ldsc4,ldsc4bn,ldsc4pw,ldsc4pwbn,lfc= bnnmodel.build_model( fingerprint_input, model_settings) logits_raw,conv1,conv1bn,dsc1,dsc1bn,dsc1pw,dsc1pwbn,dsc2,dsc2bn,dsc2pw,dsc2pwbn,dsc3,dsc3bn,dsc3pw,dsc3pwbn,\ dsc4,dsc4bn,dsc4pw,dsc4pwbn,fc= models.create_model( fingerprint_input_raw, model_settings, model_architecture, is_training=False) ########################### conv1_err = tf.reduce_sum(lconv1 - conv1) conv1bn_err = tf.reduce_sum(lconv1bn - conv1bn) dsc1_err = tf.reduce_sum(ldsc1 - dsc1) dsc1bn_err = tf.reduce_sum(ldsc1bn - dsc1bn) dsc1pw_err = tf.reduce_sum(ldsc1pw - dsc1pw) dsc1pwbn_err = tf.reduce_sum(ldsc1pwbn - dsc1pwbn) dsc2_err = tf.reduce_sum(ldsc2 - dsc2) dsc2bn_err = tf.reduce_sum(ldsc2bn - dsc2bn) dsc2pw_err = tf.reduce_sum(ldsc2pw - dsc2pw) dsc2pwbn_err = tf.reduce_sum(ldsc2pwbn - dsc2pwbn)
def main():
    """Jointly train the kernel Predictor and Corrector with a fixed SFTMD net.

    Steps: parse the three option files; seed the RNGs; load the PCA matrix;
    configure (optional) distributed training, folders and loggers; build the
    data loaders and the three models; then, per iteration, train the
    Predictor on degraded inputs and run ``opt_C['step']`` Corrector updates,
    with periodic logging, validation (PSNR) and checkpointing. Only rank <= 0
    validates, saves and writes to TensorBoard.

    Raises:
        NotImplementedError: If an unknown dataset phase is configured.

    NOTE(review): the original source had lost its indentation; nesting was
    reconstructed from statement semantics -- confirm against upstream.
    """
    #### setup options of three networks
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt_P', type=str,
                        help='Path to option YMAL file of Predictor.')
    parser.add_argument('-opt_C', type=str,
                        help='Path to option YMAL file of Corrector.')
    parser.add_argument('-opt_F', type=str,
                        help='Path to option YMAL file of SFTMD_Net.')
    parser.add_argument('--launcher', choices=['none', 'pytorch'],
                        default='none', help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    opt_P = option.parse(args.opt_P, is_train=True)
    opt_C = option.parse(args.opt_C, is_train=True)
    opt_F = option.parse(args.opt_F, is_train=True)

    # convert to NoneDict, which returns None for missing keys
    opt_P = option.dict_to_nonedict(opt_P)
    opt_C = option.dict_to_nonedict(opt_C)
    opt_F = option.dict_to_nonedict(opt_F)

    # choose small opt for SFTMD test, fill path of pre-trained model_F
    opt_F = opt_F['sftmd']

    #### set random seed
    seed = opt_P['train']['manual_seed']
    if seed is None:
        seed = random.randint(1, 10000)
    util.set_random_seed(seed)

    # load PCA matrix of enough kernel
    print('load PCA matrix')
    pca_matrix = torch.load('./pca_matrix.pth',
                            map_location=lambda storage, loc: storage)
    print('PCA matrix shape: {}'.format(pca_matrix.shape))

    #### distributed training settings
    if args.launcher == 'none':  # disabled distributed training
        opt_P['dist'] = False
        opt_F['dist'] = False
        opt_C['dist'] = False
        rank = -1
        print('Disabled distributed training.')
    else:
        opt_P['dist'] = True
        opt_F['dist'] = True
        opt_C['dist'] = True
        init_dist()
        # number of processes in the current process group
        world_size = torch.distributed.get_world_size()
        # rank of this process within the group
        rank = torch.distributed.get_rank()

    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True

    ###### Predictor&Corrector train ######

    #### loading resume state if exists
    if opt_P['path'].get('resume_state', None):
        # distributed resuming: all load into default GPU
        device_id = torch.cuda.current_device()
        resume_state = torch.load(
            opt_P['path']['resume_state'],
            map_location=lambda storage, loc: storage.cuda(device_id))
        option.check_resume(opt_P, resume_state['iter'])  # check resume options
    else:
        resume_state = None

    #### mkdir and loggers
    # Fix: pre-bind the writer so later uses (including the final close) do not
    # raise NameError when TensorBoard is disabled or on non-zero ranks.
    tb_logger = None
    if rank <= 0:  # normal training (rank -1) OR distributed training (rank 0)
        if resume_state is None:
            # Predictor path
            # rename experiment folder if exists
            util.mkdir_and_rename(opt_P['path']['experiments_root'])
            util.mkdirs((path for key, path in opt_P['path'].items()
                         if not key == 'experiments_root'
                         and 'pretrain_model' not in key
                         and 'resume' not in key))
            # Corrector path
            # rename experiment folder if exists
            util.mkdir_and_rename(opt_C['path']['experiments_root'])
            util.mkdirs((path for key, path in opt_C['path'].items()
                         if not key == 'experiments_root'
                         and 'pretrain_model' not in key
                         and 'resume' not in key))

        # config loggers. Before it, the log will not work
        util.setup_logger('base', opt_P['path']['log'],
                          'train_' + opt_P['name'], level=logging.INFO,
                          screen=True, tofile=True)
        util.setup_logger('val', opt_P['path']['log'],
                          'val_' + opt_P['name'], level=logging.INFO,
                          screen=True, tofile=True)
        logger = logging.getLogger('base')
        logger.info(option.dict2str(opt_P))
        logger.info(option.dict2str(opt_C))

        # tensorboard logger
        if opt_P['use_tb_logger'] and 'debug' not in opt_P['name']:
            version = float(torch.__version__[0:3])
            if version >= 1.1:  # PyTorch 1.1
                from torch.utils.tensorboard import SummaryWriter
            else:
                logger.info(
                    'You are using PyTorch {}. Tensorboard will use [tensorboardX]'.format(version))
                from tensorboardX import SummaryWriter
            tb_logger = SummaryWriter(log_dir='../tb_logger/' + opt_P['name'])
    else:
        util.setup_logger('base', opt_P['path']['log'], 'train',
                          level=logging.INFO, screen=True)
        logger = logging.getLogger('base')

    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True

    #### create train and val dataloader
    dataset_ratio = 200  # enlarge the size of each epoch
    for phase, dataset_opt in opt_P['datasets'].items():
        if phase == 'train':
            train_set = create_dataset(dataset_opt)
            train_size = int(
                math.ceil(len(train_set) / dataset_opt['batch_size']))
            total_iters = int(opt_P['train']['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            if opt_P['dist']:
                train_sampler = DistIterSampler(train_set, world_size, rank,
                                                dataset_ratio)
                total_epochs = int(
                    math.ceil(total_iters / (train_size * dataset_ratio)))
            else:
                train_sampler = None
            train_loader = create_dataloader(train_set, dataset_opt, opt_P,
                                             train_sampler)
            if rank <= 0:
                logger.info('Number of train images: {:,d}, iters: {:,d}'.format(
                    len(train_set), train_size))
                logger.info('Total epochs needed: {:d} for iters {:,d}'.format(
                    total_epochs, total_iters))
        elif phase == 'val':
            val_set = create_dataset(dataset_opt)
            val_loader = create_dataloader(val_set, dataset_opt, opt_P, None)
            if rank <= 0:
                logger.info('Number of val images in [{:s}]: {:d}'.format(
                    dataset_opt['name'], len(val_set)))
        else:
            raise NotImplementedError(
                'Phase [{:s}] is not recognized.'.format(phase))
    assert train_loader is not None
    assert val_loader is not None

    #### create model
    model_F = create_model(opt_F)  # load pretrained model of SFTMD
    model_P = create_model(opt_P)
    model_C = create_model(opt_C)

    #### resume training
    if resume_state:
        logger.info('Resuming training from epoch: {}, iter: {}.'.format(
            resume_state['epoch'], resume_state['iter']))
        start_epoch = resume_state['epoch']
        current_step = resume_state['iter']
        # handle optimizers and schedulers
        model_P.resume_training(resume_state)
    else:
        current_step = 0
        start_epoch = 0

    #### training
    logger.info('Start training from epoch: {:d}, iter: {:d}'.format(
        start_epoch, current_step))
    for epoch in range(start_epoch, total_epochs + 1):
        if opt_P['dist']:
            train_sampler.set_epoch(epoch)
        for _, train_data in enumerate(train_loader):
            current_step += 1
            if current_step > total_iters:
                break

            #### preprocessing for LR_img and kernel map
            prepro = util.SRMDPreprocessing(
                opt_P['scale'], pca_matrix, para_input=opt_P['code_length'],
                kernel=opt_P['kernel_size'], noise=False, cuda=True,
                sig_min=0.2, sig_max=4.0, rate_iso=1.0, scaling=3,
                rate_cln=0.2, noise_high=0.0)
            LR_img, ker_map = prepro(train_data['GT'])

            #### training Predictor
            model_P.feed_data(LR_img, ker_map)
            model_P.optimize_parameters(current_step)
            P_visuals = model_P.get_current_visuals()
            est_ker_map = P_visuals['Batch_est_ker_map']

            #### log of model_P
            if current_step % opt_P['logger']['print_freq'] == 0:
                logs = model_P.get_current_log()
                message = 'Predictor <epoch:{:3d}, iter:{:8,d}, lr:{:.3e}> '.format(
                    epoch, current_step, model_P.get_current_learning_rate())
                for k, v in logs.items():
                    message += '{:s}: {:.4e} '.format(k, v)
                    # tensorboard logger
                    if (tb_logger is not None and opt_P['use_tb_logger']
                            and 'debug' not in opt_P['name']):
                        if rank <= 0:
                            tb_logger.add_scalar(k, v, current_step)
                if rank <= 0:
                    logger.info(message)

            #### training Corrector
            for step in range(opt_C['step']):
                # test SFTMD for corresponding SR image
                model_F.feed_data(train_data, LR_img, est_ker_map)
                model_F.test()
                F_visuals = model_F.get_current_visuals()
                SR_img = F_visuals['Batch_SR']

                # train corrector given SR image and estimated kernel map
                model_C.feed_data(SR_img, est_ker_map, ker_map)
                model_C.optimize_parameters(current_step)
                C_visuals = model_C.get_current_visuals()
                # The corrected estimate feeds the next correction step.
                est_ker_map = C_visuals['Batch_est_ker_map']

                #### log of model_C
                if current_step % opt_C['logger']['print_freq'] == 0:
                    logs = model_C.get_current_log()
                    message = 'Corrector <epoch:{:3d}, iter:{:8,d}, lr:{:.3e}> '.format(
                        epoch, current_step, model_C.get_current_learning_rate())
                    for k, v in logs.items():
                        message += '{:s}: {:.4e} '.format(k, v)
                        # tensorboard logger; the writer is created from the
                        # Predictor options, so also check that it exists.
                        if (tb_logger is not None and opt_C['use_tb_logger']
                                and 'debug' not in opt_C['name']):
                            if rank <= 0:
                                tb_logger.add_scalar(k, v, current_step)
                    if rank <= 0:
                        logger.info(message)

            # validation, to produce ker_map_list(fake)
            if current_step % opt_P['train']['val_freq'] == 0 and rank <= 0:
                avg_psnr = 0.0
                idx = 0
                for _, val_data in enumerate(val_loader):
                    prepro = util.SRMDPreprocessing(
                        opt_P['scale'], pca_matrix,
                        para_input=opt_P['code_length'],
                        kernel=opt_P['kernel_size'], noise=False, cuda=True,
                        sig_min=0.2, sig_max=4.0, rate_iso=1.0, scaling=3,
                        rate_cln=0.2, noise_high=0.0)
                    LR_img, ker_map = prepro(val_data['GT'])
                    single_img_psnr = 0.0
                    lr_img = util.tensor2img(LR_img)  # LR image for reference

                    # valid Predictor
                    model_P.feed_data(LR_img, ker_map)
                    model_P.test()
                    P_visuals = model_P.get_current_visuals()
                    est_ker_map = P_visuals['Batch_est_ker_map']

                    # Save images for reference
                    img_name = os.path.splitext(
                        os.path.basename(val_data['LQ_path'][0]))[0]
                    img_dir = os.path.join(opt_P['path']['val_images'],
                                           img_name)
                    util.mkdir(img_dir)
                    save_lr_path = os.path.join(
                        img_dir, '{:s}_LR.png'.format(img_name))
                    util.save_img(lr_img, save_lr_path)

                    for step in range(opt_C['step']):
                        step += 1  # 1-based step number for names and logs
                        idx += 1
                        model_F.feed_data(val_data, LR_img, est_ker_map)
                        model_F.test()
                        F_visuals = model_F.get_current_visuals()
                        SR_img = F_visuals['Batch_SR']

                        model_C.feed_data(SR_img, est_ker_map, ker_map)
                        model_C.test()
                        C_visuals = model_C.get_current_visuals()
                        est_ker_map = C_visuals['Batch_est_ker_map']

                        sr_img = util.tensor2img(F_visuals['SR'])  # uint8
                        gt_img = util.tensor2img(F_visuals['GT'])  # uint8

                        save_img_path = os.path.join(
                            img_dir, '{:s}_{:d}_{:d}.png'.format(
                                img_name, current_step, step))
                        util.save_img(sr_img, save_img_path)

                        # calculate PSNR on images cropped by `scale` pixels
                        crop_size = opt_P['scale']
                        gt_img = gt_img / 255.
                        sr_img = sr_img / 255.
                        cropped_sr_img = sr_img[crop_size:-crop_size,
                                                crop_size:-crop_size, :]
                        cropped_gt_img = gt_img[crop_size:-crop_size,
                                                crop_size:-crop_size, :]
                        step_psnr = util.calculate_psnr(cropped_sr_img * 255,
                                                        cropped_gt_img * 255)
                        logger.info(
                            '<epoch:{:3d}, iter:{:8,d}, step:{:3d}> img:{:s}, psnr: {:.6f}'.format(
                                epoch, current_step, step, img_name, step_psnr))
                        single_img_psnr += step_psnr
                        # Reuse the value instead of computing PSNR twice.
                        avg_psnr += step_psnr

                    avg_signle_img_psnr = single_img_psnr / step
                    logger.info(
                        '<epoch:{:3d}, iter:{:8,d}, step:{:3d}> img:{:s}, average psnr: {:.6f}'.format(
                            epoch, current_step, step, img_name,
                            avg_signle_img_psnr))

                avg_psnr = avg_psnr / idx

                # log
                logger.info('# Validation # PSNR: {:.6f}'.format(avg_psnr))
                logger_val = logging.getLogger('val')  # validation logger
                logger_val.info(
                    '<epoch:{:3d}, iter:{:8,d}, step:{:3d}> psnr: {:.6f}'.format(
                        epoch, current_step, step, avg_psnr))
                # tensorboard logger
                if (tb_logger is not None and opt_P['use_tb_logger']
                        and 'debug' not in opt_P['name']):
                    tb_logger.add_scalar('psnr', avg_psnr, current_step)

            #### save models and training states
            if current_step % opt_P['logger']['save_checkpoint_freq'] == 0:
                if rank <= 0:
                    logger.info('Saving models and training states.')
                    model_P.save(current_step)
                    model_P.save_training_state(epoch, current_step)
                    model_C.save(current_step)
                    model_C.save_training_state(epoch, current_step)

    if rank <= 0:
        logger.info('Saving the final model.')
        model_P.save('latest')
        model_C.save('latest')
        logger.info('End of Predictor and Corrector training.')
    # Fix: only close the writer when one was actually created.
    if tb_logger is not None:
        tb_logger.close()
def main(_):
    """Build the keyword-spotting graph, train it, and evaluate periodically.

    All configuration is read from the module-level ``FLAGS``. Each loop
    iteration runs one optimizer update. Every ``FLAGS.eval_step_interval``
    steps (and on the last step) the validation and testing partitions are
    scored; the test set is scored twice, once with ``istrain`` fed as False
    and once as True, and the best accuracy of each variant is tracked.
    Checkpoints are written under ``<train_dir>/<model_architecture>/`` every
    ``FLAGS.save_step_interval`` steps and on the last step, after which a
    ``details.txt`` summary of the best test accuracies is written there too.

    Args:
      _: Unused positional argument (presumably the argv list handed over by
        the app runner -- confirm against the call site).

    Raises:
      Exception: If ``--how_many_training_steps`` and ``--learning_rate`` do
        not contain the same number of comma-separated entries.
    """
    best_acc = 0
    best_step = 0
    best_acc_istrain = 0
    best_step_istrain = 0
    # Size of the testing partition; filled in by every evaluation pass and
    # reused for the final details.txt summary.
    test_set_size = 0

    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    # Begin by making sure we have the training data we need. If you already have
    # training data of your own, use `--data_url= ` on the command line to avoid
    # downloading.
    model_settings = models.prepare_model_settings(
        len(input_data_filler.prepare_words_list_my(
            FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
    audio_processor = input_data_filler.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
        FLAGS.unknown_percentage, FLAGS.wanted_words.split(','),
        FLAGS.validation_percentage, FLAGS.testing_percentage, model_settings)
    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']
    time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    # Both flags are comma-separated lists and must have the same length.
    # NOTE: only the summed step count and the FIRST learning rate are used
    # below; the per-stage rates are superseded by the exponential decay in
    # the training loop.
    training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                       len(learning_rates_list)))

    # ---------------------------------------------------------------------
    # Graph construction.
    # ---------------------------------------------------------------------
    fingerprint_input = tf.placeholder(
        tf.float32, [None, fingerprint_size], name='fingerprint_input')

    # Model creation. `istrain` is fed per sess.run call so the same graph can
    # be run in either mode.
    istrain = tf.placeholder(tf.bool, name='istrain')
    logits = models.create_model(
        fingerprint_input,
        model_settings,
        FLAGS.model_architecture,
        is_training=istrain)

    # Ground-truth labels (fed as float vectors of length label_count).
    ground_truth_input = tf.placeholder(
        tf.float32, [None, label_count], name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms of
    # numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # Cross-entropy loss.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=ground_truth_input, logits=logits))
    tf.summary.scalar('cross_entropy', cross_entropy_mean)

    # Learning rate, accuracy and confusion matrix:
    #   learning_rate_input    learning rate fed at every step (placeholder)
    #   train_step             optimizer update op
    #   predicted_indices      predicted class indices
    #   expected_indices       ground-truth class indices
    #   correct_prediction     element-wise "prediction was right" tensor
    #   confusion_matrix       confusion matrix of the fed batch
    #   evaluation_step        accuracy of the fed batch
    #   global_step            global training step counter
    #   increment_global_step  op that advances global_step by one
    learning_rate_input = tf.placeholder(
        tf.float32, [], name='learning_rate_input')
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # The numeric checks must be a dependency of the train op, otherwise
    # --check_nans builds the check ops but never runs them. With the flag off
    # `control_dependencies` is empty and this is just `update_ops`.
    with tf.control_dependencies(update_ops + control_dependencies):
        train_step = tf.train.AdamOptimizer(
            learning_rate_input).minimize(cross_entropy_mean)
    predicted_indices = tf.argmax(logits, 1)
    expected_indices = tf.argmax(ground_truth_input, 1)
    correct_prediction = tf.equal(predicted_indices, expected_indices)
    confusion_matrix = tf.confusion_matrix(
        expected_indices, predicted_indices, num_classes=label_count)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    acc = tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    # max_to_keep=None keeps every checkpoint instead of only the most recent
    # few.
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)

    # Merge the summaries and write them out under FLAGS.summaries_dir.
    merged_summaries = tf.summary.merge_all()
    validation_merged_summaries = tf.summary.merge([
        tf.get_collection(tf.GraphKeys.SUMMARIES, 'accuracy'),
        tf.get_collection(tf.GraphKeys.SUMMARIES, 'cross_entropy')
    ])
    test_summaries = tf.summary.merge([acc])
    test_summaries_istrain = tf.summary.merge([acc])
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation')
    test_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/test')
    test_istrain_writer = tf.summary.FileWriter(
        FLAGS.summaries_dir + '/test_istrain')

    tf.global_variables_initializer().run()

    start_step = 1
    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
        start_step = global_step.eval(session=sess)
    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Save list of words.
    with gfile.GFile(
            os.path.join(FLAGS.train_dir,
                         FLAGS.model_architecture + '_labels.txt'), 'w') as f:
        f.write('\n'.join(audio_processor.words_list))

    # Author's notes on the available architectures:
    #   model1: fc
    #   model2: conv, ~940k parameters
    #   model3: low_latency_conv, about the same as model1
    #   model4: ~750k parameters

    # Learning-rate schedule: exponential decay from learning_rates_list[0]
    # down to lr_decay_floor times that value over the first lr_decay_steps
    # steps, constant afterwards.
    lr_decay_steps = 12000
    lr_decay_floor = 0.02

    # ---------------------------------------------------------------------
    # Main training loop.
    # ---------------------------------------------------------------------
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Figure out what the current learning rate is. The exponent is forced
        # to float so the decay also works where `/` on two ints floors
        # (otherwise the exponent is 0 for every step below lr_decay_steps).
        if training_step < lr_decay_steps + 1:
            learning_rate_value = learning_rates_list[0] * lr_decay_floor**(
                training_step / float(lr_decay_steps))
        else:
            learning_rate_value = learning_rates_list[0] * lr_decay_floor

        # Pull the audio samples we'll use for training.
        train_fingerprints, train_ground_truth = audio_processor.get_data_my(
            FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
            FLAGS.background_volume, time_shift_samples, 'training', sess)
        # NOTE(review): np_round_and_clip is defined elsewhere; judging by its
        # name it quantizes the fingerprints -- confirm.
        train_fingerprints = np_round_and_clip(train_fingerprints)

        # Run the graph with this batch of training data.
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            [
                merged_summaries, evaluation_step, cross_entropy_mean,
                train_step, increment_global_step
            ],
            feed_dict={
                fingerprint_input: train_fingerprints,
                ground_truth_input: train_ground_truth,
                learning_rate_input: learning_rate_value,
                istrain: True
            })
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
                        (training_step, learning_rate_value,
                         train_accuracy * 100, cross_entropy_value))

        is_last_step = (training_step == training_steps_max)
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
            # ---- Validation set: accumulate accuracy and confusion matrix.
            set_size = audio_processor.set_size('validation')
            total_accuracy = 0
            total_conf_matrix = None
            for i in xrange(0, set_size, FLAGS.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data_my(FLAGS.batch_size, i,
                                                model_settings, 0.0, 0.0, 0,
                                                'validation', sess))
                validation_fingerprints = np_round_and_clip(
                    validation_fingerprints)
                # Run a validation step and capture the accuracy and
                # cross-entropy summaries for TensorBoard.
                # NOTE(review): validation is fed istrain=True, i.e. scored in
                # training mode. Left unchanged because the test set below is
                # deliberately scored in both modes -- confirm this is intended.
                validation_summaries, validation_accuracy, conf_matrix = sess.run(
                    [
                        validation_merged_summaries, evaluation_step,
                        confusion_matrix
                    ],
                    feed_dict={
                        fingerprint_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        istrain: True
                    })
                validation_writer.add_summary(validation_summaries,
                                              training_step)
                # The last batch may be short; weight it by its real size.
                batch_size = min(FLAGS.batch_size, set_size - i)
                total_accuracy += (validation_accuracy * batch_size) / set_size
                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
            tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                            (training_step, total_accuracy * 100, set_size))

            # ---- Test set: one pass over the whole partition (how_many=-1),
            # scored once with istrain=False and once with istrain=True.
            test_set_size = audio_processor.set_size('testing')
            tf.logging.info('set_size=%d', test_set_size)
            test_fingerprints, test_ground_truth = audio_processor.get_data_my(
                -1, 0, model_settings, 0.0, 0.0, 0, 'testing', sess)
            test_fingerprints = np_round_and_clip(test_fingerprints)
            final_summary, test_accuracy, conf_matrix = sess.run(
                [test_summaries, evaluation_step, confusion_matrix],
                feed_dict={
                    fingerprint_input: test_fingerprints,
                    ground_truth_input: test_ground_truth,
                    istrain: False
                })
            final_summary_istrain, test_accuracy_istrain = sess.run(
                [test_summaries_istrain, evaluation_step],
                feed_dict={
                    fingerprint_input: test_fingerprints,
                    ground_truth_input: test_ground_truth,
                    istrain: True
                })
            if test_accuracy > best_acc:
                best_acc = test_accuracy
                best_step = training_step
            if test_accuracy_istrain > best_acc_istrain:
                best_acc_istrain = test_accuracy_istrain
                best_step_istrain = training_step
            test_writer.add_summary(final_summary, training_step)
            test_istrain_writer.add_summary(final_summary_istrain,
                                            training_step)
            tf.logging.info('Confusion Matrix:\n %s' % (conf_matrix))
            # N is the real size of the testing partition rather than a
            # constant baked in for one particular dataset split.
            tf.logging.info('test accuracy = %.1f%% (N=%d)' %
                            (test_accuracy * 100, test_set_size))
            tf.logging.info('test_istrain accuracy = %.1f%% (N=%d)' %
                            (test_accuracy_istrain * 100, test_set_size))
            tf.logging.info('Best test accuracy before now = %.1f%% (N=%d)' %
                            (best_acc * 100, test_set_size) +
                            ' at step of ' + str(best_step))
            tf.logging.info(
                'Best test_istrain accuracy before now = %.1f%% (N=%d)' %
                (best_acc_istrain * 100, test_set_size) + ' at step of ' +
                str(best_step_istrain))

        # Save the model checkpoint periodically.
        if (training_step % FLAGS.save_step_interval == 0 or
                training_step == training_steps_max):
            checkpoint_path = os.path.join(
                FLAGS.train_dir + '/' + FLAGS.model_architecture,
                FLAGS.model_architecture + '.ckpt')
            tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step)
            saver.save(sess, checkpoint_path, global_step=training_step)

        # After the very last step, persist the best test results next to the
        # checkpoints. The evaluation above always runs on the last step, so
        # test_set_size is populated here.
        if training_step == training_steps_max:
            print_line = (
                'Best test accuracy before now = %.1f%% (N=%d)' %
                (best_acc * 100, test_set_size) + ' at step of ' +
                str(best_step) + '\n' +
                'Best test_istrain accuracy before now = %.1f%% (N=%d)' %
                (best_acc_istrain * 100, test_set_size) + ' at step of ' +
                str(best_step_istrain))
            with open(FLAGS.train_dir + '/' + FLAGS.model_architecture +
                      '/details.txt', 'w') as f:
                f.write(print_line)
def main(_):
    """Train a keyword-spotting model, keeping the best-validating checkpoint.

    All configuration is read from the module-level ``FLAGS``. Each loop
    iteration runs one Adam update. Every ``FLAGS.eval_step_interval`` steps
    (and on the last step) the validation partition is scored, and whenever
    the validation accuracy improves a checkpoint is saved under
    ``<train_dir>/best/``. After training, the testing partition is scored
    once and the result is logged.

    Args:
      _: Unused positional argument (presumably the argv list handed over by
        the app runner -- confirm against the call site).

    Raises:
      Exception: If ``--how_many_training_steps`` and ``--learning_rate`` do
        not contain the same number of comma-separated entries.
    """
    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    # Begin by making sure we have the training data we need. If you already have
    # training data of your own, use `--data_url= ` on the command line to avoid
    # downloading.
    model_settings = models.prepare_model_settings(
        len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)
    audio_processor = input_data.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
        FLAGS.unknown_percentage, FLAGS.wanted_words.split(','),
        FLAGS.validation_percentage, FLAGS.testing_percentage, model_settings)
    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']
    time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    # Figure out the learning rates for each training phase. Since it's often
    # effective to have high learning rates at the start of training, followed by
    # lower levels towards the end, the number of steps and learning rates can be
    # specified as comma-separated lists to define the rate at each stage. For
    # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # will run 13,000 training loops in total, with a rate of 0.001 for the first
    # 10,000, and 0.0001 for the final 3,000.
    training_steps_list = list(map(int, FLAGS.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                       len(learning_rates_list)))

    fingerprint_input = tf.placeholder(
        tf.float32, [None, fingerprint_size], name='fingerprint_input')

    logits, dropout_prob = models.create_model(
        fingerprint_input,
        model_settings,
        FLAGS.model_architecture,
        FLAGS.model_size_info,
        is_training=True)

    # Define loss and optimizer
    ground_truth_input = tf.placeholder(
        tf.float32, [None, label_count], name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms of
    # numerical errors start occurring during training.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery in the graph.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=ground_truth_input, logits=logits))
    tf.summary.scalar('cross_entropy', cross_entropy_mean)

    # The train op depends on the collected UPDATE_OPS and on the optional
    # numeric checks.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.name_scope('train'), tf.control_dependencies(update_ops), \
            tf.control_dependencies(control_dependencies):
        learning_rate_input = tf.placeholder(
            tf.float32, [], name='learning_rate_input')
        train_op = tf.train.AdamOptimizer(learning_rate_input)
        train_step = slim.learning.create_train_op(cross_entropy_mean, train_op)
    predicted_indices = tf.argmax(logits, 1)
    expected_indices = tf.argmax(ground_truth_input, 1)
    correct_prediction = tf.equal(predicted_indices, expected_indices)
    confusion_matrix = tf.confusion_matrix(
        expected_indices, predicted_indices, num_classes=label_count)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge all the summaries and write them out under FLAGS.summaries_dir.
    merged_summaries = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation')

    tf.global_variables_initializer().run()

    # Parameter count. Static shapes are used on purpose: building a tf.shape
    # op per variable would add stray nodes to the graph that is exported to
    # .pbtxt just below, and would need a session round-trip per variable.
    num_params = sum(
        int(np.prod(variable.get_shape().as_list()))
        for variable in tf.trainable_variables())
    print('Total number of Parameters: ', num_params)

    start_step = 1
    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(sess, FLAGS.start_checkpoint)
        start_step = global_step.eval(session=sess)
    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(sess.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Save list of words.
    with gfile.GFile(
            os.path.join(FLAGS.train_dir,
                         FLAGS.model_architecture + '_labels.txt'), 'w') as f:
        f.write('\n'.join(audio_processor.words_list))

    # Training loop.
    best_accuracy = 0
    training_steps_max = np.sum(training_steps_list)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Figure out what the current learning rate is: walk the stages until
        # the cumulative step budget covers the current step.
        training_steps_sum = 0
        for stage_steps, stage_rate in zip(training_steps_list,
                                           learning_rates_list):
            training_steps_sum += stage_steps
            if training_step <= training_steps_sum:
                learning_rate_value = stage_rate
                break

        # Pull the audio samples we'll use for training.
        train_fingerprints, train_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, 0, model_settings, FLAGS.background_frequency,
            FLAGS.background_volume, time_shift_samples, 'training', sess)

        # Run the graph with this batch of training data.
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            [
                merged_summaries, evaluation_step, cross_entropy_mean,
                train_step, increment_global_step
            ],
            feed_dict={
                fingerprint_input: train_fingerprints,
                ground_truth_input: train_ground_truth,
                learning_rate_input: learning_rate_value,
                dropout_prob: 1.0
            })
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info('Step #%d: rate %f, accuracy %.2f%%, cross entropy %f' %
                        (training_step, learning_rate_value,
                         train_accuracy * 100, cross_entropy_value))

        is_last_step = (training_step == training_steps_max)
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
            set_size = audio_processor.set_size('validation')
            total_accuracy = 0
            total_conf_matrix = None
            for i in xrange(0, set_size, FLAGS.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data(FLAGS.batch_size, i,
                                             model_settings, 0.0, 0.0, 0,
                                             'validation', sess))

                # Run a validation step and capture training summaries for
                # TensorBoard with the `merged` op. The wall-clock time of the
                # sess.run call is printed for each batch.
                time1 = time.time()
                validation_summary, validation_accuracy, conf_matrix = sess.run(
                    [merged_summaries, evaluation_step, confusion_matrix],
                    feed_dict={
                        fingerprint_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        dropout_prob: 1.0
                    })
                time2 = time.time()
                print("TIMMEEEEEE: ", (time2 - time1))
                validation_writer.add_summary(validation_summary, training_step)
                # The last batch may be short; weight it by its real size.
                batch_size = min(FLAGS.batch_size, set_size - i)
                total_accuracy += (validation_accuracy * batch_size) / set_size
                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
            tf.logging.info('Step %d: Validation accuracy = %.2f%% (N=%d)' %
                            (training_step, total_accuracy * 100, set_size))

            # Save the model checkpoint when validation accuracy improves
            if total_accuracy > best_accuracy:
                best_accuracy = total_accuracy
                checkpoint_path = os.path.join(
                    FLAGS.train_dir, 'best',
                    FLAGS.model_architecture + '_' +
                    str(int(best_accuracy * 10000)) + '.ckpt')
                tf.logging.info('Saving best model to "%s-%d"', checkpoint_path,
                                training_step)
                saver.save(sess, checkpoint_path, global_step=training_step)
            tf.logging.info('So far the best validation accuracy is %.2f%%' %
                            (best_accuracy * 100))

    # Final pass over the testing partition, batch by batch.
    set_size = audio_processor.set_size('testing')
    tf.logging.info('set_size=%d', set_size)
    total_accuracy = 0
    total_conf_matrix = None
    for i in xrange(0, set_size, FLAGS.batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess)
        test_accuracy, conf_matrix = sess.run(
            [evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: test_fingerprints,
                ground_truth_input: test_ground_truth,
                dropout_prob: 1.0
            })
        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (test_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
            total_conf_matrix = conf_matrix
        else:
            total_conf_matrix += conf_matrix
    tf.logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
    tf.logging.info('Final test accuracy = %.2f%% (N=%d)' %
                    (total_accuracy * 100, set_size))
def main():
    """Train a super-resolution model as described by an option JSON file.

    Parses ``-opt <path>``, optionally resumes from ``opt['path']['resume_state']``,
    sets up loggers and (unless the run name contains ``debug``) a TensorBoard
    writer, builds the train/val dataloaders and the model, then runs the
    training loop with periodic logging, PSNR validation on the Y channel and
    checkpointing. The final weights are saved under the label ``latest``.

    Raises:
      NotImplementedError: If ``opt['datasets']`` contains a phase other than
        ``train`` or ``val``.
      AssertionError: If no ``train`` phase is configured.
    """
    # options
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt',
                        type=str,
                        required=True,
                        help='Path to option JSON file.')
    opt = option.parse(parser.parse_args().opt, is_train=True)
    # Convert to NoneDict, which return None for missing key.
    opt = option.dict_to_nonedict(opt)

    # train from scratch OR resume training
    if opt['path']['resume_state']:  # resuming training
        resume_state = torch.load(opt['path']['resume_state'])
    else:  # training from scratch
        resume_state = None
        # rename old folder if exists
        util.mkdir_and_rename(opt['path']['experiments_root'])
        util.mkdirs((path for key, path in opt['path'].items()
                     if not key == 'experiments_root' and
                     'pretrain_model' not in key and 'resume' not in key))

    # config loggers. Before it, the log will not work
    util.setup_logger(None,
                      opt['path']['log'],
                      'train',
                      level=logging.INFO,
                      screen=True)
    util.setup_logger('val', opt['path']['log'], 'val', level=logging.INFO)
    logger = logging.getLogger('base')

    if resume_state:
        logger.info('Resuming training from epoch: {}, iter: {}.'.format(
            resume_state['epoch'], resume_state['iter']))
        option.check_resume(opt)  # check resume options

    logger.info(option.dict2str(opt))

    # tensorboard logger (skipped for runs whose name contains 'debug')
    use_tb_logger = opt['use_tb_logger'] and 'debug' not in opt['name']
    if use_tb_logger:
        from tensorboardX import SummaryWriter
        tb_logger = SummaryWriter(log_dir='../tb_logger/' + opt['name'])

    # random seed
    seed = opt['train']['manual_seed']
    if seed is None:
        seed = random.randint(1, 10000)
    logger.info('Random seed: {}'.format(seed))
    util.set_random_seed(seed)

    # Let cuDNN auto-tune its algorithms. The attribute is `benchmark`; a
    # misspelled name would only create an unused attribute and leave
    # auto-tuning off.
    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True

    # create train and val dataloader
    train_loader = None  # so the assert below fires if no train phase exists
    for phase, dataset_opt in opt['datasets'].items():
        if phase == 'train':
            train_set = create_dataset(dataset_opt)
            train_size = int(
                math.ceil(len(train_set) / dataset_opt['batch_size']))
            logger.info('Number of train images: {:,d}, iters: {:,d}'.format(
                len(train_set), train_size))
            total_iters = int(opt['train']['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            logger.info('Total epochs needed: {:d} for iters {:,d}'.format(
                total_epochs, total_iters))
            train_loader = create_dataloader(train_set, dataset_opt)
        elif phase == 'val':
            val_set = create_dataset(dataset_opt)
            val_loader = create_dataloader(val_set, dataset_opt)
            logger.info('Number of val images in [{:s}]: {:d}'.format(
                dataset_opt['name'], len(val_set)))
        else:
            raise NotImplementedError(
                'Phase [{:s}] is not recognized.'.format(phase))
    assert train_loader is not None

    # create model
    model = create_model(opt)

    # resume training
    if resume_state:
        start_epoch = resume_state['epoch']
        current_step = resume_state['iter']
        model.resume_training(resume_state)  # handle optimizers and schedulers
    else:
        current_step = 0
        start_epoch = 0

    # training
    logger.info('Start training from epoch: {:d}, iter: {:d}'.format(
        start_epoch, current_step))
    for epoch in range(start_epoch, total_epochs):
        for train_data in train_loader:
            current_step += 1
            if current_step > total_iters:
                break
            # update learning rate
            model.update_learning_rate()

            # training
            model.feed_data(train_data)
            model.optimize_parameters(current_step)

            # log
            if current_step % opt['logger']['print_freq'] == 0:
                logs = model.get_current_log()
                message = '<epoch:{:3d}, iter:{:8,d}, lr:{:.3e}> '.format(
                    epoch, current_step, model.get_current_learning_rate())
                for k, v in logs.items():
                    message += '{:s}: {:.4e} '.format(k, v)
                    # tensorboard logger
                    if use_tb_logger:
                        tb_logger.add_scalar(k, v, current_step)
                logger.info(message)

            # validation
            if current_step % opt['train']['val_freq'] == 0:
                avg_psnr = 0.0
                idx = 0
                for val_data in val_loader:
                    idx += 1
                    img_name = os.path.splitext(
                        os.path.basename(val_data['LR_path'][0]))[0]
                    img_dir = os.path.join(opt['path']['val_images'], img_name)
                    util.mkdir(img_dir)

                    model.feed_data(val_data)
                    model.test()

                    visuals = model.get_current_visuals()
                    sr_img = util.tensor2img(visuals['SR'])  # uint8
                    gt_img = util.tensor2img(visuals['HR'])  # uint8

                    # Save SR images for reference
                    save_img_path = os.path.join(
                        img_dir, '{:s}_{:d}.png'.format(img_name, current_step))
                    util.save_img(sr_img, save_img_path)

                    # calculate PSNR on the Y channel, ignoring a border of
                    # `scale` pixels on every side
                    crop_size = opt['scale']
                    gt_img = gt_img / 255.
                    sr_img = sr_img / 255.
                    cropped_sr_img = sr_img[crop_size:-crop_size,
                                            crop_size:-crop_size, :]
                    cropped_gt_img = gt_img[crop_size:-crop_size,
                                            crop_size:-crop_size, :]
                    cropped_sr_img_y = bgr2ycbcr(cropped_sr_img, only_y=True)
                    cropped_gt_img_y = bgr2ycbcr(cropped_gt_img, only_y=True)
                    avg_psnr += util.calculate_psnr(cropped_sr_img_y * 255,
                                                    cropped_gt_img_y * 255)

                avg_psnr = avg_psnr / idx

                # log
                logger.info('# Validation # PSNR: {:.4e}'.format(avg_psnr))
                logger_val = logging.getLogger('val')  # validation logger
                logger_val.info(
                    '<epoch:{:3d}, iter:{:8,d}> psnr: {:.4e}'.format(
                        epoch, current_step, avg_psnr))
                # tensorboard logger
                if use_tb_logger:
                    tb_logger.add_scalar('psnr', avg_psnr, current_step)

            # save models and training states
            if current_step % opt['logger']['save_checkpoint_freq'] == 0:
                logger.info('Saving models and training states.')
                model.save(current_step)
                model.save_training_state(epoch, current_step)

    logger.info('Saving the final model.')
    model.save('latest')
    logger.info('End of training.')
    # Flush pending TensorBoard events before the process exits.
    if use_tb_logger:
        tb_logger.close()
dct_coefficient_count) audio_processor = input_data_filler.AudioProcessor( data_url, data_dir, silence_percentage, unknown_percentage, wanted_words.split(','), validation_percentage, testing_percentage, model_settings) time_shift_samples = int((time_shift_ms * sample_rate) / 1000) fingerprint_size = model_settings['fingerprint_size'] label_count = model_settings['label_count'] fingerprint_input = tf.placeholder(tf.float32, [None, fingerprint_size], name='fingerprint_input') ground_truth_input = tf.placeholder(tf.float32, [None, label_count], name='groundtruth_input') logits = models.create_model(fingerprint_input, model_settings, model_architecture, is_training=False) softmax = tf.nn.softmax(logits, name='labels_softmax') with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=ground_truth_input, logits=logits)) predicted_indices = tf.argmax(logits, 1) expected_indices = tf.argmax(ground_truth_input, 1) correct_prediction = tf.equal(predicted_indices, expected_indices) confusion_matrix = tf.confusion_matrix(expected_indices, predicted_indices, num_classes=label_count) evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
test_opt.batch_size = 1 test_opt.num_threads = 1 test_opt.serial_batches = True test_opt.no_flip = True test_data_loader = torch.utils.data.DataLoader( test_dataset, batch_size=test_opt.batch_size, shuffle=False, num_workers=test_opt.num_threads, pin_memory=True) test_dataset_size = len(test_data_loader) print('#test images = %d' % test_dataset_size) model = create_model(test_opt, test_dataset) model.eval() model.setup(test_opt) visualizer = Visualizer(test_opt) test_loss_iter = [] gts = None preds = None epoch_iter = 0 model.init_test_eval() epoch = 0 num = 5 # How many images to save in an image if not os.path.exists('vis'): os.makedirs('vis') with torch.no_grad(): iterator = iter(test_data_loader) i = 0
from .datasets import create_dataset from .models import create_model from .util.visualizer import Visualizer except ImportError: from options import TrainOptions from datasets import create_dataset from models import create_model from util.visualizer import Visualizer if __name__ == '__main__': opt = TrainOptions().parse() dataset = create_dataset(opt) dataset_size = len(dataset) # get the number of images in the dataset print('the number of training images = %d' % dataset_size) model = create_model(opt) # create a model with model parameters in `opt` model.setup( opt) # regular setup: load and print networks; create schedulers visualizer = Visualizer(opt) total_iters = 0 for epoch in range(opt.start_epoch, opt.start_epoch + opt.n_epochs + 1): epoch_since = time.time() iter_data_since = time.time() epoch_iter = 0 visualizer.reset() model.update_learning_rate( ) # update learning rates in the beginning of every epoch for i, batch in enumerate(dataset): iter_since = time.time() if total_iters % opt.print_freq == 0:
def train(log_dir, args):
    """Run the training loop, checkpointing and optionally syncing to Drive.

    Builds the data feeder and the model graph, optionally restores from
    ``<log_dir>/model.ckpt-<args.restore_step>``, then trains until the
    coordinator asks to stop. Summaries are written every
    ``args.summary_interval`` steps. Every ``args.checkpoint_interval`` steps a
    checkpoint, a sample waveform and an alignment plot are saved to
    ``log_dir``; when ``args.pid`` is set the resulting files are also
    uploaded via ``upload_to_drive``.

    Any exception raised inside the session (including the deliberate
    "Loss Exploded" one) is logged, its traceback printed, and the coordinator
    is asked to stop; it is not re-raised.

    Args:
      log_dir: Directory for checkpoints, summaries, audio and plots.
      args: Parsed options; the attributes read here are ``git``, ``base_dir``,
        ``input``, ``pid``, ``model``, ``restore_step``, ``summary_interval``
        and ``checkpoint_interval``.
    """
    git_commit = get_git_commit() if args.git else 'None'
    ckpt_prefix = os.path.join(log_dir, 'model.ckpt')
    data_path = os.path.join(args.base_dir, args.input)
    drive_parent_id = args.pid

    log('Checkpoint path: %s' % ckpt_prefix)
    log('Loading training data from: %s' % data_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    if drive_parent_id:
        log('Downloading model files from drive')
        download_checkpoints(drive_parent_id)

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder'):
        feeder = DataFeeder(coord, data_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model'):
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs, feeder.input_lengths,
                         feeder.mel_targets, feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    # Bookkeeping: rolling windows over the last 100 steps.
    step = 0
    step_times = ValueWindow(100)
    losses = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=1)

    # Train!
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (ckpt_prefix, args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, git_commit),
                    slack=True)
            else:
                log('Starting new training run at commit: %s' % git_commit,
                    slack=True)

            feeder.start_in_session(sess)

            while not coord.should_stop():
                started = time.time()
                step, loss, _ = sess.run(
                    [global_step, model.loss, model.optimize])
                step_times.append(time.time() - started)
                losses.append(loss)

                at_checkpoint = step % args.checkpoint_interval == 0
                message = '%s |Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    time.asctime(), step, step_times.average, loss,
                    losses.average)
                log(message, slack=at_checkpoint)

                # Abort on divergence; handled by the outer except below.
                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step),
                        slack=True)
                    raise Exception('Loss Exploded')

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    step_summary = sess.run(stats)
                    summary_writer.add_summary(step_summary, step)

                if not at_checkpoint:
                    continue

                # Files to be uploaded to drive.
                upload_files = [
                    os.path.join(log_dir, 'checkpoint'),
                    os.path.join(log_dir, 'train.log'),
                ]
                log('Saving checkpoint to: %s-%d' % (ckpt_prefix, step))
                saved_prefix = saver.save(sess, ckpt_prefix, global_step=step)
                upload_files.extend(glob.glob(saved_prefix + '.*'))
                upload_files.extend(
                    glob.glob(os.path.join(log_dir, 'events.*')))

                # Best effort: a failure to render the sample must not stop
                # training, so it is only logged.
                try:
                    log('Saving audio and alignment...')
                    input_seq, spectrogram, alignment = sess.run([
                        model.inputs[0], model.linear_outputs[0],
                        model.alignments[0]
                    ])
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    caption = '%s, %s, %s, %s, step=%d, loss=%.5f' % (
                        sequence_to_text(input_seq), args.model, git_commit,
                        time_string(), step, loss)
                    info = '\n'.join(
                        textwrap.wrap(caption, 70, break_long_words=False))
                    wav_path = os.path.join(log_dir,
                                            'step-%d-audio.wav' % step)
                    png_path = os.path.join(log_dir,
                                            'step-%d-align.png' % step)
                    audio.save_wav(waveform, wav_path)
                    plot.plot_alignment(alignment, png_path, info=info)
                    log('Input: %s' % sequence_to_text(input_seq))
                    upload_files.append(wav_path)
                    upload_files.append(png_path)
                except Exception as err:
                    log(str(err))
                    print(err)

                if drive_parent_id:
                    try:
                        upload_to_drive(upload_files, drive_parent_id)
                    except Exception as err:
                        # Record the failed upload and carry on training.
                        print(err)
                        with open('drive_log.txt', 'a') as ferr:
                            ferr.write('\n\n\n' + time.asctime())
                            ferr.write('\n' + ', '.join(upload_files))
                            ferr.write('\n' + str(err))

        except Exception as err:
            log('Exiting due to exception: %s' % err, slack=True)
            traceback.print_exc()
            coord.request_stop(err)
def main():
    """Entry point of the compression sample application.

    Parses the command line, configures logging and the target device, builds
    the model, and then dispatches to exactly one of the supported modes:
    automated deep compression (``--ADC``), model summary, sensitivity
    analysis, evaluation only, thinnifying a checkpoint, or the default
    train/validate/test loop (optionally with a compression schedule and
    knowledge distillation).

    Returns:
        Whatever the selected one-shot mode returns (ADC, summary, sensitivity
        analysis, evaluation); ``None`` after thinnifying or after a full
        training run.

    Side effects:
        Rebinds the module-level ``msglogger``, mutates ``args`` in place
        (``device``, ``gpus``, ``dataset``, ``num_classes``, ...), creates the
        output directory and writes checkpoints/log files.  Terminates the
        process with exit status 1 on invalid ``--deterministic``/``--gpus``
        combinations.
    """
    global msglogger

    # Parse arguments
    prsr = parser.get_parser()
    args = prsr.parse_args()
    # exist_ok avoids the check-then-create race of an explicit exists() test.
    os.makedirs(args.output_dir, exist_ok=True)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(sys.argv, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_epochs = [
        distiller.MutableNamedTuple({'epoch': 0, 'top1': 0, 'sparsity': 0})
        for _ in range(args.num_best_scores)
    ]

    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1')
            # sys.exit rather than the site-provided exit(), which is meant
            # for interactive use and is not guaranteed to exist.
            sys.exit(1)
        # Use a well-known seed, for repeatability of experiments
        distiller.set_deterministic()
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                msglogger.error(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only')
                sys.exit(1)
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    msglogger.error(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
                    sys.exit(1)
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained, args.dataset, args.arch,
                         parallel=not args.load_serialized,
                         device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    if args.resume:
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)
        model.to(args.device)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(args.device)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.ADC:
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info(
            '\tStats will be collected for {:.1%} of test dataset'.format(
                args.qe_calibration))
        msglogger.info(
            '\tSetting constant seeds and converting model to serialized execution')
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(
            create_quantization_stats_collector(model))
        # Calibration forces the evaluate-only path below.
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size,
        args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress,
                                                      compression_scheduler)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        assert args.resume is not None, "You must use --resume to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resume.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained,
                               args.dataset,
                               args.kd_teacher,
                               device_ids=args.gpus)
        if args.kd_resume:
            teacher, _, _ = apputils.load_checkpoint(teacher,
                                                     chkpt_file=args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy,
                                         starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader, model, criterion, optimizer, epoch,
                  compression_scheduler, loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model, epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch, "train", loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion,
                                         [pylogger], args, epoch)
            distiller.log_activation_statsitics(
                epoch, "valid", loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Peformance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats, None, epoch,
                                        steps_completed=0, total_steps=1,
                                        log_freq=1, loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        is_best = top1 > best_epochs[-1].top1
        if top1 > best_epochs[0].top1:
            # Overwrite the current lowest entry, then restore the ordering.
            best_epochs[0].epoch = epoch
            best_epochs[0].top1 = top1
            # Keep best_epochs sorted such that best_epochs[0] is the lowest top1 in the best_epochs list
            best_epochs = sorted(best_epochs, key=lambda score: score.top1)
        for score in reversed(best_epochs):
            if score.top1 > 0:
                msglogger.info('==> Best Top1: %.3f on Epoch: %d',
                               score.top1, score.epoch)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler, best_epochs[-1].top1,
                                 is_best, args.name, msglogger.logdir)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], activations_collectors,
         args=args)
def main(configs, opt, gpu_id, queue, verbose):
    """Evaluate a list of sub-network configurations on one GPU.

    For every configuration the MACs are profiled first; configurations whose
    MACs exceed ``opt.budget`` are not run and receive worst-case metric
    sentinels instead.  The per-configuration result dicts are collected in a
    list that is finally put on ``queue``.

    Args:
        configs: Iterable of configurations accepted by ``model.profile`` and
            ``model.test``.
        opt: Option namespace; ``opt.gpu_ids`` is overwritten with
            ``[gpu_id]``.
        gpu_id: Device index this worker should use.
        queue: Object with a ``put`` method that receives the result list.
        verbose: Forwarded to the dataloader/model factories.
    """
    opt.gpu_ids = [gpu_id]
    dataloader = create_dataloader(opt, verbose)
    model = create_model(opt, verbose)
    model.setup(opt, verbose)
    device = model.device

    eval_fid = not opt.no_fid
    eval_miou = 'cityscapes' in opt.dataroot and opt.direction == 'BtoA'

    if eval_fid:
        block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]
        inception_model = InceptionV3([block_idx])
        inception_model.to(device)
        inception_model.eval()
    if eval_miou:
        drn_model = DRNSeg('drn_d_105', 19, pretrained=False)
        util.load_network(drn_model, opt.drn_path, verbose=False)
        if len(opt.gpu_ids) > 0:
            drn_model = nn.DataParallel(drn_model, opt.gpu_ids)
        drn_model.eval()

    npz = np.load(opt.real_stat_path)
    results = []

    # Feed a single batch so that model.profile() has an input to work with.
    for data_i in dataloader:
        model.set_input(data_i)
        break

    for config in tqdm.tqdm(configs):
        macs, _ = model.profile(config)
        # Same predicate as the original if/else on ``macs > opt.budget``.
        qualified = not (macs > opt.budget)

        fakes, names = [], []
        if qualified:
            for data_i in dataloader:
                model.set_input(data_i)
                model.test(config)
                fakes.append(model.fake_B.cpu())
                names.extend(
                    os.path.splitext(ntpath.basename(path))[0]
                    for path in model.get_image_paths())

        result = {'config_str': encode_config(config), 'macs': macs}
        if eval_fid:
            if qualified:
                result['fid'] = get_fid(fakes, inception_model, npz, device,
                                        opt.batch_size, use_tqdm=False)
            else:
                # Lower FID is better: use a huge value for skipped configs.
                result['fid'] = 1e9
        if eval_miou:
            if qualified:
                result['mIoU'] = get_mIoU(fakes, names, drn_model, device,
                                          data_dir=opt.cityscapes_path,
                                          batch_size=opt.batch_size,
                                          num_workers=opt.num_threads,
                                          use_tqdm=False)
            else:
                # Higher mIoU is better, so a skipped config gets the lowest
                # score.  The previous code read ``mIoU`` here, which was
                # either unbound or left over from an earlier configuration.
                result['mIoU'] = 0.0
        print(result, flush=True)
        results.append(result)
    queue.put(results)
def evaluate_sintel(args, sintel_dir="G:/Datasets/MPI-Sintel-complete/",
                    out_path="G:/Code/CycleGAN/eval/"):
    """Run every checkpointed model over the Sintel videos and record metrics.

    For each video under ``<sintel_dir>/training/final`` and
    ``<sintel_dir>/test/final`` and each style index ``y`` in ``1..3``, the
    model named after the ``(y - 1)``-th entry of the sorted checkpoints
    directory is created, every frame is translated and saved as a PNG, and
    the mean short-term TCL, long-term TCL and per-frame forward time (ms)
    are stored.  The three metric dictionaries are written via
    ``save_dict_as_json`` at the end.

    Args:
        args: Option namespace; ``args.checkpoints_dir`` and ``args.name`` are
            overwritten by this function.
        sintel_dir: Root directory of the MPI-Sintel dataset.
        out_path: Directory that receives the generated frames and the JSON
            summaries.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    raft_model = initRaftModel(args, device)

    num_domains = 4

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])

    train_dir = os.path.join(sintel_dir, "training", "final")
    train_list = sorted(os.listdir(train_dir))
    test_dir = os.path.join(sintel_dir, "test", "final")
    test_list = sorted(os.listdir(test_dir))

    video_list = [os.path.join(train_dir, vid) for vid in train_list]
    video_list += [os.path.join(test_dir, vid) for vid in test_list]
    vid_list = train_list + test_list

    tcl_st_dict = OrderedDict()
    tcl_lt_dict = OrderedDict()
    dt_dict = OrderedDict()

    # The trailing "" keeps the trailing path separator the previous
    # hand-built "\\checkpoints\\" string had, while using the separator of
    # the current platform.
    args.checkpoints_dir = os.path.join(os.getcwd(), "checkpoints", "")
    model_list = sorted(os.listdir(args.checkpoints_dir))

    for vid, vid_dir in zip(vid_list, video_list):
        sintel_dset = SingleSintelVideo(vid_dir, transform)
        loader = data.DataLoader(dataset=sintel_dset,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=0)

        for y in range(1, num_domains):
            key = vid + "_s" + str(y)
            vid_path = os.path.join(out_path, key)
            os.makedirs(vid_path, exist_ok=True)

            tcl_st_vals = []
            tcl_lt_vals = []
            dt_vals = []

            # NOTE(review): the model is rebuilt for every (video, style)
            # pair; kept as-is because hoisting it changes memory behaviour.
            args.name = model_list[y - 1]
            model = create_model(args)
            model.setup(args)

            for i, imgs in enumerate(tqdm(loader, total=len(loader))):
                img, img_last, img_past = imgs
                img = img.to(device)
                img_last = img_last.to(device)
                img_past = img_past.to(device)

                t_start = time.time()
                x_fake = model.forward_eval(img)
                t_end = time.time()
                dt_vals.append((t_end - t_start) * 1000)

                # Short-term consistency needs a previous frame ...
                if i > 0:
                    tcl_st = computeTCL(model, raft_model, x_fake, img,
                                        img_last)
                    tcl_st_vals.append(tcl_st.cpu().numpy())
                # ... long-term consistency is only measured from frame 5 on.
                if i >= 5:
                    tcl_lt = computeTCL(model, raft_model, x_fake, img,
                                        img_past)
                    tcl_lt_vals.append(tcl_lt.cpu().numpy())

                filename = os.path.join(vid_path, "frame_%04d.png" % i)
                save_image(x_fake[0], ncol=1, filename=filename)

            tcl_st_dict["TCL-ST_" + key] = float(np.array(tcl_st_vals).mean())
            tcl_lt_dict["TCL-LT_" + key] = float(np.array(tcl_lt_vals).mean())
            dt_dict["DT_" + key] = float(np.array(dt_vals).mean())

    save_dict_as_json("TCL-ST", tcl_st_dict, out_path, num_domains)
    save_dict_as_json("TCL-LT", tcl_lt_dict, out_path, num_domains)
    save_dict_as_json("DT", dt_dict, out_path, num_domains)
def main():
    """Train a speech-commands classifier with federated gradient aggregation.

    Each mini-batch is treated as the contribution of one client: its
    gradients are flattened into a single vector and handed to the
    ``Federated`` helper.  Only after ``args.clients`` consecutive batches are
    the recovered gradients written back to the parameters and an optimizer
    step is taken.  After every epoch the training accuracy/loss are logged
    and, when the accuracy improves, a checkpoint and the full model are
    saved.

    Reads its configuration from the module-level ``args``, ``n_mels``,
    ``use_gpu`` and ``CLASSES``.
    """
    # 1. load dataset, train and valid
    train_dataset, valid_dataset = build_dataset(
        n_mels=n_mels,
        train_dataset=args.train_dataset,
        valid_dataset=args.valid_dataset,
        background_noise=args.background_noise)
    print('train ', len(train_dataset), 'val ', len(valid_dataset))

    weights = train_dataset.make_weights_for_balanced_classes()
    sampler = WeightedRandomSampler(weights, len(weights))
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  sampler=sampler,
                                  pin_memory=use_gpu,
                                  num_workers=args.dataload_workers_nums)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  pin_memory=use_gpu,
                                  num_workers=args.dataload_workers_nums)

    # 2. prepare the model, checkpoint
    # full_name is used to name checkpoints and the summary-writer run.
    full_name = '%s_%s_%s_bs%d_lr%.1e_wd%.1e' % (
        args.model, args.optim, args.lr_scheduler, args.batch_size,
        args.learning_rate, args.weight_decay)
    if args.comment:
        full_name = '%s_%s' % (full_name, args.comment)

    model = models.create_model(model_name=args.model,
                                num_classes=len(CLASSES),
                                in_channels=1)
    if use_gpu:
        model = torch.nn.DataParallel(model).cuda()

    criterion = torch.nn.CrossEntropyLoss()
    if args.optim == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.learning_rate,
                                    momentum=0.9,
                                    weight_decay=args.weight_decay)
    else:
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.learning_rate,
                                     weight_decay=args.weight_decay)

    start_timestamp = int(time.time() * 1000)
    start_epoch = 0
    best_accuracy = 0
    best_loss = 1e100
    global_step = 0

    if args.resume:
        print("resuming getShapeLista checkpoint '%s'" % args.resume)
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        model.float()
        optimizer.load_state_dict(checkpoint['optimizer'])
        best_accuracy = checkpoint.get('accuracy', best_accuracy)
        best_loss = checkpoint.get('loss', best_loss)
        start_epoch = checkpoint.get('epoch', start_epoch)
        global_step = checkpoint.get('step', global_step)
        del checkpoint  # reduce memory

    # NOTE(review): the scheduler is constructed but never stepped in this
    # function, so the learning rate logged below stays constant -- confirm
    # whether that is intended.
    if args.lr_scheduler == 'plateau':
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            patience=args.lr_scheduler_patience,
            factor=args.lr_scheduler_gamma)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer,
            step_size=args.lr_scheduler_step_size,
            gamma=args.lr_scheduler_gamma,
            last_epoch=start_epoch - 1)

    def get_lr():
        return optimizer.param_groups[0]['lr']

    writer = SummaryWriter(comment=('_speech_commands_' + full_name))

    # 3. train and validation
    print("training %s for Google speech commands..." % args.model)
    federated = Federated(args.clients, args.matrix_size, args.num_threads)

    for epoch in range(start_epoch, args.max_epochs):
        print("epoch %3d with lr=%.02e" % (epoch, get_lr()))
        phase = 'train'
        writer.add_scalar('%s/learning_rate' % phase, get_lr(), epoch)

        model.train()  # Set model to training mode

        running_loss = 0.0
        it = 0
        correct = 0
        total = 0
        # Index of the client the next batch is attributed to.
        current_client = 0

        pbar = tqdm(train_dataloader,
                    unit="audios",
                    unit_scale=train_dataloader.batch_size,
                    disable=False)
        for batch in pbar:
            inputs = batch['input']
            inputs = torch.unsqueeze(inputs, 1)
            targets = batch['target']

            if args.mixup:
                inputs, targets = mixup(inputs, targets,
                                        num_classes=len(CLASSES))

            inputs = Variable(inputs, requires_grad=True)
            targets = Variable(targets, requires_grad=False)

            if use_gpu:
                inputs = inputs.cuda()
                # ``async`` is a reserved word since Python 3.7; the argument
                # is called ``non_blocking`` in current PyTorch.
                targets = targets.cuda(non_blocking=True)

            outputs = model(inputs)
            if args.mixup:
                loss = mixup_cross_entropy_loss(outputs, targets)
            else:
                loss = criterion(outputs, targets)

            optimizer.zero_grad()
            loss.backward()

            # Flatten this client's gradients into one vector and remember
            # the per-parameter shapes.  A single torch.cat avoids
            # re-copying the growing tensor for every parameter and does not
            # hard-code a CUDA seed tensor.
            trainable = [param for _, param in model.named_parameters()
                         if param.requires_grad]
            shape_list = [list(param.grad.shape) for param in trainable]
            current_client_grad = torch.cat(
                [param.grad.view(-1) for param in trainable])

            # First client of a round: (re)initialise the aggregation state.
            if current_client == 0:
                federated.init(current_client_grad, shape_list)

            federated.work_for_client(current_client, current_client_grad)

            # Parameters are only updated once every client has contributed.
            if current_client == args.clients - 1:
                recovered_grad = federated.recoverGradient()
                assert len(recovered_grad) == len(trainable)
                for param, grad in zip(trainable, recovered_grad):
                    param.grad = grad
                optimizer.step()
                current_client = 0
            else:
                current_client += 1

            # statistics
            it += 1
            global_step += 1
            running_loss += loss.item()
            pred = outputs.data.max(1, keepdim=True)[1]
            if args.mixup:
                # mixup replaced the targets; accuracy uses the originals.
                targets = Variable(batch['target'], requires_grad=False)
                if use_gpu:
                    targets = targets.cuda(non_blocking=True)
            correct += pred.eq(targets.data.view_as(pred)).sum()
            total += targets.size(0)

            writer.add_scalar('%s/loss' % phase, loss.item(), global_step)

            # update the progress bar
            pbar.set_postfix({
                'loss': "%.05f" % (running_loss / it),
                'acc': "%.02f%%" % (100 * float(correct) / total)
            })

        accuracy = float(correct) / total
        epoch_loss = running_loss / it
        writer.add_scalar('%s/accuracy' % phase, 100 * accuracy, epoch)
        writer.add_scalar('%s/epoch_loss' % phase, epoch_loss, epoch)

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            checkpoint = {
                'epoch': epoch,
                'step': global_step,
                'state_dict': model.state_dict(),
                'loss': epoch_loss,
                'accuracy': accuracy,
                'optimizer': optimizer.state_dict(),
            }
            torch.save(
                checkpoint,
                'checkpoints/federated-best-loss-speech-commands-checkpoint-%s.pth'
                % full_name)
            torch.save(
                model,
                '%d-%s-federated-best-loss.pth' % (start_timestamp, full_name))
            del checkpoint
def main():
    """Train a model described by a YAML option file.

    Parses ``-opt``, optionally restores a resume state, sets up file and
    (optional) TensorBoard loggers, builds the train/val dataloaders and the
    model, and runs the iteration-based training loop with periodic
    logging, validation (PSNR and NLL) and checkpointing.  When training
    finishes a ``TRAIN_DONE`` marker file is written under
    ``opt['path']['root']`` and the model is saved as ``'latest'``.
    """
    #### options
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt', type=str, help='Path to option YMAL file.')
    parser.add_argument('--launcher',
                        choices=['none', 'pytorch'],
                        default='none',
                        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    opt = option.parse(args.opt, is_train=True)

    #### distributed training settings
    opt['dist'] = False
    rank = -1
    print('Disabled distributed training.')

    #### loading resume state if exists
    if opt['path'].get('resume_state', None):
        resume_state_path, _ = get_resume_paths(opt)

        # distributed resuming: all load into default GPU
        if resume_state_path is None:
            resume_state = None
        else:
            device_id = torch.cuda.current_device()
            resume_state = torch.load(
                resume_state_path,
                map_location=lambda storage, loc: storage.cuda(device_id))
            option.check_resume(opt, resume_state['iter'])  # check resume options
    else:
        resume_state = None

    # TensorBoard writers stay None unless enabled below; every use further
    # down is guarded so that runs without TensorBoard do not hit a NameError.
    tb_logger_train = None
    tb_logger_valid = None

    #### mkdir and loggers
    if rank <= 0:  # normal training (rank -1) OR distributed training (rank 0)
        if resume_state is None:
            util.mkdir_and_rename(
                opt['path']['experiments_root'])  # rename experiment folder if exists
            util.mkdirs(
                (path for key, path in opt['path'].items()
                 if not key == 'experiments_root'
                 and 'pretrain_model' not in key and 'resume' not in key))

        # config loggers. Before it, the log will not work
        util.setup_logger('base', opt['path']['log'], 'train_' + opt['name'],
                          level=logging.INFO, screen=True, tofile=True)
        util.setup_logger('val', opt['path']['log'], 'val_' + opt['name'],
                          level=logging.INFO, screen=True, tofile=True)
        logger = logging.getLogger('base')
        logger.info(option.dict2str(opt))

        # tensorboard logger
        if opt.get('use_tb_logger', False) and 'debug' not in opt['name']:
            version = float(torch.__version__[0:3])
            if version >= 1.1:  # PyTorch 1.1
                from torch.utils.tensorboard import SummaryWriter
            else:
                logger.info(
                    'You are using PyTorch {}. Tensorboard will use [tensorboardX]'
                    .format(version))
                from tensorboardX import SummaryWriter
            conf_name = basename(args.opt).replace(".yml", "")
            exp_dir = opt['path']['experiments_root']
            log_dir_train = os.path.join(exp_dir, 'tb', conf_name, 'train')
            log_dir_valid = os.path.join(exp_dir, 'tb', conf_name, 'valid')
            tb_logger_train = SummaryWriter(log_dir=log_dir_train)
            tb_logger_valid = SummaryWriter(log_dir=log_dir_valid)
    else:
        util.setup_logger('base', opt['path']['log'], 'train',
                          level=logging.INFO, screen=True)
        logger = logging.getLogger('base')

    # convert to NoneDict, which returns None for missing keys
    opt = option.dict_to_nonedict(opt)

    #### random seed
    seed = opt['train']['manual_seed']
    if seed is None:
        seed = random.randint(1, 10000)
    if rank <= 0:
        logger.info('Random seed: {}'.format(seed))
    util.set_random_seed(seed)

    torch.backends.cudnn.benchmark = True

    #### create train and val dataloader
    # Pre-bind so the assertion below reports a missing train phase instead
    # of failing with a NameError.
    train_loader = None
    for phase, dataset_opt in opt['datasets'].items():
        if phase == 'train':
            train_set = create_dataset(dataset_opt)
            print('Dataset created')
            train_size = int(
                math.ceil(len(train_set) / dataset_opt['batch_size']))
            total_iters = int(opt['train']['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            train_sampler = None
            train_loader = create_dataloader(train_set, dataset_opt, opt,
                                             train_sampler)
            if rank <= 0:
                logger.info(
                    'Number of train images: {:,d}, iters: {:,d}'.format(
                        len(train_set), train_size))
                logger.info('Total epochs needed: {:d} for iters {:,d}'.format(
                    total_epochs, total_iters))
        elif phase == 'val':
            val_set = create_dataset(dataset_opt)
            val_loader = create_dataloader(val_set, dataset_opt, opt, None)
            if rank <= 0:
                logger.info('Number of val images in [{:s}]: {:d}'.format(
                    dataset_opt['name'], len(val_set)))
        else:
            raise NotImplementedError(
                'Phase [{:s}] is not recognized.'.format(phase))
    assert train_loader is not None

    #### create model
    current_step = 0 if resume_state is None else resume_state['iter']
    model = create_model(opt, current_step)

    #### resume training
    if resume_state:
        logger.info('Resuming training from epoch: {}, iter: {}.'.format(
            resume_state['epoch'], resume_state['iter']))

        start_epoch = resume_state['epoch']
        current_step = resume_state['iter']
        model.resume_training(resume_state)  # handle optimizers and schedulers
    else:
        current_step = 0
        start_epoch = 0

    # Report the parameter count, with and without the "RRDB." sub-module.
    import numpy as np
    all_params = 0
    params = 0
    for n, p in model.netG.named_parameters():
        all_params += np.prod(p.size())
        if "RRDB." not in n:
            params += np.prod(p.size())
    print("Total number of parameters: {}".format(all_params))
    print("The number of parameters of flow model w/o RRDB: {}".format(params))

    def eta(t_iter):
        # Remaining time in hours, given the seconds spent per iteration.
        # Reads current_step at call time.
        return (t_iter * (opt['train']['niter'] - current_step)) / 3600

    #### training
    timer = Timer()
    logger.info('Start training from epoch: {:d}, iter: {:d}'.format(
        start_epoch, current_step))
    timerData = TickTock()

    for epoch in range(start_epoch, total_epochs + 1):
        if opt['dist']:
            train_sampler.set_epoch(epoch)

        timerData.tick()
        for _, train_data in enumerate(train_loader):
            timerData.tock()
            current_step += 1
            if current_step > total_iters:
                break

            #### training
            model.feed_data(train_data)

            #### update learning rate
            model.update_learning_rate(current_step,
                                       warmup_iter=opt['train']['warmup_iter'])

            # Reset first: if the step is skipped because of a RuntimeError,
            # the value logged below must not be an unbound name or a stale
            # value from a previous step/validation pass.
            nll = None
            try:
                nll = model.optimize_parameters(current_step)
            except RuntimeError as e:
                print(
                    "Skipping ERROR caught in nll = model.optimize_parameters(current_step): "
                )
                print(e)

            if nll is None:
                nll = 0

            #### log
            if current_step % opt['logger']['print_freq'] == 0 \
                    or current_step - (resume_state['iter'] if resume_state else 0) < 25:
                avg_time = timer.get_average_and_reset()
                avg_data_time = timerData.get_average_and_reset()
                message = '<epoch:{:3d}, iter:{:8,d}, lr:{:.3e}, t:{:.2e}, td:{:.2e}, eta:{:.2e}, nll:{:.3e}> '.format(
                    epoch, current_step, model.get_current_learning_rate(),
                    avg_time, avg_data_time, eta(avg_time), nll)
                print(message)
            timer.tick()

            # Reduce number of logs
            if tb_logger_train is not None and current_step % 5 == 0:
                tb_logger_train.add_scalar('loss/nll', nll, current_step)
                tb_logger_train.add_scalar('lr/base',
                                           model.get_current_learning_rate(),
                                           current_step)
                tb_logger_train.add_scalar('time/iteration',
                                           timer.get_last_iteration(),
                                           current_step)
                tb_logger_train.add_scalar('time/data',
                                           timerData.get_last_iteration(),
                                           current_step)
                tb_logger_train.add_scalar('time/eta',
                                           eta(timer.get_last_iteration()),
                                           current_step)
                for k, v in model.get_current_log().items():
                    tb_logger_train.add_scalar(k, v, current_step)

            # validation
            if current_step % opt['train']['val_freq'] == 0 and rank <= 0:
                avg_psnr = 0.0
                idx = 0
                nlls = []
                for val_data in val_loader:
                    idx += 1
                    img_name = os.path.splitext(
                        os.path.basename(val_data['LQ_path'][0]))[0]
                    img_dir = os.path.join(opt['path']['val_images'],
                                           img_name)
                    util.mkdir(img_dir)

                    model.feed_data(val_data)

                    nll = model.test()
                    if nll is None:
                        nll = 0
                    nlls.append(nll)

                    visuals = model.get_current_visuals()

                    sr_img = None
                    # Save SR images for reference
                    if hasattr(model, 'heats'):
                        for heat in model.heats:
                            for i in range(model.n_sample):
                                sr_img = util.tensor2img(
                                    visuals['SR', heat, i])  # uint8
                                save_img_path = os.path.join(
                                    img_dir,
                                    '{:s}_{:09d}_h{:03d}_s{:d}.png'.format(
                                        img_name, current_step,
                                        int(heat * 100), i))
                                util.save_img(sr_img, save_img_path)
                    else:
                        sr_img = util.tensor2img(visuals['SR'])  # uint8
                        save_img_path = os.path.join(
                            img_dir,
                            '{:s}_{:d}.png'.format(img_name, current_step))
                        util.save_img(sr_img, save_img_path)
                    assert sr_img is not None

                    # Save LQ images for reference
                    save_img_path_lq = os.path.join(
                        img_dir, '{:s}_LQ.png'.format(img_name))
                    if not os.path.isfile(save_img_path_lq):
                        lq_img = util.tensor2img(visuals['LQ'])  # uint8
                        util.save_img(
                            cv2.resize(lq_img,
                                       dsize=None,
                                       fx=opt['scale'],
                                       fy=opt['scale'],
                                       interpolation=cv2.INTER_NEAREST),
                            save_img_path_lq)

                    # Save GT images for reference
                    gt_img = util.tensor2img(visuals['GT'])  # uint8
                    save_img_path_gt = os.path.join(
                        img_dir, '{:s}_GT.png'.format(img_name))
                    if not os.path.isfile(save_img_path_gt):
                        util.save_img(gt_img, save_img_path_gt)

                    # calculate PSNR (on the last SR image produced above,
                    # with a border of opt['scale'] pixels cropped away)
                    crop_size = opt['scale']
                    gt_img = gt_img / 255.
                    sr_img = sr_img / 255.
                    cropped_sr_img = sr_img[crop_size:-crop_size,
                                            crop_size:-crop_size, :]
                    cropped_gt_img = gt_img[crop_size:-crop_size,
                                            crop_size:-crop_size, :]
                    avg_psnr += util.calculate_psnr(cropped_sr_img * 255,
                                                    cropped_gt_img * 255)

                avg_psnr = avg_psnr / idx
                avg_nll = sum(nlls) / len(nlls)

                # log
                logger.info('# Validation # PSNR: {:.4e}'.format(avg_psnr))
                logger_val = logging.getLogger('val')  # validation logger
                logger_val.info(
                    '<epoch:{:3d}, iter:{:8,d}> psnr: {:.4e}'.format(
                        epoch, current_step, avg_psnr))

                # tensorboard logger
                if tb_logger_valid is not None:
                    tb_logger_valid.add_scalar('loss/psnr', avg_psnr,
                                               current_step)
                    tb_logger_valid.add_scalar('loss/nll', avg_nll,
                                               current_step)
                    tb_logger_valid.flush()
                if tb_logger_train is not None:
                    tb_logger_train.flush()

            #### save models and training states
            if current_step % opt['logger']['save_checkpoint_freq'] == 0:
                if rank <= 0:
                    logger.info('Saving models and training states.')
                    model.save(current_step)
                    model.save_training_state(epoch, current_step)

            timerData.tick()

    with open(os.path.join(opt['path']['root'], "TRAIN_DONE"), 'w') as f:
        f.write("TRAIN_DONE")

    if rank <= 0:
        logger.info('Saving the final model.')
        model.save('latest')
        logger.info('End of training.')
# if a linear CCA should get applied on the learned features extracted from the networks
# it does not affect the performance on noisy MNIST significantly
apply_linear_cca = True

# end of parameters section
############

# Each view is stored in a gzip file separately. They will get downloaded the first time the code gets executed.
# Datasets get stored under the datasets folder of user's Keras folder
# normally under [Home Folder]/.keras/datasets/
data1 = load_data('noisymnist_view1.gz', 'https://www2.cs.uic.edu/~vnoroozi/noisy-mnist/noisymnist_view1.gz')
data2 = load_data('noisymnist_view2.gz', 'https://www2.cs.uic.edu/~vnoroozi/noisy-mnist/noisymnist_view2.gz')

# Building, training, and producing the new features by DCCA
model = create_model(layer_sizes1, layer_sizes2, input_shape1, input_shape2,
                     learning_rate, reg_par, outdim_size, use_all_singular_values)
model.summary()
model = train_model(model, data1, data2, epoch_num, batch_size)
new_data = test_model(model, data1, data2, outdim_size, apply_linear_cca)

# Training and testing of SVM with linear kernel on the view 1 with new features
[test_acc, valid_acc] = svm_classify(new_data, C=0.01)
print("Accuracy on view 1 (validation data) is:", valid_acc * 100.0)
print("Accuracy on view 1 (test data) is:", test_acc*100.0)

# Saving new features in a gzip pickled file specified by save_to
print('saving new features ...')
# The context manager closes the archive even if pickling fails part-way.
with gzip.open(save_to, 'wb') as f1:
    thepickle.dump(new_data, f1)
# Evaluation-script fragment: overrides a handful of options, builds an image
# folder loader and the model, snapshots the latest generator weights, and
# prepares the bookkeeping for a sweep over R.  ``opt`` is created before this
# fragment and the body of the final ``for`` loop is not part of it.

# Hard-coded option overrides for this run.
opt.how_many = 50
opt.aspect_ratio = 1.0
opt.sample_Ps = [6, ]
opt.load_model = True
opt.name = 'rgb_full_128'
opt.input_nc = 4
opt.output_nc = 3
# Step of the sweep below (R takes the values 0, 5, ..., 100).
jump_number = 5
# Images are resized to opt.loadSize x opt.loadSize and converted to tensors.
dataset = torchvision.datasets.ImageFolder(opt.dataroot, transform=transforms.Compose([ transforms.Resize((opt.loadSize, opt.loadSize)), transforms.ToTensor()]))
dataset_loader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size, shuffle=not opt.serial_batches)
model = create_model(opt)
model.setup(opt)
model.eval()
# NOTE(review): this binds the name ``time`` to a datetime object; if the
# module also imports the ``time`` module it is shadowed from here on -- verify.
time = dt.datetime.now()
# Timestamp tag "MM_DD_HHMM" used to name the weight snapshot.
str_now = '%02d_%02d_%02d%02d' % (time.month, time.day, time.hour, time.minute)
# Keep a timestamped copy of the generator weights being evaluated.
shutil.copyfile('./checkpoints/%s/latest_net_G.pth' % opt.name, './checkpoints/%s/%s_net_G.pth' % (opt.name, str_now))
psnrs = {}
mses = {}
# Number of sweep values per axis: 21 for jump_number == 5, matching
# len(range(0, 101, jump_number)).
values_num = int((100 / jump_number) + 1)
# The bar is sized for values_num ** 2 steps -- presumably a two-dimensional
# sweep, but only the outer loop header is visible here (TODO confirm).
bar = pb.ProgressBar(max_value = values_num*values_num)
k = 0
for R in range(0,101,jump_number):
def create_inference_graph(wanted_words, sample_rate, clip_duration_ms,
                           clip_stride_ms, window_size_ms, window_stride_ms,
                           feature_bin_count, model_architecture, preprocess):
    """Builds the audio recognition graph with its inference input/output nodes.

    The graph takes encoded WAV bytes through the 'wav_data' placeholder,
    decodes them, computes a spectrogram, turns it into fingerprint features
    and feeds those to the requested model. The class probabilities are
    exposed through a softmax node named 'labels_softmax'. Nothing is
    returned; the nodes are added to the current default graph.

    Args:
      wanted_words: Comma-separated list of the words to recognize.
      sample_rate: Number of audio samples per second in the input.
      clip_duration_ms: Duration of the analyzed clip (forwarded to
        `models.prepare_model_settings`).
      clip_stride_ms: How often recognition is run; passed to the model as a
        runtime setting.
      window_size_ms: Duration of each time slice used for frequency analysis.
      window_stride_ms: Distance between consecutive time slices.
      feature_bin_count: Number of frequency bands to analyze.
      model_architecture: Name of the model architecture to build.
      preprocess: Feature extraction mode, either 'mfcc' or 'average'.

    Raises:
      Exception: If `preprocess` is neither 'mfcc' nor 'average'.
    """
    labels = input_data.prepare_words_list(wanted_words.split(','))
    settings = models.prepare_model_settings(
        len(labels), sample_rate, clip_duration_ms, window_size_ms,
        window_stride_ms, feature_bin_count, preprocess)
    runtime_settings = {'clip_stride_ms': clip_stride_ms}

    # Input side: raw WAV bytes -> mono samples -> power spectrogram.
    wav_bytes = tf.placeholder(tf.string, [], name='wav_data')
    decoded = contrib_audio.decode_wav(
        wav_bytes,
        desired_channels=1,
        desired_samples=settings['desired_samples'],
        name='decoded_sample_data')
    spectrogram = contrib_audio.audio_spectrogram(
        decoded.audio,
        window_size=settings['window_size_samples'],
        stride=settings['window_stride_samples'],
        magnitude_squared=True)

    # Feature extraction: either MFCCs or average pooling over frequency bins.
    if preprocess == 'mfcc':
        features = contrib_audio.mfcc(
            spectrogram,
            sample_rate,
            dct_coefficient_count=settings['fingerprint_width'])
    elif preprocess == 'average':
        pool_width = settings['average_window_width']
        features = tf.nn.pool(
            tf.expand_dims(spectrogram, -1),
            window_shape=[1, pool_width],
            strides=[1, pool_width],
            pooling_type='AVG',
            padding='SAME')
    else:
        raise Exception('Unknown preprocess mode "%s" (should be "mfcc" or'
                        ' "average")' % preprocess)

    # Flatten the features into one fingerprint vector per example.
    flat_features = tf.reshape(features, [-1, settings['fingerprint_size']])

    logits = models.create_model(
        flat_features, settings, model_architecture, is_training=False,
        runtime_settings=runtime_settings)

    # Output node used by inference clients.
    tf.nn.softmax(logits, name='labels_softmax')
from data import create_dataset
from models import create_model
from util.visualizer import save_images
from util import html

# Test entry point: builds the dataset and model from the parsed test options,
# prepares an HTML results page and iterates over at most opt.num_test items.
if __name__ == '__main__':
    opt = TestOptions().parse()  # get test options
    # hard-code some parameters for test
    # NOTE(review): the original comment here read "test code only supports
    # num_threads = 1", which contradicts the value actually assigned (0).
    opt.num_threads = 0
    opt.batch_size = 1    # test code only supports batch_size = 1
    opt.serial_batches = True  # disable data shuffling; comment this line if results on randomly chosen images are needed.
    opt.no_flip = True    # no flip; comment this line if results on flipped images are needed.
    opt.display_id = -1   # no visdom display; the test code saves the results to a HTML file.
    dataset = create_dataset(opt)  # create a dataset given opt.dataset_mode and other options
    model = create_model(opt)      # create a model given opt.model and other options
    model.setup(opt)               # regular setup: load and print networks; create schedulers
    # create a website
    web_dir = os.path.join(opt.results_dir, opt.name, '{}_{}'.format(opt.phase, opt.epoch))  # define the website directory
    if opt.load_iter > 0:  # load_iter is 0 by default
        web_dir = '{:s}_iter{:d}'.format(web_dir, opt.load_iter)
    print('creating web directory', web_dir)
    webpage = html.HTML(web_dir, 'Experiment = %s, Phase = %s, Epoch = %s' % (opt.name, opt.phase, opt.epoch))
    # test with eval mode. This only affects layers like batchnorm and dropout.
    # For [pix2pix]: we use batchnorm and dropout in the original pix2pix. You can experiment it with and without eval() mode.
    # For [CycleGAN]: It should not affect CycleGAN as CycleGAN uses instancenorm without dropout.
    if opt.eval:
        model.eval()
    # Only the early-exit guard of this loop is visible in this excerpt; the
    # per-item processing is not shown here.
    for i, data in enumerate(dataset):
        if i >= opt.num_test:  # only apply our model to opt.num_test images.
            break
def train(log_dir, args):
    """Train the LPC-feature model, writing checkpoints and summaries to ``log_dir``.

    Builds the data feeder and the model graph, optionally restores the latest
    checkpoint found in ``log_dir`` and/or a pretrained decoder from the
    ``pretrain`` directory, then runs optimisation steps until the coordinator
    is asked to stop. Any exception raised inside the session (including the
    deliberate 'Loss Exploded' abort) is logged and forwarded to the
    coordinator rather than propagated to the caller.

    Args:
        log_dir: Directory that receives checkpoints, TensorBoard summaries,
            alignment plots and the periodically dumped LPC outputs.
        args: Parsed command-line namespace. Fields read here: ``git``,
            ``dataset`` ('bznsyp' or 'ljspeech'), ``base_dir``, ``model``,
            ``restore_step``, ``restore_decoder``, ``checkpoint_interval``
            and ``summary_interval``.

    Raises:
        KeyError: If ``args.dataset`` is not one of the known dataset keys.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    DATA_PATH = {'bznsyp': "BZNSYP", 'ljspeech': "LJSpeech-1.1"}[args.dataset]
    input_path = os.path.join(args.base_dir, 'DATA', DATA_PATH, 'training', 'train.txt')
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder'):
        feeder = DataFeeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model'):
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs, feeder.input_lengths,
                         feeder.lpc_targets, feeder.stop_token_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=999, keep_checkpoint_every_n_hours=2)

    # Train!
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            # Restore from the latest checkpoint in log_dir if the user asked
            # for it. restore_step only acts as an on/off switch here: the
            # checkpoint restored is whichever one log_dir lists as latest.
            checkpoint_state = (tf.train.get_checkpoint_state(log_dir)
                                if args.restore_step else None)
            if checkpoint_state is not None:
                saver.restore(sess, checkpoint_state.model_checkpoint_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (checkpoint_state.model_checkpoint_path, commit), slack=True)
            else:
                if args.restore_step:
                    # Make the fallback visible instead of silently training
                    # from scratch.
                    log('No checkpoint found in %s; nothing was restored.' % log_dir)
                log('Starting new training run at commit: %s' % commit, slack=True)

            if args.restore_decoder:
                meta_files = [f for f in os.listdir('pretrain') if '.meta' in f]
                if not meta_files:
                    # Previously surfaced as a bare IndexError.
                    raise FileNotFoundError(
                        "restore_decoder was requested but 'pretrain' contains no .meta file")
                decoder_ckpt_path = os.path.join(
                    'pretrain', meta_files[0].replace('.meta', ''))
                # Only decoder-side variables are restored; anything whose
                # name mentions attention is left at its initial value.
                valid_scope = (
                    'model/inference/decoder',
                    'model/inference/post_cbhg',
                    'model/inference/dense',
                    'model/inference/memory_layer',
                )
                var_list = [
                    v for v in tf.global_variables()
                    if 'attention' not in v.name
                    and 'Attention' not in v.name
                    and v.name.startswith(valid_scope)
                ]
                decoder_saver = tf.train.Saver(var_list)
                decoder_saver.restore(sess, decoder_ckpt_path)
                print('restore pretrained decoder ...')

            feeder.start_in_session(sess)

            while not coord.should_stop():
                start_time = time.time()
                step, loss, _ = sess.run(
                    [global_step, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % args.checkpoint_interval == 0))

                # Abort on divergence; the except clause below logs it and
                # stops the coordinator.
                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
                    raise Exception('Loss Exploded')

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    # First example of the current batch only.
                    input_seq, lpc_output, alignment = sess.run([
                        model.inputs[0], model.lpc_outputs[0], model.alignments[0]
                    ])
                    plot.plot_alignment(
                        alignment,
                        os.path.join(log_dir, 'step-%d-align.png' % step),
                        info='%s, %s, %s, step=%d, loss=%.5f' %
                        (args.model, commit, time_string(), step, loss))
                    np.save(os.path.join(log_dir, 'step-%d-lpc.npy' % step), lpc_output)
                    log('Input: %s' % sequence_to_text(input_seq))

        except Exception as e:
            # Top-level boundary of the training loop: report and shut down
            # the feeder threads via the coordinator.
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
output = model(images) top1, top5 = count_top1_top5(labels.cpu().numpy(), output.cpu().detach().numpy()) top1_TP += top1 top5_TP += top5 top1_acc = top1_TP / len(dataset) top5_acc = top5_TP / len(dataset) return top1_acc, top5_acc if __name__ == '__main__': args = parse_args() cfg = config_from_file(args.config) data_dir = cfg.DATA_DIR arch = args.arch attention = args.attention checkpoint = cfg.CHECKPOINT save_dir = os.path.join(checkpoint, '_'.join([arch, attention])) mkdir(checkpoint) mkdir(save_dir) model = create_model(arch, attention) num_params = count_params(model) print('{}_{}: {} parameters'.format(args.arch, args.attention, num_params)) if args.resume: model = trainval(model, save_dir, cfg, resume_epoch=args.load_epoch) else: model = trainval(model, save_dir, cfg)
def main(_):
    """Build the speech-commands graph and report accuracy on the test split.

    The full training graph (loss, optimizer, global step, summaries) is still
    constructed so that the set of graph variables matches what a training
    checkpoint contains, but no training is performed: after initialising the
    variables (and restoring ``FLAGS.start_checkpoint`` when that flag is set)
    the function evaluates the 'testing' partition batch by batch and logs the
    accumulated confusion matrix and accuracy.

    Args:
        _: Unused; present because the function is used as an app entry point.

    Raises:
        Exception: If --how_many_training_steps and --learning_rate have
            different lengths, or if --optimizer is not recognised.
    """
    # Set the verbosity based on flags (default is INFO, so we see all messages)
    tf.compat.v1.logging.set_verbosity(FLAGS.verbosity)

    # Start a new TensorFlow session.
    sess = tf.compat.v1.InteractiveSession()

    # Begin by making sure we have the data we need. If you already have data
    # of your own, use `--data_url= ` on the command line to avoid downloading.
    model_settings = models.prepare_model_settings(
        len(input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, FLAGS.feature_bin_count, FLAGS.preprocess)
    audio_processor = input_data.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir,
        FLAGS.silence_percentage, FLAGS.unknown_percentage,
        FLAGS.wanted_words.split(','), FLAGS.validation_percentage,
        FLAGS.testing_percentage, model_settings, FLAGS.summaries_dir)
    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']

    # The step/learning-rate schedule is not consumed here (no training loop),
    # but the flags are still parsed and cross-checked so malformed values
    # fail fast exactly as before.
    training_steps_list = list(
        map(int, FLAGS.how_many_training_steps.split(',')))
    learning_rates_list = list(map(float, FLAGS.learning_rate.split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' % (len(training_steps_list),
                                                       len(learning_rates_list)))

    input_placeholder = tf.compat.v1.placeholder(
        tf.float32, [None, fingerprint_size], name='fingerprint_input')
    if FLAGS.quantize:
        fingerprint_min, fingerprint_max = input_data.get_features_range(
            model_settings)
        fingerprint_input = tf.quantization.fake_quant_with_min_max_args(
            input_placeholder, fingerprint_min, fingerprint_max)
    else:
        fingerprint_input = input_placeholder

    logits, dropout_prob = models.create_model(
        fingerprint_input,
        model_settings,
        FLAGS.model_architecture,
        is_training=True)

    # Define loss and optimizer
    ground_truth_input = tf.compat.v1.placeholder(
        tf.int64, [None], name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms
    # of numerical errors start occurring.
    control_dependencies = []
    if FLAGS.check_nans:
        checks = tf.compat.v1.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery in the graph.
    with tf.compat.v1.name_scope('cross_entropy'):
        cross_entropy_mean = tf.compat.v1.losses.sparse_softmax_cross_entropy(
            labels=ground_truth_input, logits=logits)

    if FLAGS.quantize:
        try:
            tf.contrib.quantize.create_training_graph(quant_delay=0)
        except (ImportError, AttributeError) as e:
            # TensorFlow 2.x has no `tf.contrib` attribute at all, which
            # raises AttributeError rather than ImportError; both cases get
            # the same installation hint appended.
            msg = e.args[0] if e.args else ''
            msg += (
                '\n\n The --quantize option still requires contrib, which is not '
                'part of TensorFlow 2.0. Please install a previous version:'
                '\n `pip install tensorflow<=1.15`')
            e.args = (msg, )
            raise

    with tf.compat.v1.name_scope('train'), tf.control_dependencies(
            control_dependencies):
        learning_rate_input = tf.compat.v1.placeholder(
            tf.float32, [], name='learning_rate_input')
        # The train op itself is never run in this evaluation-only entry
        # point; it is built so optimizer variables exist in the graph.
        if FLAGS.optimizer == 'gradient_descent':
            train_step = tf.compat.v1.train.GradientDescentOptimizer(
                learning_rate_input).minimize(cross_entropy_mean)
        elif FLAGS.optimizer == 'momentum':
            train_step = tf.compat.v1.train.MomentumOptimizer(
                learning_rate_input, .9,
                use_nesterov=True).minimize(cross_entropy_mean)
        else:
            raise Exception('Invalid Optimizer')

    predicted_indices = tf.argmax(input=logits, axis=1)
    correct_prediction = tf.equal(predicted_indices, ground_truth_input)
    confusion_matrix = tf.math.confusion_matrix(labels=ground_truth_input,
                                                predictions=predicted_indices,
                                                num_classes=label_count)
    evaluation_step = tf.reduce_mean(
        input_tensor=tf.cast(correct_prediction, tf.float32))
    with tf.compat.v1.get_default_graph().name_scope('eval'):
        tf.compat.v1.summary.scalar('cross_entropy', cross_entropy_mean)
        tf.compat.v1.summary.scalar('accuracy', evaluation_step)

    global_step = tf.compat.v1.train.get_or_create_global_step()
    increment_global_step = tf.compat.v1.assign(global_step, global_step + 1)

    saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())

    # Merge all the summaries and create the writers (kept for parity with
    # the training script; nothing is written to them below).
    merged_summaries = tf.compat.v1.summary.merge_all(scope='eval')
    train_writer = tf.compat.v1.summary.FileWriter(
        FLAGS.summaries_dir + '/train', sess.graph)
    validation_writer = tf.compat.v1.summary.FileWriter(
        FLAGS.summaries_dir + '/validation')

    tf.compat.v1.global_variables_initializer().run()

    # Without restoring a checkpoint the evaluation below would measure a
    # randomly initialised network. Restore trained weights when the
    # start_checkpoint flag is defined and non-empty; otherwise behave as
    # before.
    start_checkpoint = getattr(FLAGS, 'start_checkpoint', None)
    if start_checkpoint:
        models.load_variables_from_checkpoint(sess, start_checkpoint)

    # NOTE(review): hard-coded path, and the loaded object is only inspected,
    # never used for the evaluation below — confirm whether this is still
    # needed. The attribute holding the signature map is `signatures`.
    loaded = tf.saved_model.load("/models/ConvNet070220.pb")
    print(list(loaded.signatures.keys()))

    # The training/validation loop that used to live here was fully commented
    # out; this entry point only evaluates the test partition.
    set_size = audio_processor.set_size('testing')
    tf.compat.v1.logging.info('set_size=%d', set_size)
    total_accuracy = 0
    total_conf_matrix = None
    for i in xrange(0, set_size, FLAGS.batch_size):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            FLAGS.batch_size, i, model_settings, 0.0, 0.0, 0, 'testing', sess)
        test_accuracy, conf_matrix = sess.run(
            [evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: test_fingerprints,
                ground_truth_input: test_ground_truth,
                dropout_prob: 1.0
            })
        # The last batch may be short; weight each batch by its real size.
        batch_size = min(FLAGS.batch_size, set_size - i)
        total_accuracy += (test_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
            total_conf_matrix = conf_matrix
        else:
            total_conf_matrix += conf_matrix
    tf.compat.v1.logging.warn('Confusion Matrix:\n %s' % (total_conf_matrix))
    tf.compat.v1.logging.warn('Final test accuracy = %.1f%% (N=%d)' %
                              (total_accuracy * 100, set_size))
use_multigap=True # X_train, Y_train, X_test, Y_test = prepare_data(use_distribution=use_distribution) X_train, Y_train,X_test, Y_test,X_train_text, X_test_text,embedding_layer = prepare_data(use_distribution=use_distribution, use_semantics=use_semantics, use_comments=use_comments) # X_train, Y_train,X_test, Y_test= prepare_data(use_distribution=use_distribution, use_semantics=False) # X_train, Y_train, Y_train_semantics, X_test, Y_test, Y_test_semantics, X_train_text, X_test_text, embedding_layer = prepare_data(use_distribution=use_distribution, use_semantics=use_semantics, use_comments=use_comments) ## Without image data # _, Y_train,_, Y_test,X_train_text, X_test_text,embedding_layer = prepare_data(use_distribution=use_distribution, use_semantics=use_semantics, use_comments=use_comments, imageDataAvailable=False) # BEST MODEL model = create_model('weights/2017-01-25 22_56_09 - distribution_2layergru_extra_conv_layer.h5', use_distribution=use_distribution, use_semantics=use_semantics,use_multigap=use_multigap,use_comments=use_comments, embedding_layer=embedding_layer,extra_conv_layer=True,textInputMaxLength=maxlen,embedding_dim=EMBEDDING_DIM) # model = create_model('weights/googlenet_aesthetics_weights.h5', # use_distribution=use_distribution, use_semantics=use_semantics,use_multigap=True, heatmap=False) # MODEL WITH EXTRA CONV AND NO TEXT # model = create_model('weights/2017-01-27 12:41:36 - distribution_extra_conv_layer.h5', # use_distribution=use_distribution, use_semantics=use_semantics, # use_multigap=True,extra_conv_layer=True) # RAPID STYLE # model = create_model('weights/googlenet_aesthetics_weights.h5', # use_distribution=use_distribution, use_semantics=True, # use_multigap=False,extra_conv_layer=False, rapid_style=True)
def main():
    """Train, evaluate and save a sequence-to-sequence model for one run.

    Steps: read the run parameters, build the train/val/test arrays, compile
    and fit the model, pickle the training history, evaluate on the test set,
    optionally reload the best checkpoint weights, save the model, dump the
    parameters (including score and scaling info) to ``params_out.yml`` and
    produce the metric/prediction plots. All outputs go to
    ``params.output_dir``.
    """
    print("tensorflow version =", tf.__version__)

    # get and save params of this run
    params = get_params()

    datasetD = make_seq_2_seq_dataset(params)
    train_x = datasetD['train']['x']
    train_y = datasetD['train']['y']
    test_x = datasetD['test']['x']
    test_y = datasetD['test']['y']
    val_x = datasetD['val']['x']
    val_y = datasetD['val']['y']
    test_scenarios = datasetD['test']['scenarios']
    params.scaleD = datasetD['scaleD']  # store scaleD in params_out.yml

    model = create_model(params)
    model.compile(optimizer=params.optimizer,
                  loss=params.loss,
                  metrics=get_metrics(params))
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()

    history = model.fit([train_x], [train_y],
                        batch_size=32,
                        epochs=params.num_epochs,
                        callbacks=make_callbacks(params),
                        validation_data=([val_x], [val_y]),
                        validation_freq=1)

    with open(os.path.join(params.output_dir, 'history.pickle'), 'wb') as f:
        pickle.dump(history.history, f)

    # NOTE(review): the score is computed with the weights left by the last
    # epoch, before the best checkpoint (if any) is loaded below — confirm
    # that this is the intended number to report.
    score = model.evaluate([test_x], [test_y])
    # evaluate() returns a bare scalar when the model has no metrics; always
    # store a list of plain Python floats (np.float is not YAML-friendly).
    if not isinstance(score, (list, tuple)):
        score = [score]
    score = [float(s) for s in score]
    params.score = score  # store score in params_out.yml

    if 'best_checkpoint' in params.callback_list:
        # load weights from best checkpoint before saving the model
        model.load_weights(
            os.path.join(params.output_dir, "best-checkpoint-weights.h5"))

    save_model(params, model)

    with open(os.path.join(params.output_dir, 'params_out.yml'), 'w') as f:
        yaml.dump(vars(params), f, default_flow_style=False)

    plot_metrics(params)
    plot_predictions(params, model, test_scenarios)
def train(args, game_config):
    """Train a Q-network with a replay memory and a periodically synced target network.

    Every ``args.frame_skip`` frames an action is chosen epsilon-greedily and
    the resulting transition is written into a circular replay memory. After
    every ``args.update_frequency`` chosen actions (once the memory holds at
    least one batch) a random minibatch is sampled and the online model is
    fitted towards ``reward + gamma * max_a Q_target(next_state, a)``. Every
    ``args.C`` updates the target network is synced and the online weights are
    saved to ``weights/<args.game>/updates_<n>.h5``.

    Args:
        args: Namespace providing ``replay_size``, ``batch_size``,
            ``initial_epsilon``, ``final_epsilon``, ``final_epsilon_annealing``,
            ``nb_frame``, ``frame_skip``, ``update_frequency``, ``gamma``,
            ``C``, ``game`` and ``render``.
        game_config: Dict with keys ``'state_shape'``, ``'enviroment'``
            (sic), ``'preprocessing'`` and ``'actions'``.
    """
    import os  # local import: only needed to create the weights directory

    state_shape = game_config['state_shape']
    env = game_config['enviroment']
    preprocessing = game_config['preprocessing']
    actions = game_config['actions']

    # Pre-allocate the replay memory (state, action, reward, next state,
    # terminal flag); it is filled while playing.
    state0_rm = np.zeros([args.replay_size, *state_shape], dtype=np.int8)
    action_rm = np.zeros([args.replay_size], dtype=np.int8)
    reward_rm = np.zeros([args.replay_size], dtype=np.int8)
    state1_rm = np.zeros([args.replay_size, *state_shape], dtype=np.int8)
    # Plain `bool`: the `np.bool` alias was deprecated in NumPy 1.20 and
    # removed in 1.24.
    terminal_rm = np.zeros([args.replay_size], dtype=bool)

    # Initialize the action-value function with random weights.
    model = create_model(args, game_config)

    # Initialize the target action-value function with the same weights.
    model_target = model_from_yaml(model.to_yaml())
    model_target.set_weights(model.get_weights())

    # Bookkeeping.
    epsilon = args.initial_epsilon
    epsilon_reduction = (args.initial_epsilon - args.final_epsilon)/args.final_epsilon_annealing
    episodes = 0
    model_updates = 0
    idx_rm = 0                       # next write position in the replay memory
    idxs_rm = np.arange(args.replay_size)
    idxs_batch = np.arange(args.batch_size)
    nb_active_rm = 0                 # number of valid entries in the replay memory
    total_frames = 0

    pbar_frames = tqdm.tqdm(total=args.nb_frame, desc='0000 episodes', leave=True, position=0)
    while total_frames < args.nb_frame:
        obs0 = np.zeros(state_shape, dtype=np.int8)
        obs1 = np.zeros(state_shape, dtype=np.int8)
        obs0[:] = obs1[:] = preprocessing(env.reset())

        # Size the per-episode bar with the average episode length so far.
        a = max(1, int(total_frames/max(1, episodes)))
        pbar_episode = tqdm.tqdm(total=a, desc=' 0 reward', leave=False, position=1)

        t = 0
        treward = 0
        done = False
        nb_actions = 0
        while not done and total_frames < args.nb_frame:
            choose_action = (t % args.frame_skip) == 0

            if choose_action:
                # Epsilon-greedy selection; the action is then repeated on
                # the skipped frames.
                if np.random.rand() < epsilon:
                    action_idx = np.random.randint(low=0, high=len(actions))
                else:
                    qval = model.predict(np.array([obs0]), verbose=0)
                    action_idx = qval.argmax()
                nb_actions += 1

            ob, reward, done, _ = env.step(actions[action_idx])
            treward += reward

            if choose_action:
                # Update state: shift the frame history by one and put the
                # newest preprocessed frame in front.
                obs1[1:] = obs1[:-1]
                obs1[0] = preprocessing(ob)
                reward = np.clip(reward, -1, 1)

                # Save the transition in the replay memory.
                state0_rm[idx_rm] = obs0[:]
                action_rm[idx_rm] = action_idx
                reward_rm[idx_rm] = reward
                state1_rm[idx_rm] = obs1[:]
                terminal_rm[idx_rm] = int(done)

                # Set last state.
                obs0[:] = obs1[:]
                nb_active_rm = min(nb_active_rm + 1, args.replay_size)

                # Advance the circular write index.
                idx_rm = (idx_rm + 1) % args.replay_size

            if nb_active_rm >= args.batch_size and nb_actions == args.update_frequency:
                nb_actions = 0

                # Sample a random minibatch of transitions from the filled
                # part of the replay memory.
                idxs = np.random.choice(idxs_rm[:nb_active_rm], size=args.batch_size)
                qamax = np.max(model_target.predict(state1_rm[idxs]), axis=1)
                y_q = model.predict(state0_rm[idxs])
                # No bootstrapping from terminal transitions.
                y_q_target = reward_rm[idxs] + (1.0-terminal_rm[idxs])*args.gamma*qamax
                # Only the Q-value of the action actually taken is replaced
                # by the target; the other entries keep the model's own
                # predictions.
                y_q[idxs_batch, action_rm[idxs]] = y_q_target

                # Train on batch.
                model.train_on_batch(state0_rm[idxs], y_q)
                model_updates += 1

                # Update model_target every C updates and snapshot weights.
                if (model_updates % args.C) == 0:
                    model_target.set_weights(model.get_weights())
                    fname = 'weights/{0}/updates_{1}.h5'.format(args.game, model_updates)
                    # Make sure the target directory exists so a long run
                    # does not die on the first snapshot.
                    os.makedirs(os.path.dirname(fname), exist_ok=True)
                    model.save_weights(fname)

            t += 1
            epsilon = max(epsilon - epsilon_reduction, args.final_epsilon)

            pbar_episode.total = max(pbar_episode.total, t)
            pbar_episode.set_description('{0:4d} reward'.format(int(treward)))
            pbar_episode.update(1)
            pbar_frames.update(1)

            if args.render:
                env.render()

        pbar_episode.close()
        episodes += 1
        total_frames += t
        pbar_frames.set_description('{0:04d} episodes'.format(episodes))
    pbar_frames.close()
def train(log_dir, args):
    """Run the training loop, writing checkpoints, summaries and samples to ``log_dir``.

    The data feeder and model graph are built first; if ``args.restore_step``
    is set, the checkpoint ``model.ckpt-<restore_step>`` in ``log_dir`` is
    restored. Training then continues until the coordinator requests a stop.
    Exceptions raised inside the session are logged and handed to the
    coordinator instead of being re-raised.

    Args:
        log_dir: Output directory for checkpoints, summaries, audio samples
            and alignment plots.
        args: Parsed options; reads ``git``, ``base_dir``, ``input``,
            ``model``, ``restore_step``, ``checkpoint_interval`` and
            ``summary_interval``.
    """
    if args.git:
        commit = get_git_commit()
    else:
        commit = 'None'
    ckpt_prefix = os.path.join(log_dir, 'model.ckpt')
    metadata_path = os.path.join(args.base_dir, args.input)
    log('Checkpoint path: %s' % ckpt_prefix)
    log('Loading training data from: %s' % metadata_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    # Input pipeline.
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder'):
        feeder = DataFeeder(coord, metadata_path, hparams)

    # Model graph, loss, optimizer and summary ops.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model'):
        model = create_model(args.model, hparams)
        model.initialize(
            feeder.inputs,
            feeder.input_lengths,
            feeder.mel_targets,
            feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    # Running statistics and checkpoint saver.
    step = 0
    step_times = ValueWindow(100)
    losses = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if not args.restore_step:
                log('Starting new training run at commit: %s' % commit, slack=True)
            else:
                # Resume from the checkpoint the user asked for.
                restore_path = '%s-%d' % (ckpt_prefix, args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit),
                    slack=True)

            feeder.start_in_session(sess)

            while not coord.should_stop():
                tick = time.time()
                step, loss, _ = sess.run([global_step, model.loss, model.optimize])
                step_times.append(time.time() - tick)
                losses.append(loss)

                at_checkpoint = step % args.checkpoint_interval == 0
                progress = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, step_times.average, loss, losses.average)
                log(progress, slack=at_checkpoint)

                # Bail out on divergence; handled by the except clause below.
                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
                    raise Exception('Loss Exploded')

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if at_checkpoint:
                    log('Saving checkpoint to: %s-%d' % (ckpt_prefix, step))
                    saver.save(sess, ckpt_prefix, global_step=step)

                    # Dump a sample (first item of the batch) for inspection.
                    log('Saving audio and alignment...')
                    sample_ops = [model.inputs[0],
                                  model.linear_outputs[0],
                                  model.alignments[0]]
                    input_seq, spectrogram, alignment = sess.run(sample_ops)
                    wav_path = os.path.join(log_dir, 'step-%d-audio.wav' % step)
                    audio.save_wav(audio.inv_spectrogram(spectrogram.T), wav_path)
                    plot_path = os.path.join(log_dir, 'step-%d-align.png' % step)
                    plot_info = '%s, %s, %s, step=%d, loss=%.5f' % (
                        args.model, commit, time_string(), step, loss)
                    plot.plot_alignment(alignment, plot_path, info=plot_info)
                    log('Input: %s' % sequence_to_text(input_seq))

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
def main():
    """Training entry point driven by a JSON option file (``-opt``).

    Parses the options, prepares the experiment directories, seeds the RNGs,
    builds the train/val loaders and the model, then trains. Terminology used
    by the loops below, as established by the code: a "month" is one batch
    drawn from the train loader together with one set of latent codes; each
    month is split into ``num_day`` optimisation steps ("days"), each using a
    slice of ``batch_size_per_day`` samples that wraps around the month batch.

    Logging, checkpointing and validation happen every ``print_freq``,
    ``save_checkpoint_freq`` and ``val_freq`` steps respectively; the final
    model is saved as 'latest'.

    Raises:
        NotImplementedError: If the option file declares a dataset phase other
            than 'train' or 'val'.
        ValueError: If the option file declares no 'train' dataset.
    """
    # options
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt', type=str, required=True,
                        help='Path to option JSON file.')
    opt = option.parse(parser.parse_args().opt, is_train=True)

    # rename old experiments if they exist, then create the other directories
    util.mkdir_and_rename(opt['path']['experiments_root'])
    skipped_keys = ('experiments_root', 'pretrain_model_G', 'pretrain_model_D')
    util.mkdirs((path for key, path in opt['path'].items()
                 if key not in skipped_keys))
    option.save(opt)
    # Convert to NoneDict, which returns None for missing keys.
    opt = option.dict_to_nonedict(opt)

    # print to file and std_out simultaneously
    sys.stdout = PrintLogger(opt['path']['log'])

    # random seed
    seed = opt['train']['manual_seed']
    if seed is None:
        seed = random.randint(1, 10000)
    print("Random Seed: ", seed)
    random.seed(seed)
    torch.manual_seed(seed)

    # create train and val dataloader
    # Both start as None so that a missing phase is detected explicitly
    # instead of surfacing later as an unbound-variable error.
    train_loader = None
    val_loader = None
    for phase, dataset_opt in opt['datasets'].items():
        if phase == 'train':
            train_set = create_dataset(dataset_opt)
            train_size = int(
                math.ceil(len(train_set) / dataset_opt['batch_size']))
            print('Number of train images: {:,d}, iters: {:,d}'.format(
                len(train_set), train_size))
            total_iters = int(opt['train']['niter'])
            total_epoches = int(math.ceil(total_iters / train_size))
            print('Total epoches needed: {:d} for iters {:,d}'.format(
                total_epoches, total_iters))
            train_loader = create_dataloader(train_set, dataset_opt)
            batch_size_per_month = dataset_opt['batch_size']
            batch_size_per_day = int(
                opt['datasets']['train']['batch_size_per_day'])
            num_month = int(opt['train']['num_month'])
            num_day = int(opt['train']['num_day'])
            use_dci = opt['train'].get('use_dci', False)
        elif phase == 'val':
            val_set = create_dataset(dataset_opt)
            val_loader = create_dataloader(val_set, dataset_opt)
            print('Number of val images in [{:s}]: {:d}'.format(
                dataset_opt['name'], len(val_set)))
        else:
            raise NotImplementedError(
                'Phase [{:s}] is not recognized.'.format(phase))
    if train_loader is None:
        raise ValueError("The option file must define a 'train' dataset.")

    # Create model
    model = create_model(opt)
    # create logger
    logger = Logger(opt)

    current_step = 0
    start_time = time.time()
    print('---------- Start training -------------')
    # Baseline validation before any training step (skipped when the option
    # file has no 'val' dataset).
    if val_loader is not None:
        validate(val_loader, opt, model, current_step, 0, logger)

    for epoch in range(num_month):
        for i, train_data in enumerate(train_loader):
            # get the code: either searched with DCI or sampled from a
            # standard normal at the LR resolution and at twice that.
            if use_dci:
                cur_month_code = get_code_for_data_two_lpips(
                    model, train_data, opt)
            else:
                lr_shape = train_data['LR'].shape
                code_channels = int(opt['network_G']['in_code_nc'])
                code_val_0 = torch.randn(lr_shape[0], code_channels,
                                         lr_shape[2], lr_shape[3])
                code_val_1 = torch.randn(lr_shape[0], code_channels,
                                         lr_shape[2] * 2, lr_shape[3] * 2)
                cur_month_code = [code_val_0, code_val_1]

            for j in range(num_day):
                current_step += 1

                # get the sliced data; the slice wraps around the end of the
                # month batch when it would run past it.
                cur_day_batch_start_idx = (
                    j * batch_size_per_day) % batch_size_per_month
                cur_day_batch_end_idx = cur_day_batch_start_idx + batch_size_per_day
                if cur_day_batch_end_idx > batch_size_per_month:
                    cur_day_batch_idx = np.hstack(
                        (np.arange(cur_day_batch_start_idx, batch_size_per_month),
                         np.arange(cur_day_batch_end_idx - batch_size_per_month)))
                else:
                    cur_day_batch_idx = slice(cur_day_batch_start_idx,
                                              cur_day_batch_end_idx)

                cur_day_train_data = {
                    'LR': train_data['LR'][cur_day_batch_idx],
                    'HR': train_data['HR'][cur_day_batch_idx]
                }
                code = [gen_code[cur_day_batch_idx]
                        for gen_code in cur_month_code]

                # training
                model.feed_data(cur_day_train_data, code=code)
                model.optimize_parameters(j)

                time_elapsed = time.time() - start_time
                start_time = time.time()

                # log
                if current_step % opt['logger']['print_freq'] == 0:
                    logs = model.get_current_log()
                    print_rlt = OrderedDict()
                    print_rlt['model'] = opt['model']
                    print_rlt['epoch'] = epoch
                    print_rlt['iters'] = current_step
                    print_rlt['time'] = time_elapsed
                    for k, v in logs.items():
                        print_rlt[k] = v
                    print_rlt['lr'] = model.get_current_learning_rate()
                    logger.print_format_results('train', print_rlt)

                # save models
                if current_step % opt['logger']['save_checkpoint_freq'] == 0:
                    print('Saving the model at the end of iter {:d}.'.format(
                        current_step))
                    model.save(current_step)

                # validation
                if (val_loader is not None
                        and current_step % opt['train']['val_freq'] == 0):
                    validate(val_loader, opt, model, current_step, epoch, logger)

                # update learning rate
                model.update_learning_rate()

    print('Saving the final model.')
    model.save('latest')
    print('End of training.')
def main():
    """Run the image-classifier compression sample application.

    Parses the command line via the module-level ``parser``, configures
    logging into a fresh ``./logs/<name>___<timestamp>`` directory, builds the
    model, and then does exactly one of the following:

    * print/draw a model summary (``--summary``) and exit;
    * run a sensitivity analysis (``--sensitivity``), write
      ``sensitivity.png`` / ``sensitivity.csv`` and exit;
    * evaluate on the test set (``--evaluate``), optionally after 8-bit
      quantization, and exit;
    * otherwise train for ``args.epochs`` epochs (optionally driven by a
      compression schedule), validating and checkpointing after every epoch,
      and finally evaluate on the test set.

    The process is terminated with exit status 1 on invalid ``--workers`` /
    ``--gpus`` arguments.
    """
    args = parser.parse_args()

    # The Distiller library writes logs to the Python logger, so we configure it.
    global msglogger
    timestr = time.strftime("%Y.%m.%d-%H%M%S")
    filename = timestr if args.name is None else args.name + '___' + timestr
    logdir = './logs' + '/' + filename
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(logdir, exist_ok=True)
    log_filename = os.path.join(logdir, filename + '.log')
    logging.config.fileConfig(os.path.join(script_dir, 'logging.conf'),
                              defaults={'logfilename': log_filename})
    msglogger = logging.getLogger()
    msglogger.info('Log file for this run: ' + os.path.realpath(log_filename))

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(sys.argv, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_top1 = 0

    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1'
            )
            sys.exit(1)
        # Use a well-known seed, for repeatability of experiments
        torch.manual_seed(0)
        random.seed(0)
        np.random.seed(0)
        cudnn.deterministic = True
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.gpus is not None:
        try:
            args.gpus = [int(s) for s in args.gpus.split(',')]
        except ValueError:
            msglogger.error(
                'ERROR: Argument --gpus must be a comma-separated list of integers only'
            )
            sys.exit(1)
        available_gpus = torch.cuda.device_count()
        for dev_id in args.gpus:
            if dev_id >= available_gpus:
                msglogger.error(
                    'ERROR: GPU device ID {0} requested, but only {1} devices available'
                    .format(dev_id, available_gpus))
                sys.exit(1)
        # Set default device in case the first one on the list != 0
        torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'

    # Create the model
    model = create_model(args.pretrained, args.dataset, args.arch,
                         device_ids=args.gpus)
    compression_scheduler = None

    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(logdir)
    pylogger = PythonLogger(msglogger)

    # We can optionally resume from a checkpoint
    if args.resume:
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)

        if 'resnet' in args.arch and 'cifar' in args.arch:
            distiller.resnet_cifar_remove_layers(model)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    msglogger.info("Optimizer (%s): momentum=%s decay=%s", type(optimizer),
                   args.momentum, args.weight_decay)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        which_summary = args.summary
        if which_summary == 'png':
            apputils.draw_img_classifier_to_file(model, 'model.png', args.dataset)
        else:
            distiller.model_summary(model, optimizer, which_summary, args.dataset)
        sys.exit()

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.deterministic)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    activations_sparsity = None
    if args.activation_stats:
        # If your model has ReLU layers, then those layers have sparse activations.
        # ActivationSparsityCollector will collect information about this sparsity.
        # WARNING! Enabling activation sparsity collection will significantly slow down training!
        activations_sparsity = ActivationSparsityCollector(model)

    if args.sensitivity is not None:
        # This sample application can be invoked to execute Sensitivity Analysis on your
        # model.  The output is saved to CSV and PNG.
        msglogger.info("Running sensitivity tests")
        test_fnc = partial(test, test_loader=test_loader, criterion=criterion,
                           loggers=[pylogger], print_freq=args.print_freq)
        which_params = [
            param_name for param_name, _ in model.named_parameters()
        ]
        # Filter pruning is swept over a narrower sparsity range than the
        # other sensitivity modes.
        sensitivity = distiller.perform_sensitivity_analysis(
            model,
            net_params=which_params,
            sparsities=np.arange(0.0, 0.50, 0.05)
            if args.sensitivity == 'filter' else np.arange(0.0, 0.95, 0.05),
            test_func=test_fnc,
            group=args.sensitivity)
        distiller.sensitivities_to_png(sensitivity, 'sensitivity.png')
        distiller.sensitivities_to_csv(sensitivity, 'sensitivity.csv')
        sys.exit()

    if args.evaluate:
        # This sample application can be invoked to evaluate the accuracy of your model on
        # the test dataset.
        # You can optionally quantize the model to 8-bit integer before evaluation.
        # For example:
        # python3 compress_classifier.py --arch resnet20_cifar  ../data.cifar10 -p=50 --resume=checkpoint.pth.tar --evaluate
        if args.quantize:
            # The model is moved to the CPU while the quantizer rewrites it,
            # then back to the GPU for evaluation.
            model.cpu()
            quantizer = quantization.SymmetricLinearQuantizer(model, 8, 8)
            quantizer.prepare_model()
            model.cuda()
        test(test_loader, model, criterion, [pylogger], args.print_freq)
        sys.exit()

    if args.compress:
        # The main use-case for this sample application is CNN compression.  Compression
        # requires a compression schedule configuration file in YAML.
        source = args.compress
        msglogger.info("Compression schedule (source=%s)", source)
        compression_scheduler = distiller.CompressionScheduler(model)
        distiller.config.fileConfig(model, optimizer, compression_scheduler,
                                    args.compress, msglogger)

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        train(train_loader, model, criterion, optimizer, epoch,
              compression_scheduler,
              loggers=[tflogger, pylogger],
              print_freq=args.print_freq,
              log_params_hist=args.log_params_histograms)
        distiller.log_weights_sparsity(model, epoch,
                                       loggers=[tflogger, pylogger])
        if args.activation_stats:
            distiller.log_activation_sparsity(epoch,
                                              loggers=[tflogger, pylogger],
                                              collector=activations_sparsity)

        # evaluate on validation set
        top1, top5, vloss = validate(val_loader, model, criterion, [pylogger],
                                     args.print_freq, epoch)
        # NOTE: the tag below is spelled 'Peformance' on purpose-preserved
        # basis: it is the TensorBoard tag existing runs were logged under.
        stats = ('Peformance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1), ('Top5', top5)]))
        distiller.log_training_progress(stats, None, epoch, steps_completed=0,
                                        total_steps=1, log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch)

        # remember best top1 and save checkpoint
        is_best = top1 > best_top1
        best_top1 = max(top1, best_top1)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler, best_top1, is_best,
                                 args.name)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], args.print_freq)
def load_model(model_path):
    """Build a fresh model and populate it with weights stored at *model_path*.

    Args:
        model_path: Location of the saved weights, passed straight to the
            model's ``load_weights`` method.

    Returns:
        The model returned by ``create_model()`` after its weights were loaded.
    """
    restored = create_model()
    restored.load_weights(model_path)
    return restored
def main():
    """Train a model described by an options file, optionally distributed.

    Command line:
        -opt          Path to the option file handed to ``option.parse``.
        --launcher    ``none`` (single process, rank -1) or ``pytorch``
                      (distributed; rank/world size come from
                      ``torch.distributed``).
        --local_rank  Accepted for the PyTorch launcher; not read here.

    The function sets up directories and loggers (rank <= 0 only creates the
    experiment folders and the optional TensorBoard writer), seeds the RNGs,
    builds the training dataloader, optionally resumes from
    ``opt['path']['resume_state']`` and then runs the training loop until
    ``opt['train']['niter']`` iterations have been performed.  Validation
    during training is not supported by this script.

    Raises:
        NotImplementedError: If ``opt['datasets']`` contains a phase other
            than ``train`` or ``val``.
        AssertionError: If no ``train`` phase is configured.
    """
    #### options
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt', type=str, help='Path to option JSON file.')
    parser.add_argument('--launcher', choices=['none', 'pytorch'],
                        default='none', help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    opt = option.parse(args.opt, is_train=True)

    #### distributed training settings
    if args.launcher == 'none':  # disabled distributed training
        opt['dist'] = False
        rank = -1
        print('Disabled distributed training.')
    else:
        opt['dist'] = True
        init_dist()
        world_size = torch.distributed.get_world_size()
        rank = torch.distributed.get_rank()

    #### loading resume state if exists
    if opt['path'].get('resume_state', None):
        # distributed resuming: all load into default GPU
        device_id = torch.cuda.current_device()
        resume_state = torch.load(
            opt['path']['resume_state'],
            map_location=lambda storage, loc: storage.cuda(device_id))
        option.check_resume(opt, resume_state['iter'])  # check resume options
    else:
        resume_state = None

    #### mkdir and loggers
    if rank <= 0:  # normal training (rank -1) OR distributed training (rank 0)
        if resume_state is None:
            util.mkdir_and_rename(
                opt['path']['experiments_root'])  # rename experiment folder if exists
            util.mkdirs(
                (path for key, path in opt['path'].items()
                 if not key == 'experiments_root'
                 and 'pretrain_model' not in key and 'resume' not in key))

        # config loggers. Before it, the log will not work
        util.setup_logger('base', opt['path']['log'], 'train_' + opt['name'],
                          level=logging.INFO, screen=True, tofile=True)
        logger = logging.getLogger('base')
        logger.info(option.dict2str(opt))
        # tensorboard logger
        if opt['use_tb_logger'] and 'debug' not in opt['name']:
            version = float(torch.__version__[0:3])
            if version >= 1.1:  # PyTorch 1.1
                from torch.utils.tensorboard import SummaryWriter
            else:
                logger.info(
                    'You are using PyTorch {}. Tensorboard will use [tensorboardX]'
                    .format(version))
                from tensorboardX import SummaryWriter
            tb_logger = SummaryWriter(log_dir='../tb_logger/' + opt['name'])
    else:
        util.setup_logger('base', opt['path']['log'], 'train',
                          level=logging.INFO, screen=True)
        logger = logging.getLogger('base')

    # convert to NoneDict, which returns None for missing keys
    opt = option.dict_to_nonedict(opt)

    #### random seed
    seed = opt['train']['manual_seed']
    if seed is None:
        seed = random.randint(1, 10000)
    if rank <= 0:
        logger.info('Random seed: {}'.format(seed))
    util.set_random_seed(seed)

    # Let cuDNN auto-tune its convolution algorithms.
    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True

    #### create train and val dataloader
    dataset_ratio = 200  # enlarge the size of each epoch
    # Pre-bind so that a missing 'train' phase trips the assert below instead
    # of an unrelated NameError.
    train_loader = None
    for phase, dataset_opt in opt['datasets'].items():
        if phase == 'train':
            train_set = create_dataset(dataset_opt)
            train_size = int(
                math.ceil(len(train_set) / dataset_opt['batch_size']))
            total_iters = int(opt['train']['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            if opt['dist']:
                train_sampler = DistIterSampler(train_set, world_size, rank,
                                                dataset_ratio)
                # The distributed sampler enlarges each epoch by dataset_ratio,
                # so fewer epochs are needed to reach total_iters.
                total_epochs = int(
                    math.ceil(total_iters / (train_size * dataset_ratio)))
            else:
                train_sampler = None
            train_loader = create_dataloader(train_set, dataset_opt, opt,
                                             train_sampler)
            if rank <= 0:
                logger.info(
                    'Number of train images: {:,d}, iters: {:,d}'.format(
                        len(train_set), train_size))
                logger.info('Total epochs needed: {:d} for iters {:,d}'.format(
                    total_epochs, total_iters))
        elif phase == 'val':
            # Validation during training is not supported by this script, so
            # the validation dataset is intentionally not created.
            pass
        else:
            raise NotImplementedError(
                'Phase [{:s}] is not recognized.'.format(phase))
    assert train_loader is not None, 'No [train] phase found in opt[\'datasets\'].'

    #### create model
    model = create_model(opt)

    #### resume training
    if resume_state:
        logger.info('Resuming training from epoch: {}, iter: {}.'.format(
            resume_state['epoch'], resume_state['iter']))

        start_epoch = resume_state['epoch']
        current_step = resume_state['iter']
        model.resume_training(resume_state)  # handle optimizers and schedulers
    else:
        current_step = 0
        start_epoch = 0

    #### training
    logger.info('Start training from epoch: {:d}, iter: {:d}'.format(
        start_epoch, current_step))
    for epoch in range(start_epoch, total_epochs + 1):
        if opt['dist']:
            train_sampler.set_epoch(epoch)
        for _, train_data in enumerate(train_loader):
            current_step += 1
            if current_step > total_iters:
                break
            #### update learning rate
            model.update_learning_rate(current_step,
                                       warmup_iter=opt['train']['warmup_iter'])

            #### training
            model.feed_data(train_data)
            model.optimize_parameters(current_step)

            #### log
            if current_step % opt['logger']['print_freq'] == 0:
                logs = model.get_current_log()
                message = '<epoch:{:3d}, iter:{:8,d}, lr:{:.3e}> '.format(
                    epoch, current_step, model.get_current_learning_rate())
                for k, v in logs.items():
                    message += '{:s}: {:.4e} '.format(k, v)
                    # tensorboard logger (tb_logger only exists on rank <= 0)
                    if opt['use_tb_logger'] and 'debug' not in opt['name']:
                        if rank <= 0:
                            tb_logger.add_scalar(k, v, current_step)
                if rank <= 0:
                    logger.info(message)

            #### validation
            # currently, it does not support validation during training

            #### save models and training states
            if current_step % opt['logger']['save_checkpoint_freq'] == 0:
                if rank <= 0:
                    logger.info('Saving models and training states.')
                    model.save(current_step)
                    model.save_training_state(epoch, current_step)

    if rank <= 0:
        logger.info('Saving the final model.')
        model.save('latest')
        logger.info('End of training.')
def main():
    """Train a model described by an options file, with periodic validation.

    Command line:
        -opt          Path to the option file handed to ``option.parse``.
        --launcher    ``none`` (single process, rank -1) or ``pytorch``
                      (distributed; rank/world size come from
                      ``torch.distributed``).
        --local_rank  Accepted for the PyTorch launcher; not read here.

    The function sets up directories and loggers, seeds the RNGs, builds the
    train/val dataloaders, optionally resumes from
    ``opt['path']['resume_state']`` and trains until ``opt['train']['niter']``
    iterations have been performed.  Every ``opt['train']['val_freq']``
    iterations rank <= 0 runs validation: it saves the SR / LR / reference
    images under ``opt['path']['val_images']`` and logs the average PSNR.

    TensorBoard output is produced only when a writer was actually created,
    i.e. on rank <= 0 with ``opt['use_tb_logger']`` set and no ``debug`` in
    ``opt['name']``.

    Raises:
        NotImplementedError: If ``opt['datasets']`` contains a phase other
            than ``train`` or ``val``.
        AssertionError: If no ``train`` phase is configured.
    """
    #### options
    parser = argparse.ArgumentParser()
    parser.add_argument('-opt', type=str, help='Path to option YMAL file.')
    parser.add_argument('--launcher', choices=['none', 'pytorch'],
                        default='none', help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    opt = option.parse(args.opt, is_train=True)

    #### distributed training settings
    if args.launcher == 'none':  # disabled distributed training
        opt['dist'] = False
        rank = -1
        print('Disabled distributed training.')
    else:
        opt['dist'] = True
        init_dist()
        world_size = torch.distributed.get_world_size()
        rank = torch.distributed.get_rank()

    #### loading resume state if exists
    if opt['path'].get('resume_state', None):
        # distributed resuming: all load into default GPU
        device_id = torch.cuda.current_device()
        resume_state = torch.load(
            opt['path']['resume_state'],
            map_location=lambda storage, loc: storage.cuda(device_id))
        option.check_resume(opt, resume_state['iter'])  # check resume options
    else:
        resume_state = None

    #### mkdir and loggers
    if rank <= 0:  # normal training (rank -1) OR distributed training (rank 0)
        if resume_state is None:
            util.mkdir_and_rename(
                opt['path']['experiments_root'])  # rename experiment folder if exists
            util.mkdirs(
                (path for key, path in opt['path'].items()
                 if not key == 'experiments_root'
                 and 'pretrain_model' not in key and 'resume' not in key))

        # config loggers. Before it, the log will not work
        util.setup_logger('base', opt['path']['log'], 'train_' + opt['name'],
                          level=logging.INFO, screen=True, tofile=True)
        util.setup_logger('val', opt['path']['log'], 'val_' + opt['name'],
                          level=logging.INFO, screen=True, tofile=True)
        logger = logging.getLogger('base')
        logger.info(option.dict2str(opt))
        # tensorboard logger
        if opt['use_tb_logger'] and 'debug' not in opt['name']:
            version = float(torch.__version__[0:3])
            if version >= 1.1:  # PyTorch 1.1
                from torch.utils.tensorboard import SummaryWriter
            else:
                logger.info(
                    'You are using PyTorch {}. Tensorboard will use [tensorboardX]'
                    .format(version))
                from tensorboardX import SummaryWriter
            tb_logger = SummaryWriter(log_dir='../tb_logger/' + opt['name'])
    else:
        util.setup_logger('base', opt['path']['log'], 'train',
                          level=logging.INFO, screen=True)
        logger = logging.getLogger('base')

    # convert to NoneDict, which returns None for missing keys
    opt = option.dict_to_nonedict(opt)

    # True exactly when `tb_logger` was created above.  Every TensorBoard call
    # below is guarded by this flag so that other ranks, debug runs and runs
    # with TensorBoard disabled do not hit an undefined `tb_logger`.
    use_tb_logger = bool(rank <= 0 and opt['use_tb_logger']
                         and 'debug' not in opt['name'])

    #### random seed
    seed = opt['train']['manual_seed']
    if seed is None:
        seed = random.randint(1, 10000)
    if rank <= 0:
        logger.info('Random seed: {}'.format(seed))
    util.set_random_seed(seed)

    torch.backends.cudnn.benchmark = True
    # torch.backends.cudnn.deterministic = True

    #### create train and val dataloader
    dataset_ratio = 200  # enlarge the size of each epoch
    # Pre-bind so that a missing 'train' phase trips the assert below instead
    # of an unrelated NameError.
    train_loader = None
    for phase, dataset_opt in opt['datasets'].items():
        if phase == 'train':
            train_set = create_dataset(dataset_opt)
            train_size = int(
                math.ceil(len(train_set) / dataset_opt['batch_size']))
            total_iters = int(opt['train']['niter'])
            total_epochs = int(math.ceil(total_iters / train_size))
            if opt['dist']:
                train_sampler = DistIterSampler(train_set, world_size, rank,
                                                dataset_ratio)
                # The distributed sampler enlarges each epoch by dataset_ratio,
                # so fewer epochs are needed to reach total_iters.
                total_epochs = int(
                    math.ceil(total_iters / (train_size * dataset_ratio)))
            else:
                train_sampler = None
            train_loader = create_dataloader(train_set, dataset_opt, opt,
                                             train_sampler)
            if rank <= 0:
                logger.info(
                    'Number of train images: {:,d}, iters: {:,d}'.format(
                        len(train_set), train_size))
                logger.info('Total epochs needed: {:d} for iters {:,d}'.format(
                    total_epochs, total_iters))
        elif phase == 'val':
            val_set = create_dataset(dataset_opt)
            val_loader = create_dataloader(val_set, dataset_opt, opt, None)
            if rank <= 0:
                logger.info('Number of val images in [{:s}]: {:d}'.format(
                    dataset_opt['name'], len(val_set)))
        else:
            raise NotImplementedError(
                'Phase [{:s}] is not recognized.'.format(phase))
    assert train_loader is not None, 'No [train] phase found in opt[\'datasets\'].'

    #### create model
    model = create_model(opt)

    #### resume training
    if resume_state:
        logger.info('Resuming training from epoch: {}, iter: {}.'.format(
            resume_state['epoch'], resume_state['iter']))

        start_epoch = resume_state['epoch']
        current_step = resume_state['iter']
        model.resume_training(resume_state)  # handle optimizers and schedulers
    else:
        current_step = 0
        start_epoch = 0

    # Log one training batch and the generator graph to TensorBoard.
    # NOTE(review): `model.netG.module` assumes netG is wrapped (e.g. by
    # DataParallel) -- confirm against the model implementation.
    if use_tb_logger:
        images = next(iter(train_loader))['GT']
        grid = torchvision.utils.make_grid(images)
        tb_logger.add_image('images', grid, 0)
        tb_logger.add_graph(model.netG.module, images.cuda())

    #### training
    logger.info('Start training from epoch: {:d}, iter: {:d}'.format(
        start_epoch, current_step))
    for epoch in range(start_epoch, total_epochs + 1):
        if opt['dist']:
            train_sampler.set_epoch(epoch)
        for _, train_data in enumerate(train_loader):
            current_step += 1
            if current_step > total_iters:
                break

            #### training
            model.feed_data(train_data)
            model.optimize_parameters(current_step)

            #### update learning rate
            model.update_learning_rate(current_step,
                                       warmup_iter=opt['train']['warmup_iter'])

            #### log
            if current_step % opt['logger']['print_freq'] == 0:
                logs = model.get_current_log()
                message = '<epoch:{:3d}, iter:{:8,d}, lr:{:.3e}> '.format(
                    epoch, current_step, model.get_current_learning_rate())
                for k, v in logs.items():
                    message += '{:s}: {:.4e} '.format(k, v)
                    # tensorboard logger
                    if use_tb_logger:
                        tb_logger.add_scalar(k, v, current_step)
                if rank <= 0:
                    logger.info(message)

            # validation
            if current_step % opt['train']['val_freq'] == 0 and rank <= 0:
                avg_psnr = 0.0
                idx = 0
                for val_data in val_loader:
                    idx += 1
                    img_name = os.path.splitext(
                        os.path.basename(val_data['LQ_path'][0]))[0]
                    img_dir = os.path.join(opt['path']['val_images'], img_name)
                    util.mkdir(img_dir)

                    model.feed_data(val_data)
                    model.test()

                    if use_tb_logger:
                        images = val_data['GT']
                        grid = torchvision.utils.make_grid(images)
                        tb_logger.add_image('images', grid, 0)
                        tb_logger.add_graph(model.netG.module, images.cuda())

                    visuals = model.get_current_visuals()
                    sr_img = util.tensor2img(visuals['SR'])  # uint8
                    gt_img = util.tensor2img(visuals['GT'])  # uint8

                    lr_img = util.tensor2img(visuals['LR'])

                    gtl_img = util.tensor2img(visuals['LR_ref'])

                    # Save SR images for reference
                    save_img_path = os.path.join(
                        img_dir,
                        '{:s}_{:d}.png'.format(img_name, current_step))
                    util.save_img(sr_img, save_img_path)

                    # Save LR images
                    save_img_path_L = os.path.join(
                        img_dir,
                        '{:s}_forwLR_{:d}.png'.format(img_name, current_step))
                    util.save_img(lr_img, save_img_path_L)

                    # Save ground truth (only on the first validation pass,
                    # since the references do not change between passes)
                    if current_step == opt['train']['val_freq']:
                        save_img_path_gt = os.path.join(
                            img_dir,
                            '{:s}_GT_{:d}.png'.format(img_name, current_step))
                        util.save_img(gt_img, save_img_path_gt)
                        save_img_path_gtl = os.path.join(
                            img_dir, '{:s}_LR_ref_{:d}.png'.format(
                                img_name, current_step))
                        util.save_img(gtl_img, save_img_path_gtl)

                    # calculate PSNR on the images with a border of
                    # opt['scale'] pixels cropped away
                    crop_size = opt['scale']
                    gt_img = gt_img / 255.
                    sr_img = sr_img / 255.
                    cropped_sr_img = sr_img[crop_size:-crop_size,
                                            crop_size:-crop_size, :]
                    cropped_gt_img = gt_img[crop_size:-crop_size,
                                            crop_size:-crop_size, :]
                    avg_psnr += util.calculate_psnr(cropped_sr_img * 255,
                                                    cropped_gt_img * 255)

                # An empty validation loader leaves avg_psnr at 0.0 instead of
                # dividing by zero.
                if idx > 0:
                    avg_psnr = avg_psnr / idx

                # log
                logger.info('# Validation # PSNR: {:.4e}.'.format(avg_psnr))
                logger_val = logging.getLogger('val')  # validation logger
                logger_val.info(
                    '<epoch:{:3d}, iter:{:8,d}> psnr: {:.4e}.'.format(
                        epoch, current_step, avg_psnr))
                # tensorboard logger
                if use_tb_logger:
                    tb_logger.add_scalar('psnr', avg_psnr, current_step)

            #### save models and training states
            if current_step % opt['logger']['save_checkpoint_freq'] == 0:
                if rank <= 0:
                    logger.info('Saving models and training states.')
                    model.save(current_step)
                    model.save_training_state(epoch, current_step)

    if rank <= 0:
        logger.info('Saving the final model.')
        model.save('latest')
        logger.info('End of training.')
def main():
    """Run the model over every configured test set and save its outputs.

    Reads the options file named by the required ``-opt`` argument, creates
    the directories listed in ``opt['path']`` (except ``pretrain_model_G``),
    builds one dataloader per entry of ``opt['datasets']`` and, for every
    test image, writes three PNG files (``_c``, ``_s`` and ``_p``) into
    ``<results_root>/<test set name>/``.  When ``opt['suffix']`` is set it is
    inserted between the image name and the ``_c`` / ``_s`` / ``_p`` tag.
    """
    # options
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-opt', type=str, required=True,
                            help='Path to options JSON file.')
    opt = option.parse(arg_parser.parse_args().opt, is_train=False)
    util.mkdirs((path for key, path in opt['path'].items()
                 if key != 'pretrain_model_G'))
    opt = option.dict_to_nonedict(opt)

    util.setup_logger(None, opt['path']['log'], 'test.log',
                      level=logging.INFO, screen=True)
    logger = logging.getLogger('base')
    logger.info(option.dict2str(opt))

    # Create test dataset and dataloader (phases are visited in sorted order)
    test_loaders = []
    for _, dataset_opt in sorted(opt['datasets'].items()):
        dataset = create_dataset(dataset_opt)
        loader = create_dataloader(dataset, dataset_opt)
        logger.info('Number of test images in [{:s}]: {:d}'.format(
            dataset_opt['name'], len(dataset)))
        test_loaders.append(loader)

    # Create model
    model = create_model(opt)

    # (key in the visuals dict, file-name ending) for each saved output
    outputs = (('img_c', '_c.png'), ('img_s', '_s.png'), ('img_p', '_p.png'))

    for test_loader in test_loaders:
        test_set_name = test_loader.dataset.opt['name']
        logger.info('\nTesting [{:s}]...'.format(test_set_name))
        test_start_time = time.time()
        dataset_dir = os.path.join(opt['path']['results_root'], test_set_name)
        util.mkdir(dataset_dir)

        # Metric accumulators; nothing in this function fills them.
        test_results = OrderedDict()
        for metric in ('psnr', 'ssim', 'psnr_y', 'ssim_y'):
            test_results[metric] = []

        for data in test_loader:
            # Ground truth is only requested when the dataset declares it.
            need_HR = test_loader.dataset.opt['dataroot_HR'] is not None

            model.feed_data(data, need_HR=need_HR)
            img_path = data['LR_path'][0]
            img_name = os.path.splitext(os.path.basename(img_path))[0]

            model.test()  # test
            visuals = model.get_current_visuals(need_HR=need_HR)

            # Convert all outputs first (uint8 images), then write them.
            converted = [(util.tensor2img(visuals[key]), ending)
                         for key, ending in outputs]

            # save images
            suffix = opt['suffix']
            stem = img_name + suffix if suffix else img_name
            targets = [(img, os.path.join(dataset_dir, stem + ending))
                       for img, ending in converted]
            for img, target in targets:
                util.save_img(img, target)
def create_inference_graph(wanted_words, sample_rate, clip_duration_ms,
                           clip_stride_ms, window_size_ms, window_stride_ms,
                           feature_bin_count, model_architecture, preprocess):
    """Creates an audio model with the nodes needed for inference.

    Uses the supplied arguments to create a model, and inserts the input and
    output nodes that are needed to use the graph for inference.  The input is
    a scalar string placeholder named 'wav_data' holding WAV-encoded audio; the
    output is a softmax op named 'labels_softmax'.  The function itself returns
    nothing: all nodes are added to the current default TensorFlow graph.

    Args:
      wanted_words: Comma-separated list of the words we're trying to recognize.
      sample_rate: How many samples per second are in the input audio files.
      clip_duration_ms: Length of the audio clip to analyze for the audio
        pattern (presumably in milliseconds, per the name -- confirm against
        `models.prepare_model_settings`).
      clip_stride_ms: How often to run recognition. Useful for models with cache.
      window_size_ms: Time slice duration to estimate frequencies from.
      window_stride_ms: How far apart time slices should be.
      feature_bin_count: Number of frequency bands to analyze.
      model_architecture: Name of the kind of model to generate.
      preprocess: How the spectrogram is processed to produce features, for
        example 'mfcc', 'average', or 'micro'.

    Raises:
      Exception: If the preprocessing mode isn't recognized, or if 'micro' is
        requested while the micro frontend op is unavailable.
    """

    words_list = input_data.prepare_words_list(wanted_words.split(','))
    model_settings = models.prepare_model_settings(
        len(words_list), sample_rate, clip_duration_ms, window_size_ms,
        window_stride_ms, feature_bin_count, preprocess)
    runtime_settings = {'clip_stride_ms': clip_stride_ms}

    # Graph input: one WAV file's bytes, decoded to a single channel with a
    # fixed number of samples.
    wav_data_placeholder = tf.placeholder(tf.string, [], name='wav_data')
    decoded_sample_data = contrib_audio.decode_wav(
        wav_data_placeholder,
        desired_channels=1,
        desired_samples=model_settings['desired_samples'],
        name='decoded_sample_data')
    spectrogram = contrib_audio.audio_spectrogram(
        decoded_sample_data.audio,
        window_size=model_settings['window_size_samples'],
        stride=model_settings['window_stride_samples'],
        magnitude_squared=True)

    # Turn the spectrogram (or, for 'micro', the raw audio) into the feature
    # "fingerprint" fed to the model, according to the preprocessing mode.
    if preprocess == 'average':
        # Average-pool the spectrogram along its last axis in windows of
        # 'average_window_width'.
        fingerprint_input = tf.nn.pool(
            tf.expand_dims(spectrogram, -1),
            window_shape=[1, model_settings['average_window_width']],
            strides=[1, model_settings['average_window_width']],
            pooling_type='AVG',
            padding='SAME')
    elif preprocess == 'mfcc':
        fingerprint_input = contrib_audio.mfcc(
            spectrogram,
            sample_rate,
            dct_coefficient_count=model_settings['fingerprint_width'])
    elif preprocess == 'micro':
        if not frontend_op:
            raise Exception(
                'Micro frontend op is currently not available when running TensorFlow'
                ' directly from Python, you need to build and run through Bazel, for'
                ' example'
                ' `bazel run tensorflow/examples/speech_commands:freeze_graph`'
            )
        # The micro frontend takes its window parameters in milliseconds, so
        # they are recomputed from the sample-based model settings.
        sample_rate = model_settings['sample_rate']
        window_size_ms = (model_settings['window_size_samples'] *
                          1000) / sample_rate
        window_step_ms = (model_settings['window_stride_samples'] *
                          1000) / sample_rate
        # Scale the decoded audio by 32767 and cast to int16 for the frontend.
        int16_input = tf.cast(
            tf.multiply(decoded_sample_data.audio, 32767), tf.int16)
        micro_frontend = frontend_op.audio_microfrontend(
            int16_input,
            sample_rate=sample_rate,
            window_size=window_size_ms,
            window_step=window_step_ms,
            num_channels=model_settings['fingerprint_width'],
            out_scale=1,
            out_type=tf.float32)
        fingerprint_input = tf.multiply(micro_frontend, (10.0 / 256.0))
    else:
        raise Exception('Unknown preprocess mode "%s" (should be "mfcc",'
                        ' "average", or "micro")' % (preprocess))

    # Flatten the features to [batch, fingerprint_size] for the model.
    fingerprint_size = model_settings['fingerprint_size']
    reshaped_input = tf.reshape(fingerprint_input, [-1, fingerprint_size])

    logits = models.create_model(
        reshaped_input, model_settings, model_architecture, is_training=False,
        runtime_settings=runtime_settings)

    # Create an output to use for inference.
    tf.nn.softmax(logits, name='labels_softmax')
def train(log_dir, args):
    """Train the model selected by ``args.model``, logging into *log_dir*.

    Builds the data feeder and the model graph, then runs optimisation steps
    until the coordinator asks to stop.  Summaries are written every
    ``args.summary_interval`` steps; every ``args.checkpoint_interval`` steps
    a checkpoint, a synthesized audio sample and an alignment plot are saved
    to *log_dir*.  If ``args.restore_step`` is set, training resumes from that
    checkpoint.

    Any exception raised while training (including the deliberate
    'Loss Exploded' one, raised when the loss exceeds 100 or is NaN) is
    logged, its traceback printed, and the coordinator is asked to stop.

    Args:
        log_dir: Directory receiving checkpoints, summaries and samples.
        args: Parsed command-line arguments.
    """
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)
    for line in ('Checkpoint path: %s' % checkpoint_path,
                 'Loading training data from: %s' % input_path,
                 'Using model: %s' % args.model):
        log(line)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder'):
        feeder = DataFeeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model'):
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs, feeder.input_lengths,
                         feeder.mel_targets, feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    # Bookkeeping: running averages over the last 100 steps.
    step = 0
    step_times = ValueWindow(100)
    losses = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    # Train!
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    with tf.Session(config=session_config) as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if not args.restore_step:
                log('Starting new training run at commit: %s' % commit,
                    slack=True)
            else:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' %
                    (restore_path, commit), slack=True)

            feeder.start_in_session(sess)

            while not coord.should_stop():
                tick = time.time()
                step, loss, _ = sess.run(
                    [global_step, model.loss, model.optimize])
                step_times.append(time.time() - tick)
                losses.append(loss)
                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, step_times.average, loss, losses.average)
                log(message, slack=(step % args.checkpoint_interval == 0))

                # Abort on a diverged run rather than keep training on garbage.
                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step),
                        slack=True)
                    raise Exception('Loss Exploded')

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.checkpoint_interval != 0:
                    continue

                log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                saver.save(sess, checkpoint_path, global_step=step)
                log('Saving audio and alignment...')
                # Pull the first example of the current batch for inspection.
                input_seq, spectrogram, alignment = sess.run([
                    model.inputs[0], model.linear_outputs[0],
                    model.alignments[0]
                ])
                waveform = audio.inv_spectrogram(spectrogram.T)
                audio.save_wav(
                    waveform,
                    os.path.join(log_dir, 'step-%d-audio.wav' % step))
                plot.plot_alignment(
                    alignment,
                    os.path.join(log_dir, 'step-%d-align.png' % step),
                    info='%s, %s, %s, step=%d, loss=%.5f' %
                    (args.model, commit, time_string(), step, loss))
                log('Input: %s' % sequence_to_text(input_seq))

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)
from data import CreateDataLoader
from models import create_model
from util.visualizer import save_images
from util import html

# Test script: runs a trained model over a dataset and writes the resulting
# images into an HTML results page.
# NOTE(review): `os` and `TestOptions` are used below but are not imported in
# the visible part of this file -- confirm they are imported elsewhere.
if __name__ == '__main__':
    opt = TestOptions().parse()
    # Force the settings the test code relies on, overriding whatever was
    # given on the command line.
    opt.nThreads = 1  # test code only supports nThreads = 1
    opt.batchSize = 1  # test code only supports batchSize = 1
    opt.serial_batches = True  # no shuffle
    opt.no_flip = True  # no flip
    opt.display_id = -1  # no visdom display
    data_loader = CreateDataLoader(opt)
    dataset = data_loader.load_data()
    model = create_model(opt)
    model.setup(opt)
    # create website under <results_dir>/<name>/<phase>_<which_epoch>
    web_dir = os.path.join(opt.results_dir, opt.name,
                           '%s_%s' % (opt.phase, opt.which_epoch))
    webpage = html.HTML(web_dir, 'Experiment = %s, Phase = %s, Epoch = %s' %
                        (opt.name, opt.phase, opt.which_epoch))
    # test: process at most opt.how_many items of the dataset
    for i, data in enumerate(dataset):
        if i >= opt.how_many:
            break
        model.set_input(data)
        model.test()
        visuals = model.get_current_visuals()
        img_path = model.get_image_paths()
        # Report progress on every fifth image only.
        if i % 5 == 0:
            print('processing (%04d)-th image... %s' % (i, img_path))
        save_images(webpage, visuals, img_path, aspect_ratio=opt.aspect_ratio,
                    width=opt.display_winsize)
def train(args):
    """Train the classifier described by *args*, validating periodically.

    Builds the model, the optimizer chosen by ``args.optim`` ('Adam', 'SGD'
    or 'rmsprop') and the LR scheduler chosen by ``args.lrs`` ('plateau' or,
    for any other value, cosine annealing).  Unless ``args.no_first_val`` is
    set, a validation pass is run before training; with ``args.val`` the
    function returns right after it.

    During training gradients are accumulated and applied every
    ``args.acc_grad`` batches (and at every validation point).  Every
    ``args.iter_val`` iterations the model is validated, the weights are
    saved to the model file when MAP@3 improved (or always, with
    ``args.always_save``), and the scheduler is stepped.

    Raises:
        ValueError: If ``args.optim`` is not one of the supported optimizers.
    """

    def _step_scheduler(metric):
        # The plateau scheduler is driven by the validation metric; the
        # cosine scheduler just advances.
        if args.lrs == 'plateau':
            lr_scheduler.step(metric)
        else:
            lr_scheduler.step()

    print('start training...')
    model, model_file = create_model(args.backbone, args.img_sz)
    if args.multi_gpu:
        model = nn.DataParallel(model).cuda()

    optim_name = args.optim
    if optim_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr,
                               weight_decay=args.weight_decay)
    elif optim_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif optim_name == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
    else:
        raise ValueError('invalid optim')

    if args.lrs == 'plateau':
        lr_scheduler = ReduceLROnPlateau(optimizer, mode='max',
                                         factor=args.factor,
                                         patience=args.patience,
                                         min_lr=args.min_lr)
    else:
        lr_scheduler = CosineAnnealingLR(optimizer, args.t_max,
                                         eta_min=args.min_lr)

    # Validation uses twice the training batch size.
    val_loader = get_val_loader(val_num=args.val_num,
                                batch_size=args.batch_size * 2,
                                dev_mode=args.dev_mode,
                                img_sz=args.img_sz,
                                workers=args.workers)

    best_map3 = 0.

    print(
        'epoch | lr | % | loss | avg | top1 | top3 | top5 | loss | map3 | best | time | save |'
    )

    if not args.no_first_val:
        top1_acc, top3_acc, top5_acc, val_loss, best_map3 = validate(
            args, model, val_loader)
        print(
            'val | | | | | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | | |'
            .format(top1_acc, top3_acc, top5_acc, val_loss, best_map3,
                    best_map3))

    if args.val:
        return

    model.train()
    _step_scheduler(best_map3)

    train_iter = 0

    for epoch in range(args.start_epoch, args.epochs):
        # Each epoch trains on one shard, cycling through args.train_num shards.
        train_loader = get_train_loader(train_index=epoch % args.train_num,
                                        batch_size=args.batch_size,
                                        dev_mode=args.dev_mode,
                                        img_sz=args.img_sz,
                                        workers=args.workers)

        running_loss = 0
        optimizer.zero_grad()
        current_lr = get_lrs(optimizer)
        epoch_start = time.time()

        for batch_idx, (img, target) in enumerate(train_loader):
            train_iter += 1
            img, target = img.cuda(), target.cuda()
            output = model(img)

            loss = criterion(output, target)
            loss.backward()

            # Apply the accumulated gradients every acc_grad batches, and
            # always right before a validation pass.
            if ((batch_idx + 1) % args.acc_grad == 0) or (
                    train_iter > 0 and train_iter % args.iter_val == 0):
                optimizer.step()
                optimizer.zero_grad()

            running_loss += loss.item()
            print('\r {:4d} | {:.5f} | {:4d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]),
                args.batch_size * (batch_idx + 1), train_loader.num,
                loss.item(), running_loss / (batch_idx + 1)),
                  end='')

            if not (train_iter > 0 and train_iter % args.iter_val == 0):
                continue

            top1_acc, top3_acc, top5_acc, val_loss, map3 = validate(
                args, model, val_loader)

            saved_mark = ''
            if args.always_save or map3 > best_map3:
                best_map3 = map3
                # Save the bare module's weights when wrapped by DataParallel.
                if args.multi_gpu:
                    torch.save(model.module.state_dict(), model_file)
                else:
                    torch.save(model.state_dict(), model_file)
                saved_mark = '*'

            print(
                ' {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.4f} | {:.2f} | {:4s} |'
                .format(top1_acc, top3_acc, top5_acc, val_loss, map3,
                        best_map3, (time.time() - epoch_start) / 60,
                        saved_mark))

            model.train()
            _step_scheduler(map3)
            current_lr = get_lrs(optimizer)
def main(_):
    """Train an audio keyword model, validating and checkpointing along the way.

    Everything is configured through the module-level `FLAGS`. The function
    builds the training graph, optionally restores variables from
    `FLAGS.start_checkpoint`, runs the training loop with a per-phase
    learning rate, evaluates on the validation split every
    `FLAGS.eval_step_interval` steps, saves a checkpoint every
    `FLAGS.save_step_interval` steps, and finally logs accuracy on the
    testing split.

    Args:
        _: Ignored (presumably the leftover argv handed over by the app
            runner -- confirm against the call site).

    Raises:
        Exception: If `--how_many_training_steps` and `--learning_rate` do not
            contain the same number of comma-separated entries.
    """
    # Show every INFO-level log message.
    tf.logging.set_verbosity(tf.logging.INFO)

    # All graph execution below goes through this session.
    session = tf.InteractiveSession()

    # Make sure the training data is available. If you already have your own
    # data, pass `--data_url=` on the command line to avoid downloading.
    vocabulary = input_data.prepare_words_list(FLAGS.wanted_words.split(','))
    settings = models.prepare_model_settings(
        len(vocabulary), FLAGS.sample_rate, FLAGS.clip_duration_ms,
        FLAGS.window_size_ms, FLAGS.window_stride_ms,
        FLAGS.dct_coefficient_count)
    processor = input_data.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
        FLAGS.unknown_percentage, FLAGS.wanted_words.split(','),
        FLAGS.validation_percentage, FLAGS.testing_percentage, settings)
    fingerprint_size = settings['fingerprint_size']
    label_count = settings['label_count']
    time_shift_samples = int((FLAGS.time_shift_ms * FLAGS.sample_rate) / 1000)

    # The schedule is given as two parallel comma-separated lists: how many
    # steps each phase lasts and which learning rate it uses. For example
    # --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001 runs
    # 13,000 steps in total: 10,000 at 0.001 followed by 3,000 at 0.0001.
    phase_steps = [int(s) for s in FLAGS.how_many_training_steps.split(',')]
    phase_rates = [float(r) for r in FLAGS.learning_rate.split(',')]
    if len(phase_steps) != len(phase_rates):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' % (len(phase_steps),
                                                       len(phase_rates)))

    # NOTE: the graph-building statements below are kept in their original
    # order because auto-generated op names depend on creation order.
    fingerprint_input = tf.placeholder(
        tf.float32, [None, fingerprint_size], name='fingerprint_input')

    logits, dropout_prob = models.create_model(
        fingerprint_input,
        settings,
        FLAGS.model_architecture,
        is_training=True)

    # Loss and optimizer inputs.
    ground_truth_input = tf.placeholder(
        tf.float32, [None, label_count], name='groundtruth_input')

    # Optional runtime checks that flag NaNs and similar numerical problems.
    control_dependencies = (
        [tf.add_check_numerics_ops()] if FLAGS.check_nans else [])

    # Back-propagation and training-evaluation machinery.
    with tf.name_scope('cross_entropy'):
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=ground_truth_input, logits=logits)
        cross_entropy_mean = tf.reduce_mean(per_example_loss)
    tf.summary.scalar('cross_entropy', cross_entropy_mean)
    with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
        learning_rate_input = tf.placeholder(
            tf.float32, [], name='learning_rate_input')
        optimizer = tf.train.GradientDescentOptimizer(learning_rate_input)
        train_step = optimizer.minimize(cross_entropy_mean)
    predicted_indices = tf.argmax(logits, 1)
    expected_indices = tf.argmax(ground_truth_input, 1)
    correct_prediction = tf.equal(predicted_indices, expected_indices)
    confusion_matrix = tf.confusion_matrix(
        expected_indices, predicted_indices, num_classes=label_count)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge all summaries; they are written under FLAGS.summaries_dir.
    merged_summaries = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                         session.graph)
    validation_writer = tf.summary.FileWriter(
        FLAGS.summaries_dir + '/validation')

    tf.global_variables_initializer().run()

    # Start at step 1 unless a checkpoint supplies a stored global step.
    start_step = 1
    if FLAGS.start_checkpoint:
        models.load_variables_from_checkpoint(session, FLAGS.start_checkpoint)
        start_step = global_step.eval(session=session)

    tf.logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(session.graph_def, FLAGS.train_dir,
                         FLAGS.model_architecture + '.pbtxt')

    # Save the list of words, one per line.
    labels_path = os.path.join(FLAGS.train_dir,
                               FLAGS.model_architecture + '_labels.txt')
    with gfile.GFile(labels_path, 'w') as labels_file:
        labels_file.write('\n'.join(processor.words_list))

    # Training loop.
    training_steps_max = np.sum(phase_steps)
    for training_step in xrange(start_step, training_steps_max + 1):
        # Use the rate of the first phase whose cumulative step budget
        # covers the current step.
        steps_so_far = 0
        for phase_length, phase_rate in zip(phase_steps, phase_rates):
            steps_so_far += phase_length
            if training_step <= steps_so_far:
                learning_rate_value = phase_rate
                break

        # Pull the audio samples used for this training step.
        train_fingerprints, train_ground_truth = processor.get_data(
            FLAGS.batch_size, 0, settings, FLAGS.background_frequency,
            FLAGS.background_volume, time_shift_samples, 'training', session)

        # Run the graph on this batch of training data.
        train_fetches = [
            merged_summaries, evaluation_step, cross_entropy_mean, train_step,
            increment_global_step
        ]
        train_feed = {
            fingerprint_input: train_fingerprints,
            ground_truth_input: train_ground_truth,
            learning_rate_input: learning_rate_value,
            dropout_prob: 0.5
        }
        train_summary, train_accuracy, cross_entropy_value, _, _ = session.run(
            train_fetches, feed_dict=train_feed)
        train_writer.add_summary(train_summary, training_step)
        tf.logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
                        (training_step, learning_rate_value,
                         train_accuracy * 100, cross_entropy_value))

        is_last_step = (training_step == training_steps_max)
        if (training_step % FLAGS.eval_step_interval) == 0 or is_last_step:
            validation_size = processor.set_size('validation')
            validation_total = 0
            validation_matrix = None
            for offset in xrange(0, validation_size, FLAGS.batch_size):
                validation_fingerprints, validation_ground_truth = (
                    processor.get_data(FLAGS.batch_size, offset, settings, 0.0,
                                       0.0, 0, 'validation', session))
                # Run a validation step and capture summaries for TensorBoard
                # with the merged summary op.
                validation_feed = {
                    fingerprint_input: validation_fingerprints,
                    ground_truth_input: validation_ground_truth,
                    dropout_prob: 1.0
                }
                validation_summary, validation_accuracy, batch_matrix = (
                    session.run(
                        [merged_summaries, evaluation_step, confusion_matrix],
                        feed_dict=validation_feed))
                validation_writer.add_summary(validation_summary,
                                              training_step)
                # The last batch may be short, so weight by its real size.
                samples_in_batch = min(FLAGS.batch_size,
                                       validation_size - offset)
                validation_total += (
                    (validation_accuracy * samples_in_batch) / validation_size)
                if validation_matrix is not None:
                    validation_matrix += batch_matrix
                else:
                    validation_matrix = batch_matrix
            tf.logging.info('Confusion Matrix:\n %s' % (validation_matrix))
            tf.logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                            (training_step, validation_total * 100,
                             validation_size))

        # Save the model checkpoint periodically and on the final step.
        if training_step % FLAGS.save_step_interval == 0 or is_last_step:
            checkpoint_path = os.path.join(FLAGS.train_dir,
                                           FLAGS.model_architecture + '.ckpt')
            tf.logging.info('Saving to "%s-%d"', checkpoint_path, training_step)
            saver.save(session, checkpoint_path, global_step=training_step)

    # Final evaluation on the testing split.
    test_size = processor.set_size('testing')
    tf.logging.info('set_size=%d', test_size)
    test_total = 0
    test_matrix = None
    for offset in xrange(0, test_size, FLAGS.batch_size):
        test_fingerprints, test_ground_truth = processor.get_data(
            FLAGS.batch_size, offset, settings, 0.0, 0.0, 0, 'testing',
            session)
        test_feed = {
            fingerprint_input: test_fingerprints,
            ground_truth_input: test_ground_truth,
            dropout_prob: 1.0
        }
        test_accuracy, batch_matrix = session.run(
            [evaluation_step, confusion_matrix], feed_dict=test_feed)
        samples_in_batch = min(FLAGS.batch_size, test_size - offset)
        test_total += (test_accuracy * samples_in_batch) / test_size
        if test_matrix is not None:
            test_matrix += batch_matrix
        else:
            test_matrix = batch_matrix
    tf.logging.info('Confusion Matrix:\n %s' % (test_matrix))
    tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % (test_total * 100,
                                                             test_size))
def run_inference(wanted_words, sample_rate, clip_duration_ms, window_size_ms,
                  window_stride_ms, dct_coefficient_count, model_architecture,
                  model_size_info):
    """Evaluates a trained audio model on the training, validation and test sets.

    Builds the inference graph for the requested architecture, restores its
    variables from `FLAGS.checkpoint`, then logs the accuracy and confusion
    matrix for each of the three data splits. Data locations, split
    percentages and the batch size are still read from `FLAGS`, since the
    signature has no parameters for them.

    Args:
        wanted_words: Comma-separated list of the words we're trying to
            recognize.
        sample_rate: How many samples per second are in the input audio files.
        clip_duration_ms: Clip duration setting forwarded to
            `models.prepare_model_settings` (milliseconds, going by the name).
        window_size_ms: Time slice duration to estimate frequencies from.
        window_stride_ms: How far apart time slices should be.
        dct_coefficient_count: Number of frequency bands to analyze.
        model_architecture: Name of the kind of model to generate.
        model_size_info: Model dimensions; the expected length differs between
            model architectures.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    sess = tf.InteractiveSession()

    words_list = input_data.prepare_words_list(wanted_words.split(','))
    model_settings = models.prepare_model_settings(
        len(words_list), sample_rate, clip_duration_ms, window_size_ms,
        window_stride_ms, dct_coefficient_count)

    # Use the caller-supplied word list rather than FLAGS.wanted_words so the
    # audio pipeline matches the label count computed above.
    audio_processor = input_data.AudioProcessor(
        FLAGS.data_url, FLAGS.data_dir, FLAGS.silence_percentage,
        FLAGS.unknown_percentage, wanted_words.split(','),
        FLAGS.validation_percentage, FLAGS.testing_percentage, model_settings)

    label_count = model_settings['label_count']
    fingerprint_size = model_settings['fingerprint_size']

    fingerprint_input = tf.placeholder(
        tf.float32, [None, fingerprint_size], name='fingerprint_input')

    # Honor the architecture and size arguments instead of silently reading
    # the same-named FLAGS, so the function evaluates what the caller asked for.
    logits = models.create_model(
        fingerprint_input,
        model_settings,
        model_architecture,
        model_size_info,
        is_training=False)

    ground_truth_input = tf.placeholder(
        tf.float32, [None, label_count], name='groundtruth_input')

    predicted_indices = tf.argmax(logits, 1)
    expected_indices = tf.argmax(ground_truth_input, 1)
    correct_prediction = tf.equal(predicted_indices, expected_indices)
    confusion_matrix = tf.confusion_matrix(
        expected_indices, predicted_indices, num_classes=label_count)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    models.load_variables_from_checkpoint(sess, FLAGS.checkpoint)

    def evaluate_split(split, label):
        """Logs the size, confusion matrix and accuracy of one data split."""
        set_size = audio_processor.set_size(split)
        tf.logging.info('set_size=%d', set_size)
        total_accuracy = 0
        total_conf_matrix = None
        for offset in xrange(0, set_size, FLAGS.batch_size):
            fingerprints, ground_truth = audio_processor.get_data(
                FLAGS.batch_size, offset, model_settings, 0.0, 0.0, 0, split,
                sess)
            accuracy, conf_matrix = sess.run(
                [evaluation_step, confusion_matrix],
                feed_dict={
                    fingerprint_input: fingerprints,
                    ground_truth_input: ground_truth,
                })
            # The final batch may be short, so weight by its real size.
            batch_size = min(FLAGS.batch_size, set_size - offset)
            total_accuracy += (accuracy * batch_size) / set_size
            if total_conf_matrix is None:
                total_conf_matrix = conf_matrix
            else:
                total_conf_matrix += conf_matrix
        tf.logging.info('Confusion Matrix:\n %s', total_conf_matrix)
        tf.logging.info('%s accuracy = %.2f%% (N=%d)', label,
                        total_accuracy * 100, set_size)

    evaluate_split('training', 'Training')
    evaluate_split('validation', 'Validation')
    evaluate_split('testing', 'Test')