def command_line(): p = OptionParser() p.add_option('-H', '--host', dest='host', action='store', help='IMAP host connect to') p.add_option('-u', '--username', dest='username', action='store', help='Username to login with') p.add_option('-p', '--password', dest='password', action='store', help='Password to login with') p.add_option('-P', '--port', dest='port', action='store', default=None, help='IMAP port to use (default is 143)') p.add_option('-s', '--ssl', dest='ssl', action='store_true', default=False, help='Use SSL connection') p.add_option('-f', '--file', dest='file', action='store', default=None, help='Config file (same as livetest)') opts, args = p.parse_args() if args: p.error('unexpected arguments %s' % ' '.join(args)) if opts.file: if opts.host or opts.username or opts.password or opts.port or opts.ssl: p.error('If -f/--file is given no other options can be used') # Use the options in the config file opts = parse_config_file(opts.file) else: # Get compulsory options if not given on the command line for opt_name in ('host', 'username', 'password'): if not getattr(opts, opt_name): setattr(opts, opt_name, getpass(opt_name + ': ')) if not opts.port: opts.port = 143 opts.oauth = False # OAUTH not supported on command line return opts
def main(): args = parse_args() cfg = parse_config_file(args.config_file) visualize( tfrecords=args.tfrecords, cfg=cfg )
def main(): args = parse_args() print "Command line arguments:" pprint.pprint(vars(args)) print cfg = parse_config_file(args.config_file) print "Configurations:" pprint.pprint(cfg) print if args.max_number_of_steps != None: cfg.NUM_TRAIN_ITERATIONS = args.max_number_of_steps if args.batch_size != None: cfg.BATCH_SIZE = args.batch_size with open(args.priors) as f: bbox_priors = pickle.load(f) bbox_priors = np.array(bbox_priors).astype(np.float32) train( tfrecords=args.tfrecords, bbox_priors=bbox_priors, logdir=args.logdir, cfg=cfg, pretrained_model_path=args.pretrained_model, fine_tune = args.fine_tune, extract_feats = args.extract_features, trainable_scopes = args.trainable_scopes, use_moving_averages = args.use_moving_averages, restore_moving_averages = args.restore_moving_averages )
def parse_argv(): args = sys.argv[1:] if not args: argv_error('Please specify a host configuration file. See livetest-sample.ini for an example.') ini_path = sys.argv.pop(1) # 2nd arg should be the INI file if not os.path.isfile(ini_path): argv_error('%r is not a livetest INI file' % ini_path) host_config = parse_config_file(ini_path) return host_config
def main(): if options.debug: define('settings', '%s/wechat.conf' % config.PROJDIR) else: define('settings', '') parse_command_line() debug = options.debug config.parse_config_file(options.settings) if not debug: options.debug = False if options.debug: logging.info('Starting server at port %s in debug mode' % options.port) else: logging.info('Starting server at port %s' % options.port) server = HTTPServer(Application(), xheaders=True) server.listen(int(options.port)) IOLoop.instance().start()
def main(): logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s: %(message)s [in " "%(funcName)s %(filename)s:%(lineno)d]", datefmt="%H:%M") args = parse_cmdline() if args.debug: logging.getLogger().setLevel(logging.DEBUG) config = parse_config_file(args.config) stdin_pw = handle_stdin() if stdin_pw: config['general']['password'] = stdin_pw run_command(args, config)
def debug(tfrecord_path, config_path = None): tfrecords = [tfrecord_path] cfg = parse_config_file(config_path) graph = tf.Graph() sess = tf.Session(graph = graph) # run a session to look at the images... with sess.as_default(), graph.as_default(): # Input Nodes images, batched_bboxes, batched_num_bboxes, image_ids = inputs.input_nodes( tfrecords=tfrecords, max_num_bboxes = cfg.MAX_NUM_BBOXES, num_epochs=None, batch_size=cfg.BATCH_SIZE, num_threads=cfg.NUM_INPUT_THREADS, add_summaries = True, augment=cfg.AUGMENT_IMAGE, shuffle_batch=False, cfg=cfg ) coord = tf.train.Coordinator() plt.ion() tf.initialize_all_variables().run() threads = tf.train.start_queue_runners(sess=sess, coord=coord) done = False while not done: output = sess.run([images, batched_bboxes]) for image, bboxes in zip(output[0], output[1]): plt.imshow((image * cfg.IMAGE_STD + cfg.IMAGE_MEAN).astype(np.uint8)) # plot the ground truth bounding boxes for bbox in bboxes: xmin, ymin, xmax, ymax = bbox * cfg.INPUT_SIZE plt.plot([xmin, xmax, xmax, xmin, xmin], [ymin, ymin, ymax, ymax, ymin], 'b-') plt.show(block=False) t = raw_input("push button") if t != '': done = True plt.clf()
def main(): args = parse_args() cfg = parse_config_file(args.config_file) if args.batch_size != None: cfg.BATCH_SIZE = args.batch_size if args.model_name != None: cfg.MODEL_NAME = args.model_name extract_and_save(tfrecords=args.tfrecords, checkpoint_path=args.checkpoint_path, save_path=args.save_path, num_iterations=args.batches, feature_keys=args.features, cfg=cfg)
def main(): args = parse_args() print "Command line arguments:" pprint.pprint(vars(args)) print cfg = parse_config_file(args.config_file) print "Configurations:" pprint.pprint(cfg) print with open(args.priors) as f: bbox_priors = pickle.load(f) bbox_priors = np.array(bbox_priors).astype(np.float32) detect_visualize(tfrecords=args.tfrecords, bbox_priors=bbox_priors, checkpoint_path=args.checkpoint_path, cfg=cfg)
def debug(tfrecord_path, config_path): graph = tf.get_default_graph() tfrecords = [tfrecord_path] cfg = parse_config_file(config_path) # Input Nodes images, labels_sparse, instance_ids = construct_network_input_nodes( tfrecords=tfrecords, input_type=cfg.INPUT_TYPE, num_epochs=None, batch_size=cfg.BATCH_SIZE, num_threads=cfg.NUM_INPUT_THREADS, add_summaries = False, augment=cfg.AUGMENT_IMAGE, shuffle_batch=False, cfg=cfg ) coord = tf.train.Coordinator() plt.ion() # run a session to look at the images... with tf.Session() as sess: tf.initialize_all_variables().run() threads = tf.train.start_queue_runners(sess=sess, coord=coord) while True: output = sess.run([images, labels_sparse, instance_ids]) for image, label, image_id in zip(output[0], output[1], output[2]): plt.imshow((image * cfg.IMAGE_STD + cfg.IMAGE_MEAN).astype(np.uint8)) plt.title("Class: %d\tImage: %s" % (label,image_id)) plt.show(block=False) t = raw_input("push button") if t != '': return
def main(): args = parse_args() print "Command line arguments:" pprint.pprint(vars(args)) print cfg = parse_config_file(args.config_file) print "Configurations:" pprint.pprint(cfg) print with open(args.priors) as f: bbox_priors = pickle.load(f) bbox_priors = np.array(bbox_priors).astype(np.float32) eval(tfrecords=args.tfrecords, bbox_priors=bbox_priors, summary_dir=args.summary_dir, checkpoint_path=args.checkpoint_path, max_iterations=args.max_iterations, cfg=cfg)
def main(): # Parse command-line arguments args = parse_args() config_file = args.pop('config', None) no_ssl = args.pop('nossl', False) # Read config file if the option was specified if config_file is not None: opts = config.parse_config_file(config_file) else: opts = config.DEFAULTS # Command-line arguments override config file settings opts.update(args) if no_ssl: opts['ssl'] = False # Check for label rules if 'labels' not in opts or not opts['labels']: raise AppError("Please specify some label definitions") # If the user or password is not specified, prompt for them now for opt in ('user', 'password'): if opt not in opts or opts[opt] is None: opts[opt] = getpass.getpass(opt + ': ') imap_args = opts.copy() del imap_args['folder'] del imap_args['labels'] client = create_imap_client(**imap_args) observer = ConsoleObserver() weighmail(client, opts['folder'], opts['labels'], observer) client.logout()
action='store_true') if len(sys.argv) == 1: parser.print_help() sys.exit(1) args = parser.parse_args() return args if __name__ == '__main__': args = parse_args() print "Called with:" print args cfg = parse_config_file(args.config_file) # Modify the configuration hyper-parameters for prep purposes. cfg.PHASE = "TRAIN" cfg.USE_BATCH_STATISTICS = True print "Configurations:" print cfg with open(args.priors) as f: bbox_priors = pickle.load(f) train( tfrecords=args.tfrecords, bbox_priors=bbox_priors, logdir=args.logdir, cfg=cfg,
def main(): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # parse command line parser = opts_parser() options = parser.parse_args() modelfile = options.modelfile cfg = {} for fn in options.vars: cfg.update(config.parse_config_file(fn)) cfg.update(config.parse_variable_assignments(options.var)) sample_rate = cfg['sample_rate'] frame_len = cfg['frame_len'] fps = cfg['fps'] mel_bands = cfg['mel_bands'] mel_min = cfg['mel_min'] mel_max = cfg['mel_max'] blocklen = cfg['blocklen'] batchsize = cfg['batchsize'] bin_nyquist = frame_len // 2 + 1 bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate # prepare dataset datadir = os.path.join(os.path.dirname(__file__), os.path.pardir, 'datasets', options.dataset) # - load filelist with io.open(os.path.join(datadir, 'filelists', 'train')) as f: filelist = [l.rstrip() for l in f if l.rstrip()] if options.validate: with io.open(os.path.join(datadir, 'filelists', 'valid')) as f: filelist_val = [l.strip() for l in f if l.strip()] filelist.extend(filelist_val) else: filelist_val = [] # - compute spectra print("Computing%s spectra..." % (" or loading" if options.cache_spectra else "")) spects = [] for fn in progress(filelist, 'File '): cache_fn = (options.cache_spectra and os.path.join(options.cache_spectra, fn + '.npy')) spects.append( cached(cache_fn, audio.extract_spect, os.path.join(datadir, 'audio', fn), sample_rate, frame_len, fps)) # - load and convert corresponding labels print("Loading labels...") labels = [] for fn, spect in zip(filelist, spects): fn = os.path.join(datadir, 'labels', fn.rsplit('.', 1)[0] + '.lab') with io.open(fn) as f: segments = [l.rstrip().split() for l in f if l.rstrip()] segments = [(float(start), float(end), label == 'sing') for start, end, label in segments] timestamps = np.arange(len(spect)) / float(fps) labels.append(create_aligned_targets(segments, timestamps, np.bool)) # - prepare mel filterbank filterbank = audio.create_mel_filterbank(sample_rate, frame_len, mel_bands, mel_min, mel_max) filterbank = filterbank[:bin_mel_max].astype(floatX) if options.validate: spects_val = spects[-len(filelist_val):] spects = spects[:-len(filelist_val)] labels_val = labels[-len(filelist_val):] labels = labels[:-len(filelist_val)] # - precompute mel spectra, if needed, otherwise just define a generator mel_spects = (np.log( np.maximum(np.dot(spect[:, :bin_mel_max], filterbank), 1e-7)) for spect in spects) if not options.augment: mel_spects = list(mel_spects) del spects # - load mean/std or compute it, if not computed yet meanstd_file = os.path.join(os.path.dirname(__file__), '%s_meanstd.npz' % options.dataset) try: with np.load(meanstd_file) as f: mean = f['mean'] std = f['std'] except (IOError, KeyError): print("Computing mean and standard deviation...") mean, std = znorm.compute_mean_std(mel_spects) np.savez(meanstd_file, mean=mean, std=std) mean = mean.astype(floatX) istd = np.reciprocal(std).astype(floatX) # - prepare training data generator print("Preparing training data feed...") if not options.augment: # Without augmentation, we just precompute the normalized mel spectra # and create a generator that returns mini-batches of random excerpts mel_spects = [(spect - mean) * istd for spect in mel_spects] batches = augment.grab_random_excerpts(mel_spects, labels, batchsize, blocklen) else: # For time stretching and pitch shifting, it pays off to preapply the # spline filter to each input spectrogram, so it does not need to be # applied to each mini-batch later. 
spline_order = cfg['spline_order'] if spline_order > 1: from scipy.ndimage import spline_filter spects = [ spline_filter(spect, spline_order).astype(floatX) for spect in spects ] # We define a function to create the mini-batch generator. This allows # us to easily create multiple generators for multithreading if needed. def create_datafeed(spects, labels): # With augmentation, as we want to apply random time-stretching, # we request longer excerpts than we finally need to return. max_stretch = cfg['max_stretch'] batches = augment.grab_random_excerpts( spects, labels, batchsize=batchsize, frames=int(blocklen / (1 - max_stretch))) # We wrap the generator in another one that applies random time # stretching and pitch shifting, keeping a given number of frames # and bins only. max_shift = cfg['max_shift'] batches = augment.apply_random_stretch_shift(batches, max_stretch, max_shift, keep_frames=blocklen, keep_bins=bin_mel_max, order=spline_order, prefiltered=True) # We transform the excerpts to mel frequency and log magnitude. batches = augment.apply_filterbank(batches, filterbank) batches = augment.apply_logarithm(batches) # We apply random frequency filters max_db = cfg['max_db'] batches = augment.apply_random_filters(batches, filterbank, mel_max, max_db=max_db) # We apply normalization batches = augment.apply_znorm(batches, mean, istd) return batches # We start the mini-batch generator and augmenter in one or more # background threads or processes (unless disabled). bg_threads = cfg['bg_threads'] bg_processes = cfg['bg_processes'] if not bg_threads and not bg_processes: # no background processing: just create a single generator batches = create_datafeed(spects, labels) elif bg_threads: # multithreading: create a separate generator per thread batches = augment.generate_in_background( [create_datafeed(spects, labels) for _ in range(bg_threads)], num_cached=bg_threads * 5) elif bg_processes: # multiprocessing: single generator is forked along with processes batches = augment.generate_in_background( [create_datafeed(spects, labels)] * bg_processes, num_cached=bg_processes * 25, in_processes=True) ########################################################################### #-----------Main changes to code to make it work with pytorch-------------# ########################################################################### print("preparing training function...") mdl = model.CNNModel() mdl = mdl.to(device) #Setting up learning rate and learning rate parameters initial_eta = cfg['initial_eta'] eta_decay = cfg['eta_decay'] momentum = cfg['momentum'] eta_decay_every = cfg.get('eta_decay_every', 1) eta = initial_eta #set up loss criterion = torch.nn.BCELoss() #set up optimizer optimizer = torch.optim.SGD(mdl.parameters(), lr=eta, momentum=momentum, nesterov=True) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=eta_decay_every, gamma=eta_decay) #set up optimizer writer = SummaryWriter(os.path.join(modelfile, 'runs')) epochs = cfg['epochs'] epochsize = cfg['epochsize'] batches = iter(batches) #conditions to save model best_val_loss = 100000. best_val_error = 1. for epoch in range(epochs): # - Initialize certain parameters that are used to monitor training err = 0 total_norm = 0 loss_accum = 0 mdl.train(True) # - Compute the L-2 norm of the gradients for p in mdl.parameters(): if p.grad is not None: param_norm = p.grad.data.norm(2) total_norm += param_norm.item()**2 total_norm = total_norm**(1. 
/ 2) # - Start the training for this epoch for batch in progress(range(epochsize), min_delay=0.5, desc='Epoch %d/%d: Batch ' % (epoch + 1, epochs)): data = next(batches) input_data = np.transpose(data[0][:, :, :, np.newaxis], (0, 3, 1, 2)) labels = data[1][:, np.newaxis].astype(np.float32) #map labels to make them softer labels = (0.02 + 0.96 * labels) optimizer.zero_grad() outputs = mdl(torch.from_numpy(input_data).to(device)) loss = criterion(outputs, torch.from_numpy(labels).to(device)) loss.backward() optimizer.step() loss_accum += loss.item() # - Compute validation loss and error if desired if options.validate: from eval import evaluate mdl.train(False) val_loss = 0 preds = [] labs = [] max_len = fps mel_spects_val = (np.log( np.maximum(np.dot(spect[:, :bin_mel_max], filterbank), 1e-7)) for spect in spects_val) mel_spects_val = [(spect - mean) * istd for spect in mel_spects_val] num_iter = 0 for spect, label in zip(mel_spects_val, labels_val): num_excerpts = len(spect) - blocklen + 1 excerpts = np.lib.stride_tricks.as_strided( spect, shape=(num_excerpts, blocklen, spect.shape[1]), strides=(spect.strides[0], spect.strides[0], spect.strides[1])) # - Pass mini-batches through the network and concatenate results for pos in range(0, num_excerpts, batchsize): input_data = np.transpose( excerpts[pos:pos + batchsize, :, :, np.newaxis], (0, 3, 1, 2)) if (pos + batchsize > num_excerpts): label_batch = label[blocklen // 2 + pos:blocklen // 2 + num_excerpts, np.newaxis].astype(np.float32) else: label_batch = label[blocklen // 2 + pos:blocklen // 2 + pos + batchsize, np.newaxis].astype(np.float32) pred = mdl(torch.from_numpy(input_data).to(device)) e = criterion(pred, torch.from_numpy(label_batch).to(device)) preds = np.append(preds, pred[:, 0].cpu().detach().numpy()) labs = np.append(labs, label_batch) val_loss += e.item() num_iter += 1 print("Validation loss: %.3f" % (val_loss / num_iter)) _, results = evaluate(preds, labs) print("Validation error: %.3f" % (1 - results['accuracy'])) if (val_loss / num_iter < best_val_loss and (1 - results['accuracy']) < best_val_error): torch.save(mdl.state_dict(), os.path.join(modelfile, 'model.pth')) best_val_loss = val_loss / num_iter best_val_error = 1 - results['accuracy'] print('New saved model', best_val_loss, best_val_error) #Update the learning rate scheduler.step() print('Training Loss per epoch', loss_accum / epochsize) # - Save parameters for examining writer.add_scalar('Training Loss', loss_accum / epochsize, epoch) writer.add_scalar('Validation loss', val_loss / num_iter, epoch) writer.add_scalar('Gradient norm', total_norm, epoch) writer.add_scalar('Validation error', 1 - results['accuracy']) for param_group in optimizer.param_groups: print(param_group['lr']) if not options.validate: torch.save(mdl.state_dict(), os.path.join(modelfile, 'model.pth'))
def main(): print(torch.cuda.is_available()) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # parse command line parser = opts_parser() options = parser.parse_args() modelfile = options.modelfile lossgradient = options.lossgradient cfg = {} for fn in options.vars: cfg.update(config.parse_config_file(fn)) cfg.update(config.parse_variable_assignments(options.var)) outfile = options.outfile sample_rate = cfg['sample_rate'] frame_len = cfg['frame_len'] fps = cfg['fps'] mel_bands = cfg['mel_bands'] mel_min = cfg['mel_min'] mel_max = cfg['mel_max'] blocklen = cfg['blocklen'] batchsize = cfg['batchsize'] bin_nyquist = frame_len // 2 + 1 bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate # prepare dataset print("Preparing data reading...") datadir = os.path.join(os.path.dirname(__file__), os.path.pardir, 'datasets', options.dataset) # - load filelist with io.open(os.path.join(datadir, 'filelists', 'valid')) as f: filelist = [l.rstrip() for l in f if l.rstrip()] with io.open(os.path.join(datadir, 'filelists', 'test')) as f: filelist += [l.rstrip() for l in f if l.rstrip()] # - load mean/std meanstd_file = os.path.join(os.path.dirname(__file__), '%s_meanstd.npz' % options.dataset) dataloader = DatasetLoader(options.dataset, options.cache_spectra, datadir, input_type=options.input_type, filelist=filelist) mel_spects, labels = dataloader.prepare_batches(sample_rate, frame_len, fps, mel_bands, mel_min, mel_max, blocklen, batchsize, batch_data=False) with np.load(meanstd_file) as f: mean = f['mean'] std = f['std'] mean = mean.astype(floatX) istd = np.reciprocal(std).astype(floatX) mdl = model.CNNModel(input_type='mel_spects_norm', is_zeromean=False, meanstd_file=meanstd_file, device=device) mdl.load_state_dict(torch.load(modelfile)) mdl.to(device) mdl.eval() if (lossgradient != 'None'): mdl_lossgrad = model.CNNModel(input_type=options.input_type, is_zeromean=False, sample_rate=sample_rate, frame_len=frame_len, fps=fps, mel_bands=mel_bands, mel_min=mel_min, mel_max=mel_max, bin_mel_max=bin_mel_max, meanstd_file=meanstd_file, device=device) mdl_lossgrad.load_state_dict(torch.load(lossgradient)) mdl_lossgrad.to(device) mdl_lossgrad.eval() criterion = torch.nn.BCELoss() loss_grad_val = dataloader.prepare_loss_grad_batches( options.loss_grad_save, mel_spects, labels, mdl_lossgrad, criterion, blocklen, batchsize, device) # run prediction loop print("Predicting:") predictions = [] #for spect, g in zip(mel_spects, loss_grad_val): c = 0 for spect in progress(mel_spects, total=len(filelist), desc='File '): if (lossgradient != 'None'): g = loss_grad_val[c] c += 1 # naive way: pass excerpts of the size used during training # - view spectrogram memory as a 3-tensor of overlapping excerpts num_excerpts = len(spect) - blocklen + 1 excerpts = np.lib.stride_tricks.as_strided( spect.astype(floatX), shape=(num_excerpts, blocklen, spect.shape[1]), strides=(spect.strides[0], spect.strides[0], spect.strides[1])) preds = np.zeros((num_excerpts, 1)) count = 0 for pos in range(0, num_excerpts, batchsize): input_data = np.transpose( excerpts[pos:pos + batchsize, :, :, np.newaxis], (0, 3, 1, 2)) input_data = (input_data - mean) * istd if lossgradient != 'None': for i in range(input_data.shape[0]): if (options.lossgrad_algorithm == 'grad'): rank_matrix = np.abs(g[i + pos]) elif (options.lossgrad_algorithm == 'gradxinp'): rank_matrix = np.squeeze(g[i + pos] * input_data[i, :, :, :]) elif (options.lossgrad_algorithm == 'gradorig'): rank_matrix = g[i + pos] if (options.ROAR == 1): v = 
np.argsort(rank_matrix, axis=None)[-cfg['occlude']:] else: v = np.argsort(rank_matrix, axis=None)[:cfg['occlude']] input_data[i, :, v // 80, v % 80] = 0 else: for i in range(input_data.shape[0]): #print('random') v = np.random.choice(115 * 80, cfg['occlude'], replace=False) input_data[i, :, v // 80, v % 80] = 0 count += 1 #print('Here') #preds = np.vstack(mdl.forward(torch.from_numpy( # np.transpose(excerpts[pos:pos + batchsize,:,:, # np.newaxis],(0,3,1,2))).to(device)).cpu().detach().numpy() # for pos in range(0, num_excerpts, batchsize)) preds[pos:pos + batchsize, :] = mdl( torch.from_numpy(input_data).to( device)).cpu().detach().numpy() print('Here') predictions.append(preds) # save predictions print("Saving predictions") np.savez(outfile, **{fn: pred for fn, pred in zip(filelist, predictions)})
required=True, type=str) parser.add_argument('--export_path', dest='export_path', help='Path to a directory where the exported model will be saved.', required=True, type=str) parser.add_argument('--export_version', dest='export_version', help='Version number of the model.', required=True, type=int) parser.add_argument('--config', dest='config_file', help='Path to the configuration file.', required=True, type=str) args = parser.parse_args() return args if __name__ == '__main__': args = parse_args() print "Called with:" print pprint.pprint(args) cfg = parse_config_file(args.config_file) print "Configurations:" print pprint.pprint(cfg) export(args.checkpoint_path, args.export_path, args.export_version, cfg=cfg)
def main(): print(torch.cuda.is_available()) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # parse command line parser = opts_parser() options = parser.parse_args() modelfile = options.modelfile cfg = {} for fn in options.vars: cfg.update(config.parse_config_file(fn)) cfg.update(config.parse_variable_assignments(options.var)) outfile = options.outfile sample_rate = cfg['sample_rate'] frame_len = cfg['frame_len'] fps = cfg['fps'] mel_bands = cfg['mel_bands'] mel_min = cfg['mel_min'] mel_max = cfg['mel_max'] blocklen = cfg['blocklen'] batchsize = cfg['batchsize'] bin_nyquist = frame_len // 2 + 1 bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate # prepare dataset print("Preparing data reading...") datadir = os.path.join(os.path.dirname(__file__), os.path.pardir, 'datasets', options.dataset) # - load filelist with io.open(os.path.join(datadir, 'filelists', 'valid')) as f: filelist = [l.rstrip() for l in f if l.rstrip()] with io.open(os.path.join(datadir, 'filelists', 'test')) as f: filelist += [l.rstrip() for l in f if l.rstrip()] # - create generator for spectra spects = (cached( options.cache_spectra and os.path.join(options.cache_spectra, fn + '.npy'), audio.extract_spect, os.path.join(datadir, 'audio', fn), sample_rate, frame_len, fps) for fn in filelist) # - pitch-shift if needed if options.pitchshift: import scipy.ndimage spline_order = 2 spects = (scipy.ndimage.affine_transform( spect, (1, 1 / (1 + options.pitchshift / 100.)), output_shape=(len(spect), mel_max), order=spline_order) for spect in spects) # - prepare mel filterbank filterbank = audio.create_mel_filterbank(sample_rate, frame_len, mel_bands, mel_min, mel_max) filterbank = filterbank[:bin_mel_max].astype(floatX) # - define generator for mel spectra spects = (np.log( np.maximum(np.dot(spect[:, :bin_mel_max], filterbank), 1e-7)) for spect in spects) # - load mean/std meanstd_file = os.path.join(os.path.dirname(__file__), '%s_meanstd.npz' % options.dataset) with np.load(meanstd_file) as f: mean = f['mean'] std = f['std'] mean = mean.astype(floatX) istd = np.reciprocal(std).astype(floatX) # - define generator for Z-scoring spects = ((spect - mean) * istd for spect in spects) # - define generator for silence-padding pad = np.tile((np.log(1e-7) - mean) * istd, (blocklen // 2, 1)) spects = (np.concatenate((pad, spect, pad), axis=0) for spect in spects) # - we start the generator in a background thread (not required) spects = augment.generate_in_background([spects], num_cached=1) mdl = model.CNNModel() mdl.load_state_dict(torch.load(modelfile)) mdl.to(device) mdl.eval() # run prediction loop print("Predicting:") predictions = [] for spect in progress(spects, total=len(filelist), desc='File '): # naive way: pass excerpts of the size used during training # - view spectrogram memory as a 3-tensor of overlapping excerpts num_excerpts = len(spect) - blocklen + 1 excerpts = np.lib.stride_tricks.as_strided( spect, shape=(num_excerpts, blocklen, spect.shape[1]), strides=(spect.strides[0], spect.strides[0], spect.strides[1])) # - pass mini-batches through the network and concatenate results preds = np.vstack( mdl( torch.from_numpy( np.transpose( excerpts[pos:pos + batchsize, :, :, np.newaxis], ( 0, 3, 1, 2))).to(device)).cpu().detach().numpy() for pos in range(0, num_excerpts, batchsize)) predictions.append(preds) # save predictions print("Saving predictions") np.savez(outfile, **{fn: pred for fn, pred in zip(filelist, predictions)})
def main(): # parse command line parser = opts_parser() options = parser.parse_args() modelfile = options.modelfile if options.load_spectra != 'memory' and not options.cache_spectra: parser.error('option --load-spectra=%s requires --cache-spectra' % options.load_spectra) # read configuration files and immediate settings cfg = {} for fn in options.vars: cfg.update(config.parse_config_file(fn)) cfg.update(config.parse_variable_assignments(options.var)) # read some settings into local variables sample_rate = cfg['sample_rate'] frame_len = cfg['frame_len'] fps = cfg['fps'] mel_bands = cfg['mel_bands'] mel_min = cfg['mel_min'] mel_max = cfg['mel_max'] blocklen = cfg['blocklen'] batchsize = cfg['batchsize'] bin_nyquist = frame_len // 2 + 1 if cfg['filterbank'] == 'mel_learn': bin_mel_max = bin_nyquist else: bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate # prepare dataset datadir = os.path.join(os.path.dirname(__file__), os.path.pardir, 'datasets', options.dataset) # - load filelist with io.open( os.path.join(datadir, 'filelists', cfg.get('filelist.train', 'train'))) as f: filelist = [l.rstrip() for l in f if l.rstrip()] if options.validate: with io.open( os.path.join(datadir, 'filelists', cfg.get('filelist.valid', 'valid'))) as f: filelist_val = [l.rstrip() for l in f if l.rstrip()] filelist.extend(filelist_val) else: filelist_val = [] # - compute spectra print("Computing%s spectra..." % (" or loading" if options.cache_spectra else "")) spects = [] for fn in progress(filelist, 'File '): cache_fn = (options.cache_spectra and os.path.join(options.cache_spectra, fn + '.npy')) spects.append( cached(cache_fn, audio.extract_spect, os.path.join(datadir, 'audio', fn), sample_rate, frame_len, fps, loading_mode=options.load_spectra)) # - load and convert corresponding labels print("Loading labels...") labels = [] for fn, spect in zip(filelist, spects): fn = os.path.join(datadir, 'labels', fn.rsplit('.', 1)[0] + '.lab') with io.open(fn) as f: segments = [l.rstrip().split() for l in f if l.rstrip()] segments = [(float(start), float(end), label == 'sing') for start, end, label in segments] timestamps = np.arange(len(spect)) / float(fps) labels.append(create_aligned_targets(segments, timestamps, np.bool)) # - split off validation data, if needed if options.validate: spects_val = spects[-len(filelist_val):] spects = spects[:-len(filelist_val)] labels_val = labels[-len(filelist_val):] labels = labels[:-len(filelist_val)] # - prepare training data generator print("Preparing training data feed...") if not options.augment: # Without augmentation, we just create a generator that returns # mini-batches of random excerpts batches = augment.grab_random_excerpts(spects, labels, batchsize, blocklen, bin_mel_max) batches = augment.generate_in_background([batches], num_cached=15) else: # For time stretching and pitch shifting, it pays off to preapply the # spline filter to each input spectrogram, so it does not need to be # applied to each mini-batch later. spline_order = cfg['spline_order'] if spline_order > 1 and options.load_spectra == 'memory': from scipy.ndimage import spline_filter spects = [ spline_filter(spect, spline_order).astype(floatX) for spect in spects ] prefiltered = True else: prefiltered = False # We define a function to create the mini-batch generator. This allows # us to easily create multiple generators for multithreading if needed. def create_datafeed(spects, labels): # With augmentation, as we want to apply random time-stretching, # we request longer excerpts than we finally need to return. 
max_stretch = cfg['max_stretch'] batches = augment.grab_random_excerpts( spects, labels, batchsize=batchsize, frames=int(blocklen / (1 - max_stretch))) # We wrap the generator in another one that applies random time # stretching and pitch shifting, keeping a given number of frames # and bins only. max_shift = cfg['max_shift'] batches = augment.apply_random_stretch_shift( batches, max_stretch, max_shift, keep_frames=blocklen, keep_bins=bin_mel_max, order=spline_order, prefiltered=prefiltered) # We apply random frequency filters max_db = cfg['max_db'] batches = augment.apply_random_filters(batches, mel_max, max_db) # We apply random loudness changes max_loudness = cfg['max_loudness'] if max_loudness: batches = augment.apply_random_loudness(batches, max_loudness) return batches # We start the mini-batch generator and augmenter in one or more # background threads or processes (unless disabled). bg_threads = cfg['bg_threads'] bg_processes = cfg['bg_processes'] if not bg_threads and not bg_processes: # no background processing: just create a single generator batches = create_datafeed(spects, labels) elif bg_threads: # multithreading: create a separate generator per thread batches = augment.generate_in_background( [create_datafeed(spects, labels) for _ in range(bg_threads)], num_cached=bg_threads * 5) elif bg_processes: # multiprocessing: single generator is forked along with processes batches = augment.generate_in_background( [create_datafeed(spects, labels)] * bg_processes, num_cached=bg_processes * 25, in_processes=True) print("Preparing training function...") # instantiate neural network input_var = T.tensor3('input') inputs = input_var.dimshuffle(0, 'x', 1, 2) # insert "channels" dimension network = model.architecture(inputs, (None, 1, blocklen, bin_mel_max), cfg) print( "- %d layers (%d with weights), %f mio params" % (len(lasagne.layers.get_all_layers(network)), sum(hasattr(l, 'W') for l in lasagne.layers.get_all_layers(network)), lasagne.layers.count_params(network, trainable=True) / 1e6)) print("- weight shapes: %r" % [ l.W.get_value().shape for l in lasagne.layers.get_all_layers(network) if hasattr(l, 'W') and hasattr(l.W, 'get_value') ]) # create cost expression target_var = T.vector('targets') targets = (0.02 + 0.96 * target_var) # map 0 -> 0.02, 1 -> 0.98 targets = targets.dimshuffle(0, 'x') # turn into column vector outputs = lasagne.layers.get_output(network, deterministic=False) cost = T.mean(lasagne.objectives.binary_crossentropy(outputs, targets)) if cfg.get('l2_decay', 0): cost_l2 = lasagne.regularization.regularize_network_params( network, lasagne.regularization.l2) * cfg['l2_decay'] else: cost_l2 = 0 # prepare and compile training function params = lasagne.layers.get_all_params(network, trainable=True) initial_eta = cfg['initial_eta'] eta_decay = cfg['eta_decay'] eta_decay_every = cfg.get('eta_decay_every', 1) patience = cfg.get('patience', 0) trials_of_patience = cfg.get('trials_of_patience', 1) patience_criterion = cfg.get( 'patience_criterion', 'valid_loss' if options.validate else 'train_loss') momentum = cfg['momentum'] first_params = params[:cfg['first_params']] first_params_eta_scale = cfg['first_params_eta_scale'] if cfg['learn_scheme'] == 'nesterov': learn_scheme = lasagne.updates.nesterov_momentum elif cfg['learn_scheme'] == 'momentum': learn_scheme = lasagne.update.momentum elif cfg['learn_scheme'] == 'adam': learn_scheme = lasagne.updates.adam else: raise ValueError('Unknown learn_scheme=%s' % cfg['learn_scheme']) eta = 
theano.shared(lasagne.utils.floatX(initial_eta)) if not first_params or first_params_eta_scale == 1: updates = learn_scheme(cost + cost_l2, params, eta, momentum) else: grads = theano.grad(cost + cost_l2, params) updates = learn_scheme(grads[len(first_params):], params[len(first_params):], eta, momentum) if first_params_eta_scale > 0: updates.update( learn_scheme(grads[:len(first_params)], first_params, eta * first_params_eta_scale, momentum)) print("Compiling training function...") train_fn = theano.function([input_var, target_var], cost, updates=updates) # prepare and compile validation function, if requested if options.validate: print("Compiling validation function...") import model_to_fcn network_test = model_to_fcn.model_to_fcn(network, allow_unlink=False) outputs_test = lasagne.layers.get_output(network_test, deterministic=True) cost_test = T.mean( lasagne.objectives.binary_crossentropy(outputs_test, targets)) val_fn = theano.function([input_var, target_var], [cost_test, outputs_test]) # run training loop print("Training:") epochs = cfg['epochs'] epochsize = cfg['epochsize'] batches = iter(batches) if options.save_errors: errors = [] if first_params and cfg['first_params_log']: first_params_hist = [] if patience > 0: best_error = np.inf best_state = get_state(network, updates) for epoch in range(epochs): # actual training err = 0 for batch in progress(range(epochsize), min_delay=.5, desc='Epoch %d/%d: Batch ' % (epoch + 1, epochs)): err += train_fn(*next(batches)) if not np.isfinite(err): print("\nEncountered NaN loss in training. Aborting.") sys.exit(1) if first_params and cfg['first_params_log'] and ( batch % cfg['first_params_log'] == 0): first_params_hist.append( tuple(param.get_value() for param in first_params)) np.savez( modelfile[:-4] + '.hist.npz', **{ 'param%d' % i: param for i, param in enumerate(zip(*first_params_hist)) }) # report training loss print("Train loss: %.3f" % (err / epochsize)) if options.save_errors: errors.append(err / epochsize) # compute and report validation loss, if requested if options.validate: val_err = 0 preds = [] max_len = int(fps * cfg.get('val.max_len', 30)) for spect, label in zip(spects_val, labels_val): # pick excerpt of val.max_len seconds in center of file excerpt = slice(max(0, (len(spect) - max_len) // 2), (len(spect) + max_len) // 2) # crop to maximum length and required spectral bins spect = spect[None, excerpt, :bin_mel_max] # crop to maximum length and remove edges lost in the network label = label[excerpt][blocklen // 2:-(blocklen // 2)] e, pred = val_fn(spect, label) val_err += e preds.append((pred[:, 0], label)) print("Validation loss: %.3f" % (val_err / len(filelist_val))) from eval import evaluate _, results = evaluate(*zip(*preds)) print("Validation error: %.3f" % (1 - results['accuracy'])) if options.save_errors: errors.append(val_err / len(filelist_val)) errors.append(1 - results['accuracy']) # update learning rate and/or apply early stopping, if needed if patience > 0: if patience_criterion == 'train_loss': cur_error = err / epochsize elif patience_criterion == 'valid_loss': cur_error = val_err / len(filelist_val) elif patience_criterion == 'valid_error': cur_error = 1 - results['accuracy'] if cur_error <= best_error: best_error = cur_error best_state = get_state(network, updates) patience = cfg['patience'] else: patience -= 1 if patience == 0: if eta_decay_every == 'trial_of_patience' and eta_decay != 1: eta.set_value(eta.get_value() * lasagne.utils.floatX(eta_decay)) restore_state(network, updates, best_state) patience = 
cfg['patience'] trials_of_patience -= 1 print("Lost patience (%d remaining trials)." % trials_of_patience) if trials_of_patience == 0: break if eta_decay_every != 'trial_of_patience' and eta_decay != 1 and \ (epoch + 1) % eta_decay_every == 0: eta.set_value(eta.get_value() * lasagne.utils.floatX(eta_decay)) # save final network print("Saving final model") np.savez( modelfile, **{ 'param%d' % i: p for i, p in enumerate(lasagne.layers.get_all_param_values(network)) }) with io.open(modelfile + '.vars', 'wb') as f: f.writelines('%s=%s\n' % kv for kv in cfg.items()) if options.save_errors: np.savez(modelfile[:-len('.npz')] + '.err.npz', np.asarray(errors).reshape(epoch + 1, -1))
def main(): # parse command line parser = opts_parser() options = parser.parse_args() modelfile = options.modelfile # read configuration files and immediate settings cfg = {} for fn in options.vars: cfg.update(config.parse_config_file(fn)) cfg.update(config.parse_variable_assignments(options.var)) # prepare dataset datadir = os.path.join(os.path.dirname(__file__), os.path.pardir, 'datasets', options.dataset) print("Preparing training data feed...") with io.open(os.path.join(datadir, 'filelists', 'train')) as f: filelist = [l.rstrip() for l in f if l.rstrip()] train_feed, train_formats = data.prepare_datafeed(filelist, datadir, 'train', cfg) # If told so, we plot some mini-batches on screen. if cfg.get('plot_datafeed'): import matplotlib.pyplot as plt for batch in data.run_datafeed(train_feed, cfg): plt.matshow(np.log(batch['spect'][0]).T, aspect='auto', origin='lower', cmap='hot', interpolation='nearest') plt.colorbar() plt.title(str(batch['label'][0])) plt.show() # We start the mini-batch generator and augmenter in one or more # background threads or processes (unless disabled). bg_threads = cfg['bg_threads'] bg_processes = cfg['bg_processes'] if not bg_threads and not bg_processes: # no background processing: just create a single generator batches = data.run_datafeed(train_feed, cfg) elif bg_threads: # multithreading: create a separate generator per thread batches = augment.generate_in_background([ data.run_datafeed(feed, cfg) for feed in data.split_datafeed(train_feed, bg_threads, cfg) ], num_cached=bg_threads * 2) elif bg_processes: # multiprocessing: single generator is forked along with processes batches = augment.generate_in_background( [data.run_datafeed(train_feed, cfg)] * bg_processes, num_cached=bg_processes * 25, in_processes=True) # If told so, we benchmark the creation of a given number of mini-batches. 
if cfg.get('benchmark_datafeed'): print("Benchmark: %d mini-batches of %d items " % (cfg['benchmark_datafeed'], cfg['batchsize']), end='') if bg_threads: print("(in %d threads): " % bg_threads) elif bg_processes: print("(in %d processes): " % bg_processes) else: print("(in main thread): ") import time import itertools t0 = time.time() next( itertools.islice(batches, cfg['benchmark_datafeed'], cfg['benchmark_datafeed']), None) t1 = time.time() print(t1 - t0) return # - prepare validation data generator if options.validate: print("Preparing validation data feed...") with io.open(os.path.join(datadir, 'filelists', 'valid')) as f: filelist_val = [l.rstrip() for l in f if l.rstrip()] val_feed, val_formats = data.prepare_datafeed(filelist_val, datadir, 'valid', cfg) if bg_threads or bg_processes: multi = bg_threads or bg_processes val_feed = data.split_datafeed(val_feed, multi, cfg) def run_val_datafeed(): if bg_threads or bg_processes: return augment.generate_in_background( [data.run_datafeed(feed, cfg) for feed in val_feed], num_cached=multi, in_processes=bool(bg_processes)) else: return data.run_datafeed(val_feed, cfg) print("Preparing training function...") # instantiate neural network input_vars = { name: T.TensorType(str(np.dtype(dtype)), (False, ) * len(shape))(name) for name, (dtype, shape) in train_formats.items() } input_shapes = { name: shape for name, (dtype, shape) in train_formats.items() } network = model.architecture(input_vars, input_shapes, cfg) print( "- %d layers (%d with weights), %f mio params" % (len(lasagne.layers.get_all_layers(network)), sum(hasattr(l, 'W') for l in lasagne.layers.get_all_layers(network)), lasagne.layers.count_params(network, trainable=True) / 1e6)) print("- weight shapes: %r" % [ l.W.get_value().shape for l in lasagne.layers.get_all_layers(network) if hasattr(l, 'W') and hasattr(l.W, 'get_value') ]) cost_vars = dict(input_vars) # prepare for born-again-network, if needed if cfg.get('ban'): network2 = model.architecture(input_vars, input_shapes, cfg) with np.load(cfg['ban'], encoding='latin1') as f: lasagne.layers.set_all_param_values( network2, [f['param%d' % i] for i in range(len(f.files))]) cost_vars['pseudo_label'] = lasagne.layers.get_output( network2, deterministic=True) # load pre-trained weights, if needed if cfg.get('init_from'): param_values = [] for fn in cfg['init_from'].split(':'): with np.load(fn, encoding='latin1') as f: param_values.extend(f['param%d' % i] for i in range(len(f.files))) lasagne.layers.set_all_param_values(network, param_values) del param_values # create cost expression outputs = lasagne.layers.get_output(network, deterministic=False) cost = T.mean(model.cost(outputs, cost_vars, 'train', cfg)) if cfg.get('l2_decay', 0): cost_l2 = lasagne.regularization.regularize_network_params( network, lasagne.regularization.l2) * cfg['l2_decay'] else: cost_l2 = 0 # prepare and compile training function params = lasagne.layers.get_all_params(network, trainable=True) initial_eta = cfg['initial_eta'] eta_decay = cfg['eta_decay'] eta_decay_every = cfg.get('eta_decay_every', 1) eta_cycle = tuple(map(float, str(cfg['eta_cycle']).split(':'))) if eta_cycle == (0, ): eta_cycle = (1, ) # so eta_cycle=0 equals disabling it patience = cfg.get('patience', 0) trials_of_patience = cfg.get('trials_of_patience', 1) patience_criterion = cfg.get( 'patience_criterion', 'valid_loss' if options.validate else 'train_loss') momentum = cfg['momentum'] first_params = params[:cfg['first_params']] first_params_eta_scale = cfg['first_params_eta_scale'] if 
cfg['learn_scheme'] == 'nesterov': learn_scheme = lasagne.updates.nesterov_momentum elif cfg['learn_scheme'] == 'momentum': learn_scheme = lasagne.update.momentum elif cfg['learn_scheme'] == 'adam': learn_scheme = lasagne.updates.adam else: raise ValueError('Unknown learn_scheme=%s' % cfg['learn_scheme']) eta = theano.shared(lasagne.utils.floatX(initial_eta)) if not first_params or first_params_eta_scale == 1: updates = learn_scheme(cost + cost_l2, params, eta, momentum) else: grads = theano.grad(cost + cost_l2, params) updates = learn_scheme(grads[len(first_params):], params[len(first_params):], eta, momentum) if first_params_eta_scale > 0: updates.update( learn_scheme(grads[:len(first_params)], first_params, eta * first_params_eta_scale, momentum)) print("Compiling training function...") train_fn = theano.function(list(input_vars.values()), cost, updates=updates, on_unused_input='ignore') # prepare and compile validation function, if requested if options.validate: print("Compiling validation function...") outputs_test = lasagne.layers.get_output(network, deterministic=True) cost_test = T.mean(model.cost(outputs_test, input_vars, 'valid', cfg)) if isinstance(outputs_test, (list, tuple)): outputs_test = outputs_test[0] val_fn = theano.function([input_vars[k] for k in val_formats], [cost_test, outputs_test], on_unused_input='ignore') # restore previous training state, or create fresh training state state = {} if options.keep_state: statefile = modelfile[:-len('.npz')] + '.state' if os.path.exists(statefile): print("Restoring training state...") state = np.load(modelfile[:-len('.npz')] + '.state', encoding='latin1') restore_state(network, updates, state['network']) epochs = cfg['epochs'] epochsize = cfg['epochsize'] batches = iter(batches) if options.save_errors: errors = state.get('errors', []) if first_params and cfg['first_params_log']: first_params_hist = [] if options.keep_state and os.path.exists(modelfile[:-4] + '.hist.npz'): with np.load(modelfile[:-4] + '.hist.npz') as f: first_params_hist = list( zip(*(f['param%d' % i] for i in range(len(first_params))))) if patience > 0: best_error = state.get('best_error', np.inf) best_state = state.get('best_state') or get_state(network, updates) patience = state.get('patience', patience) trials_of_patience = state.get('trials_of_patience', trials_of_patience) epoch = state.get('epoch', 0) del state # run training loop print("Training:") for epoch in range(epoch, epochs): # actual training err = 0 for batch in progress(range(epochsize), min_delay=.5, desc='Epoch %d/%d: Batch ' % (epoch + 1, epochs)): err += train_fn(**next(batches)) if not np.isfinite(err): print("\nEncountered NaN loss in training. 
Aborting.") sys.exit(1) if first_params and cfg['first_params_log'] and ( batch % cfg['first_params_log'] == 0): first_params_hist.append( tuple(param.get_value() for param in first_params)) np.savez( modelfile[:-4] + '.hist.npz', **{ 'param%d' % i: param for i, param in enumerate(zip(*first_params_hist)) }) # report training loss print("Train loss: %.3f" % (err / epochsize)) if options.save_errors: errors.append(err / epochsize) # compute and report validation loss, if requested if options.validate: import time t0 = time.time() # predict in mini-batches val_err = 0 val_batches = 0 preds = [] truth = [] for batch in run_val_datafeed(): e, p = val_fn(**batch) val_err += np.sum(e) val_batches += 1 preds.append(p) truth.append(batch['label']) t1 = time.time() # join mini-batches preds = np.concatenate(preds) if len(preds) > 1 else preds[0] truth = np.concatenate(truth) if len(truth) > 1 else truth[0] # show results print("Validation loss: %.3f" % (val_err / val_batches)) from eval import evaluate results = evaluate(preds, truth) print("Validation error: %.3f" % (1 - results['accuracy'])) print("Validation MAP: %.3f" % results['map']) print("(took %.2f seconds)" % (t1 - t0)) if options.save_errors: errors.append(val_err / val_batches) errors.append(1 - results['accuracy']) errors.append(results['map']) # update learning rate and/or apply early stopping, if needed if patience > 0: if patience_criterion == 'train_loss': cur_error = err / epochsize elif patience_criterion == 'valid_loss': cur_error = val_err / val_batches elif patience_criterion == 'valid_error': cur_error = 1 - results['accuracy'] elif patience_criterion == 'valid_map': cur_error = 1 - results['map'] if cur_error <= best_error: best_error = cur_error best_state = get_state(network, updates) patience = cfg['patience'] else: patience -= 1 if patience == 0: if eta_decay_every == 'trial_of_patience' and eta_decay != 1: eta.set_value(eta.get_value() * lasagne.utils.floatX(eta_decay)) restore_state(network, updates, best_state) patience = cfg['patience'] trials_of_patience -= 1 print("Lost patience (%d remaining trials)." % trials_of_patience) if trials_of_patience == 0: break if eta_decay_every != 'trial_of_patience' and eta_decay != 1 and \ (epoch + 1) % eta_decay_every == 0: eta.set_value(eta.get_value() * lasagne.utils.floatX(eta_decay)) if eta_cycle[epoch % len(eta_cycle)] != 1: eta.set_value( eta.get_value() * lasagne.utils.floatX(eta_cycle[epoch % len(eta_cycle)])) # store current training state, if needed if options.keep_state: state = {} state['epoch'] = epoch + 1 state['network'] = get_state(network, updates) if options.save_errors: state['errors'] = errors if patience > 0: state['best_error'] = best_error state['best_state'] = best_state state['patience'] = patience state['trials_of_patience'] = trials_of_patience with open(statefile, 'wb') as f: pickle.dump(state, f, -1) del state # for debugging: print memory use and break into debugger #import resource, psutil #print("Memory usage: %.3f MiB / %.3f MiB" % # (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024., # psutil.Process().memory_info()[0] / float(1024**2))) #import pdb; pdb.set_trace() # save final network print("Saving final model") save_model(modelfile, network, cfg) if options.save_errors: np.savez(modelfile[:-len('.npz')] + '.err.npz', np.asarray(errors).reshape(epoch + 1, -1))
db_query = '' for key, value in kwargs.iteritems(): db_query += "%s='%s' " % (key, value) try: conn = psycopg2.connect(db_query) cursor = conn.cursor() print "Opened database successfully" except: print "I am unable to connect to the database" return True def db_create(): cursor.execute('''CREATE TABLE HOSTEL (ID INT PRIMARY KEY NOT NULL, NAME TEXT NOT NULL, PLACES INT NOT NULL, ADDRESS CHAR(50), PRICE REAL);''') print "Table created successfully" kwargs = parse_config_file("db_access") PostgreSQLCRUD.db_connect(**kwargs) PostgreSQLCRUD.db_create() # conn.commit() # conn.close()
def main(): # parse command line parser = opts_parser() options = parser.parse_args() modelfile = options.modelfile outfile = options.outfile if options.split_pool and options.saliency: parser.error("--split-pool and --saliency cannot be combined.") # read configuration files and immediate settings cfg = {} if os.path.exists(modelfile + '.vars'): options.vars.insert(1, modelfile + '.vars') for fn in options.vars: cfg.update(config.parse_config_file(fn)) cfg.update(config.parse_variable_assignments(options.var)) # read some settings into local variables fps = cfg['fps'] len_min = cfg['len_min'] len_max = cfg['len_max'] # prepare dataset print("Preparing data reading...") datadir = os.path.join(os.path.dirname(__file__), os.path.pardir, 'datasets', options.dataset) # - load filelists filelist = [] for d in options.filelists.split(','): with io.open(os.path.join(datadir, 'filelists', d)) as f: filelist.extend(l.rstrip() for l in f if l.rstrip()) # - create data feed feed, input_formats = data.prepare_datafeed(filelist, datadir, 'test', cfg) # - we start the generator in a background thread if not options.plot: batches = augment.generate_in_background([data.run_datafeed(feed, cfg)], num_cached=1) else: # unless we're plotting; this would mess up the progress counter batches = data.run_datafeed(feed, cfg) print("Preparing prediction function...") # instantiate neural network input_vars = {name: T.TensorType(str(np.dtype(dtype)), (False,) * len(shape))(name) for name, (dtype, shape) in input_formats.items()} input_shapes = {name: shape for name, (dtype, shape) in input_formats.items()} network = model.architecture(input_vars, input_shapes, cfg) if isinstance(network, list) and not options.include_side_outputs: network = network[0] # only use the main output # load saved weights with np.load(modelfile, encoding='latin1') as f: lasagne.layers.set_all_param_values( network, [f['param%d' % i] for i in range(len(f.files))]) # insert guided backprop, if needed for saliency if options.saliency: from gbprop import replace_nonlinearities replace_nonlinearities(network, lasagne.nonlinearities.leaky_rectify) # create output expression(s) if options.split_pool: network_end = network network = next(l for l in lasagne.layers.get_all_layers(network)[::-1] if l.name == 'before_pool') outputs = lasagne.layers.get_output(network, deterministic=True) if options.split_pool: split_input_var = T.tensor4('input2') split_outputs = lasagne.layers.get_output( network_end, {network: split_input_var}, deterministic=True) split_input_vars = [v for v in theano.gof.graph.inputs([split_outputs]) if not isinstance(v, theano.compile.SharedVariable) and not isinstance(v, theano.tensor.Constant)] # create saliency map expression, if needed if options.saliency: saliency = theano.grad(outputs[:, options.saliency].sum(), input_vars['spect']) outputs = outputs + [saliency] if isinstance(outputs, list) else [outputs, saliency] # prepare and compile prediction function print("Compiling prediction function...") test_fn = theano.function(list(input_vars.values()), outputs, on_unused_input='ignore') if options.split_pool: pool_fn = theano.function(split_input_vars, split_outputs, on_unused_input='ignore') # prepare plotting, if needed if options.plot: import matplotlib if os.environ.get('MPLBACKEND'): matplotlib.use(os.environ['MPLBACKEND']) # for old versions import matplotlib.pyplot as plt with open(os.path.join(datadir, 'labels', 'labelset'), 'rb') as f: labelset = [l.rstrip('\r\n') for l in f] # run prediction loop print("Predicting:") 
predictions = [] for batch in batches: spect = batch.pop('spect') if spect.shape[-2] <= len_max * fps or len_max == 0: # predict on full spectrogram at once preds = test_fn(spect=spect, **batch) else: # predict in segments of len_max, with overlap len_min # drop any reminder shorter than len_min (len_max if len_min == 0) preds = [test_fn(spect=spect[..., pos:pos + len_max * fps, :], **batch) for pos in range(0, (spect.shape[-2] + 1 - (len_min or len_max) * fps), (len_max - len_min) * fps)] if isinstance(preds[0], list): preds = [np.concatenate(p, axis=2 if p[0].ndim > 2 else 0) for p in zip(*preds)] else: preds = np.concatenate(preds, axis=2 if preds[0].ndim > 2 else 0) if cfg['arch.pool'] == 'none' or '_nopool' in cfg['arch.pool']: if isinstance(preds, list): preds = [p[0, :, :, 0].T if p.ndim == 4 else p for p in preds] else: preds = preds[0, :, :, 0].T elif options.split_pool: preds = pool_fn(preds, **batch) predictions.append(preds) if options.plot: if spect.ndim == 4: spect = spect[0] # remove batch axis if spect.ndim == 3: spect = spect[0] # remove channel axis if isinstance(preds, list): preds, sides = preds[0], preds[1:] else: sides = [] fig, axs = plt.subplots(2 + len(sides), 1, sharex=True) axs[0].imshow(np.log1p(1e-3 * spect).T[::-1], cmap='hot', aspect='auto', interpolation='nearest') K = 5 top_k = lme(preds, axis=0).argpartition(preds.shape[1] - 1 - np.arange(K))[::-1][:K] #top_k = (preds * softmax(sides[0], axis=0).mean(axis=1, keepdims=True)).sum(axis=0).argpartition(preds.shape[1] - 1 - np.arange(K))[::-1][:K] #top_k = softmax(preds, axis=-1).max(axis=0).argpartition(preds.shape[1] - 1 - np.arange(K))[::-1][:K] #top_k[-1] = labelset.index('mphbjm') preds = softmax(preds, axis=-1) x = np.arange(len(preds)) * (len(spect) / float(len(preds))) for k in top_k: axs[1].plot(x, preds[:, k], label=labelset[k]) #axs[1].set_ylim(0, 1.1) axs[1].legend(loc='best') for side, ax in zip(sides, axs[2:]): side = softmax(side, axis=0) ax.plot(x, side) plt.show() # save predictions print("Saving predictions") predictions = dict(zip(filelist, predictions)) if outfile.endswith('.pkl'): try: import cPickle as pickle except ImportError: import pickle with io.open(outfile, 'wb') as f: pickle.dump(predictions, f, protocol=-1) else: np.savez(outfile, **predictions)
import time import pynput import pynput.keyboard as keyboard from pynput.keyboard import Key import pyperclip import os import win32clipboard import shutil from config import parse_config_file controller = keyboard.Controller() capture_keys = True config = parse_config_file() # 1. F2 (optional) # 2. Type what is in clipboard # 3. Enter (*) def do_paste_magic(): capture_keys = False controller.release(Key.ctrl_l) time.sleep(0.01) if config['RENAME_PRESS_F2']: press_f2() time.sleep(0.01) time.sleep(0.01) type_string(get_clipboard_text(remove_newline=True) + config['TEXT']) if config['RENAME_PRESS_ENTER']: time.sleep(0.01)
def main():
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    outfile = options.outfile

    # read configuration files and immediate settings
    cfg = {}
    if os.path.exists(modelfile + '.vars'):
        options.vars.insert(1, modelfile + '.vars')
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    # read some settings into local variables
    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']
    blocklen = cfg['blocklen']
    batchsize = cfg['batchsize']

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    print("Preparing data reading...")
    datadir = os.path.join(os.path.dirname(__file__),
                           os.path.pardir, 'datasets', options.dataset)

    # - load filelist
    filelist = []
    for d in options.filelists.split(','):
        with io.open(os.path.join(datadir, 'filelists', d)) as f:
            filelist.extend(l.rstrip() for l in f if l.rstrip())

    # - create generator for spectra
    spects = (cached(options.cache_spectra and
                     os.path.join(options.cache_spectra, fn + '.npy'),
                     audio.extract_spect,
                     os.path.join(datadir, 'audio', fn),
                     sample_rate, frame_len, fps)
              for fn in filelist)

    # - pitch-shift if needed
    if options.pitchshift:
        import scipy.ndimage
        spline_order = 2
        spects = (scipy.ndimage.affine_transform(
                          spect, (1, 1 / (1 + options.pitchshift / 100.)),
                          output_shape=(len(spect), mel_max),
                          order=spline_order)
                  for spect in spects)

    # - define generator for cropped spectra
    spects = (spect[:, :bin_mel_max] for spect in spects)

    # - adjust loudness if needed
    if options.loudness:
        spects = (spect * float(10.**(options.loudness / 10.))
                  for spect in spects)

    # - define generator for silence-padding
    pad = np.zeros((blocklen // 2, bin_mel_max), dtype=floatX)
    spects = (np.concatenate((pad, spect, pad), axis=0) for spect in spects)

    # - we start the generator in a background thread (not required)
    spects = augment.generate_in_background([spects], num_cached=1)

    print("Preparing prediction function...")
    # instantiate neural network
    input_var = T.tensor3('input')
    inputs = input_var.dimshuffle(0, 'x', 1, 2)  # insert "channels" dimension
    network = model.architecture(inputs, (None, 1, blocklen, bin_mel_max),
                                 cfg)

    # load saved weights
    with np.load(modelfile) as f:
        lasagne.layers.set_all_param_values(
                network, [f['param%d' % i] for i in range(len(f.files))])

    # performant way: convert to fully-convolutional network
    if not options.mem_use == 'low':
        import model_to_fcn
        network = model_to_fcn.model_to_fcn(network, allow_unlink=True)

    # create output expression
    outputs = lasagne.layers.get_output(network, deterministic=True)

    # prepare and compile prediction function
    print("Compiling prediction function...")
    test_fn = theano.function([input_var], outputs)

    # run prediction loop
    print("Predicting:")
    predictions = []
    for spect in progress(spects, total=len(filelist), desc='File '):
        if options.mem_use == 'high':
            # fastest way: pass full spectrogram through network at once
            preds = test_fn(spect[np.newaxis])  # insert batch dimension
        elif options.mem_use == 'mid':
            # performant way: pass spectrogram in equal chunks of up to one
            # minute, taking care to overlap by `blocklen // 2` frames and to
            # not pass a chunk shorter than `blocklen` frames
            chunks = np.ceil(len(spect) / (fps * 60.))
            hopsize = int(np.ceil(len(spect) / chunks))
            chunksize = hopsize + blocklen - 1
            preds = np.vstack(test_fn(spect[np.newaxis, pos:pos + chunksize])
                              for pos in range(0, len(spect), hopsize))
        else:
            # naive way: pass excerpts of the size used during training
            # - view spectrogram memory as a 3-tensor of overlapping excerpts
            num_excerpts = len(spect) - blocklen + 1
            excerpts = np.lib.stride_tricks.as_strided(
                    spect, shape=(num_excerpts, blocklen, spect.shape[1]),
                    strides=(spect.strides[0], spect.strides[0],
                             spect.strides[1]))
            # - pass mini-batches through the network and concatenate results
            preds = np.vstack(test_fn(excerpts[pos:pos + batchsize])
                              for pos in range(0, num_excerpts, batchsize))
        predictions.append(preds)

        if options.plot:
            if spect.ndim == 3:
                spect = spect[0]  # remove channel axis
            spect = spect[blocklen // 2:-blocklen // 2]  # remove zero padding
            import matplotlib.pyplot as plt
            fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
            ax1.imshow(spect.T[::-1], vmin=-3, cmap='hot', aspect='auto',
                       interpolation='nearest')
            ax2.plot(preds)
            ax2.set_ylim(0, 1.1)
            plt.show()

    # save predictions
    print("Saving predictions")
    data = dict(zip(filelist, predictions))
    if outfile.endswith('.pkl'):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        with io.open(outfile, 'wb') as f:
            pickle.dump(data, f, protocol=-1)
    else:
        np.savez(outfile, **data)
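# NOTE (illustrative aside, not part of the original script): the low-memory
# prediction path above views the padded spectrogram as a stack of
# overlapping excerpts without copying, via numpy stride tricks. A minimal,
# self-contained sketch of that view, assuming a toy "spectrogram" of
# 6 frames x 3 bins and a block length of 4 frames:
import numpy as np


def overlapping_excerpts(spect, blocklen):
    """Return a zero-copy view of shape (num_excerpts, blocklen, bins)."""
    num_excerpts = len(spect) - blocklen + 1
    return np.lib.stride_tricks.as_strided(
            spect, shape=(num_excerpts, blocklen, spect.shape[1]),
            strides=(spect.strides[0], spect.strides[0], spect.strides[1]))


toy = np.arange(18, dtype=np.float32).reshape(6, 3)  # 6 frames, 3 bins
view = overlapping_excerpts(toy, blocklen=4)
assert view.shape == (3, 4, 3)        # 3 excerpts, each shifted by one frame
assert np.shares_memory(view, toy)    # no data was copied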
def main():
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    outdir = options.outdir
    if options.load_spectra != 'memory' and not options.cache_spectra:
        parser.error('option --load-spectra=%s requires --cache-spectra' %
                     options.load_spectra)

    # read configuration files and immediate settings
    cfg = {}
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    # read some settings into local variables
    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__),
                           os.path.pardir, 'datasets', options.dataset)

    # - load filelist
    filelist = []
    ranges = {}
    for part in 'train', 'valid', 'test':
        a = len(filelist)
        with io.open(os.path.join(datadir, 'filelists',
                                  cfg.get('filelist.%s' % part, part))) as f:
            filelist.extend(l.rstrip() for l in f if l.rstrip())
        ranges[part] = slice(a, len(filelist))

    # - compute spectra
    print("Computing%s spectra..." %
          (" or loading" if options.cache_spectra else ""))
    spects = []
    for fn in progress(filelist, 'File '):
        cache_fn = (options.cache_spectra and
                    os.path.join(options.cache_spectra, fn + '.npy'))
        spects.append(cached(cache_fn, audio.extract_spect,
                             os.path.join(datadir, 'audio', fn),
                             sample_rate, frame_len, fps,
                             loading_mode=options.load_spectra))

    # - load and convert corresponding labels
    print("Loading labels...")
    labels = []
    for fn, spect in zip(filelist, spects):
        fn = os.path.join(datadir, 'labels', fn.rsplit('.', 1)[0] + '.lab')
        with io.open(fn) as f:
            segments = [l.rstrip().split() for l in f if l.rstrip()]
        segments = [(float(start), float(end), label == 'sing')
                    for start, end, label in segments]
        timestamps = np.arange(len(spect)) / float(fps)
        labels.append(create_aligned_targets(segments, timestamps, np.bool))

    # compute and save different variants of summarized magnitudes
    print("Saving files...")
    # - ground truth
    outfile = os.path.join(outdir, '%s_gt.pkl' % options.dataset)
    print(outfile)
    with io.open(outfile, 'wb') as f:
        pickle.dump({'labels': labels, 'splits': ranges}, f, protocol=-1)
    # - summarized spectra
    save_spectral_sums(
            os.path.join(outdir, '%s_spect_sum.pkl' % options.dataset),
            spects)
    # - summarized mel spectra
    bank = audio.create_mel_filterbank(sample_rate, frame_len, mel_bands,
                                       mel_min, mel_max).astype(np.float32)
    spects = [np.dot(spect[:, ], bank) for spect in spects]
    save_spectral_sums(
            os.path.join(outdir, '%s_spect_mel_sum.pkl' % options.dataset),
            spects)
    # - summarized log-mel spectra
    spects = [np.log(np.maximum(1e-7, spect)) for spect in spects]
    save_spectral_sums(
            os.path.join(outdir,
                         '%s_spect_mel_log_sum.pkl' % options.dataset),
            spects)
    # - summarized standardized log-mel spectra
    m, s = znorm.compute_mean_std(spects[ranges['train']], axis=0)
    spects = [((spect - m) / s).astype(np.float32) for spect in spects]
    save_spectral_sums(
            os.path.join(outdir,
                         '%s_spect_mel_log_std_sum.pkl' % options.dataset),
            spects)
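# NOTE (illustrative aside): the label conversion above relies on the
# project's create_aligned_targets() helper, which is not shown here. A
# minimal sketch of what such a helper could look like, under the assumption
# that each (start, end, state) segment spans a time range in seconds and
# each frame timestamp takes the state of the segment it falls into:
import numpy as np


def create_aligned_targets_sketch(segments, timestamps, dtype=bool):
    """Map (start, end, label) segments to one label per frame timestamp."""
    targets = np.zeros(len(timestamps), dtype=dtype)
    for start, end, label in segments:
        if label:
            targets[(timestamps >= start) & (timestamps < end)] = True
    return targets


# e.g. two segments over 1 s of audio sampled at 5 frames per second:
# segs = [(0.0, 0.4, False), (0.4, 1.0, True)]
# create_aligned_targets_sketch(segs, np.arange(5) / 5.0)
# -> array([False, False,  True,  True,  True])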
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile

    # read configuration files and immediate settings
    cfg = {}
    print(options.vars)
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    # read some settings into local variables
    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']
    blocklen = cfg['blocklen']
    batchsize = cfg['batchsize']

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__),
                           os.path.pardir, 'datasets', options.dataset)
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % options.dataset)

    if options.input_type == 'audio':
        dataloader = DatasetLoader(options.dataset, options.cache_spectra,
                                   datadir, input_type=options.input_type)
        batches = dataloader.prepare_audio_batches(sample_rate, frame_len,
                                                   fps, blocklen, batchsize)
    else:
        dataloader = DatasetLoader(options.dataset, options.cache_spectra,
                                   datadir, input_type=options.input_type)
        batches = dataloader.prepare_batches(sample_rate, frame_len, fps,
                                             mel_bands, mel_min, mel_max,
                                             blocklen, batchsize)

    validation_data = DatasetLoader(options.dataset,
                                    '../ismir2015/experiments/mel_data/',
                                    datadir, dataset_split='valid',
                                    input_type='mel_spects')
    mel_spects_val, labels_val = validation_data.prepare_batches(
            sample_rate, frame_len, fps, mel_bands, mel_min, mel_max,
            blocklen, batchsize, batch_data=False)

    mdl = model.CNNModel(model_type=options.model_type,
                         input_type=options.input_type, is_zeromean=False,
                         sample_rate=sample_rate, frame_len=frame_len,
                         fps=fps, mel_bands=mel_bands, mel_min=mel_min,
                         mel_max=mel_max, bin_mel_max=bin_mel_max,
                         meanstd_file=meanstd_file, device=device)
    mdl = mdl.to(device)

    # set up learning rate and learning rate parameters
    initial_eta = cfg['initial_eta']
    eta_decay = cfg['eta_decay']
    momentum = cfg['momentum']
    eta_decay_every = cfg.get('eta_decay_every', 1)
    eta = initial_eta

    # set up loss
    criterion = torch.nn.BCELoss()

    # set up optimizer and learning rate schedule
    optimizer = torch.optim.SGD(mdl.parameters(), lr=eta, momentum=momentum,
                                nesterov=True)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=eta_decay_every,
                                                gamma=eta_decay)

    writer = SummaryWriter(os.path.join(modelfile, 'runs'))

    epochs = cfg['epochs']
    epochsize = cfg['epochsize']
    batches = iter(batches)

    # conditions to save model
    best_val_loss = 100000.
    best_val_error = 1.

    for epoch in range(epochs):
        # - Initialize certain parameters that are used to monitor training
        err = 0
        total_norm = 0
        loss_accum = 0
        mdl.train(True)

        # - Compute the L-2 norm of the gradients
        for p in mdl.parameters():
            if p.grad is not None:
                param_norm = p.grad.data.norm(2)
                total_norm += param_norm.item() ** 2
        total_norm = total_norm ** (1. / 2)

        # - Start the training for this epoch
        for batch in progress(range(epochsize), min_delay=0.5,
                              desc='Epoch %d/%d: Batch ' % (epoch + 1,
                                                            epochs)):
            data = next(batches)
            if options.input_type == 'audio' or options.input_type == 'stft':
                input_data = data[0]
            else:
                input_data = np.transpose(data[0][:, :, :, np.newaxis],
                                          (0, 3, 1, 2))
            labels = data[1][:, np.newaxis].astype(np.float32)

            # map labels to make them softer
            if not options.adversarial_training:
                labels = (0.02 + 0.96 * labels)

            optimizer.zero_grad()
            if options.adversarial_training:
                # craft adversarial examples with the current model frozen
                mdl.train(False)
                if options.input_type == 'stft':
                    input_data_adv = attacks.PGD(
                            mdl, torch.from_numpy(input_data).to(device),
                            target=torch.from_numpy(labels).to(device),
                            eps=cfg['eps'], step_size=cfg['eps_iter'],
                            iterations=cfg['nb_iter'], use_best=True,
                            random_start=True, clip_min=0,
                            clip_max=1e8).cpu().detach().numpy()
                else:
                    input_data_adv = attacks.PGD(
                            mdl, torch.from_numpy(input_data).to(device),
                            target=torch.from_numpy(labels).to(device),
                            eps=cfg['eps'], step_size=cfg['eps_iter'],
                            iterations=cfg['nb_iter'], use_best=True,
                            random_start=True).cpu().detach().numpy()
                mdl.train(True)
                optimizer.zero_grad()
                outputs = mdl(torch.from_numpy(input_data_adv).to(device))
            else:
                optimizer.zero_grad()
                outputs = mdl(torch.from_numpy(input_data).to(device))

            loss = criterion(outputs, torch.from_numpy(labels).to(device))
            loss.backward()
            optimizer.step()
            print(loss.item())
            loss_accum += loss.item()

        # - Compute validation loss and error if desired
        if options.validate:
            mdl.input_type = 'mel_spects'
            from eval import evaluate
            mdl.train(False)
            val_loss = 0
            preds = []
            labs = []
            max_len = fps
            num_iter = 0

            for spect, label in zip(mel_spects_val, labels_val):
                num_excerpts = len(spect) - blocklen + 1
                excerpts = np.lib.stride_tricks.as_strided(
                        spect,
                        shape=(num_excerpts, blocklen, spect.shape[1]),
                        strides=(spect.strides[0], spect.strides[0],
                                 spect.strides[1]))
                # - Pass mini-batches through the network and concatenate
                #   results
                for pos in range(0, num_excerpts, batchsize):
                    input_data = np.transpose(
                            excerpts[pos:pos + batchsize, :, :, np.newaxis],
                            (0, 3, 1, 2))
                    if pos + batchsize > num_excerpts:
                        label_batch = label[
                                pos:num_excerpts,
                                np.newaxis].astype(np.float32)
                    else:
                        label_batch = label[
                                pos:pos + batchsize,
                                np.newaxis].astype(np.float32)
                    pred = mdl(torch.from_numpy(input_data).to(device))
                    e = criterion(pred,
                                  torch.from_numpy(label_batch).to(device))
                    preds = np.append(preds,
                                      pred[:, 0].cpu().detach().numpy())
                    labs = np.append(labs, label_batch)
                    val_loss += e.item()
                    num_iter += 1
            mdl.input_type = options.input_type

            print("Validation loss: %.3f" % (val_loss / num_iter))
            _, results = evaluate(preds, labs)
            print("Validation error: %.3f" % (1 - results['accuracy']))
            if 1 - results['accuracy'] < best_val_error:
                torch.save(mdl.state_dict(),
                           os.path.join(modelfile, 'model.pth'))
                best_val_loss = val_loss / num_iter
                best_val_error = 1 - results['accuracy']
                print('New saved model', best_val_loss, best_val_error)

        # Update the learning rate
        scheduler.step()
        print('Training Loss per epoch', loss_accum / epochsize)

        # - Save parameters for examining
        writer.add_scalar('Training Loss', loss_accum / epochsize, epoch)
        writer.add_scalar('Gradient norm', total_norm, epoch)
        if options.validate:
            # val_loss and results only exist when validation actually ran
            writer.add_scalar('Validation loss', val_loss / num_iter, epoch)
            writer.add_scalar('Validation error', 1 - results['accuracy'])

    if not options.validate:
        torch.save(mdl.state_dict(), os.path.join(modelfile, 'model.pth'))
    with io.open(os.path.join(modelfile, 'model.vars'), 'w') as f:
        f.writelines('%s=%s\n' % kv for kv in cfg.items())
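# NOTE (illustrative aside): attacks.PGD used above is a project-specific
# helper whose exact signature is not shown in this file; the eps, step_size
# and iterations arguments are taken from the config. As a rough, generic
# sketch of the projected-gradient-descent attack it presumably implements
# (iterated gradient ascent on the loss, projected back onto an L-inf ball
# of radius eps around the clean input):
import torch


def pgd_sketch(model, x, target, criterion, eps, step_size, iterations,
               random_start=True):
    """Return an adversarially perturbed copy of x within an L-inf eps-ball."""
    x_adv = x.clone().detach()
    if random_start:
        x_adv = x_adv + torch.empty_like(x_adv).uniform_(-eps, eps)
    for _ in range(iterations):
        x_adv = x_adv.clone().detach().requires_grad_(True)
        loss = criterion(model(x_adv), target)
        grad, = torch.autograd.grad(loss, x_adv)
        with torch.no_grad():
            x_adv = x_adv + step_size * grad.sign()        # ascend the loss
            x_adv = x + torch.clamp(x_adv - x, -eps, eps)  # project to ball
    return x_adv.detach()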
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    lossgradient = options.lossgradient

    # read configuration files and immediate settings
    cfg = {}
    print(options.vars)
    print('Model save file:', modelfile)
    print('Lossgrad file:', lossgradient)
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    # read some settings into local variables
    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']
    blocklen = cfg['blocklen']
    batchsize = cfg['batchsize']
    print('Occluded amount:', cfg['occlude'])

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__),
                           os.path.pardir, 'datasets', options.dataset)
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % options.dataset)

    dataloader = DatasetLoader(options.dataset, options.cache_spectra,
                               datadir, input_type=options.input_type)
    batches = dataloader.prepare_batches(sample_rate, frame_len, fps,
                                         mel_bands, mel_min, mel_max,
                                         blocklen, batchsize)
    validation_data = DatasetLoader(options.dataset,
                                    '../ismir2015/experiments/mel_data/',
                                    datadir, dataset_split='valid',
                                    input_type='mel_spects')
    mel_spects_val, labels_val = validation_data.prepare_batches(
            sample_rate, frame_len, fps, mel_bands, mel_min, mel_max,
            blocklen, batchsize, batch_data=False)

    with np.load(meanstd_file) as f:
        mean = f['mean']
        std = f['std']
    mean = mean.astype(floatX)
    istd = np.reciprocal(std).astype(floatX)

    if options.input_type == 'mel_spects':
        mdl = model.CNNModel(input_type='mel_spects_norm', is_zeromean=False,
                             sample_rate=sample_rate, frame_len=frame_len,
                             fps=fps, mel_bands=mel_bands, mel_min=mel_min,
                             mel_max=mel_max, bin_mel_max=bin_mel_max,
                             meanstd_file=meanstd_file, device=device)

    if lossgradient != 'None':
        mdl_lossgrad = model.CNNModel(input_type=options.input_type,
                                      is_zeromean=False,
                                      sample_rate=sample_rate,
                                      frame_len=frame_len, fps=fps,
                                      mel_bands=mel_bands, mel_min=mel_min,
                                      mel_max=mel_max,
                                      bin_mel_max=bin_mel_max,
                                      meanstd_file=meanstd_file,
                                      device=device)
        mdl_lossgrad.load_state_dict(torch.load(lossgradient))
        mdl_lossgrad.to(device)
        mdl_lossgrad.eval()

    mdl = mdl.to(device)

    # set up learning rate and learning rate parameters
    initial_eta = cfg['initial_eta']
    eta_decay = cfg['eta_decay']
    momentum = cfg['momentum']
    eta_decay_every = cfg.get('eta_decay_every', 1)
    eta = initial_eta

    # set up loss
    criterion = torch.nn.BCELoss()

    # set up optimizer and learning rate schedule
    optimizer = torch.optim.SGD(mdl.parameters(), lr=eta, momentum=momentum,
                                nesterov=True)
    # optimizer = torch.optim.Adam(mdl.parameters(), lr=eta,
    #                              betas=(0.9, 0.999), eps=1e-08,
    #                              weight_decay=0, amsgrad=False)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=eta_decay_every,
                                                gamma=eta_decay)

    writer = SummaryWriter(os.path.join(modelfile, 'runs'))

    epochs = cfg['epochs']
    epochsize = cfg['epochsize']
    batches = iter(batches)

    # conditions to save model
    best_val_loss = 100000.
    best_val_error = 1.

    # loss gradient values for validation data
    loss_grad_val = validation_data.prepare_loss_grad_batches(
            options.loss_grad_save, mel_spects_val, labels_val, mdl_lossgrad,
            criterion, blocklen, batchsize, device)

    for epoch in range(epochs):
        # - Initialize certain parameters that are used to monitor training
        err = 0
        total_norm = 0
        loss_accum = 0
        mdl.train(True)

        # - Compute the L-2 norm of the gradients
        for p in mdl.parameters():
            if p.grad is not None:
                param_norm = p.grad.data.norm(2)
                total_norm += param_norm.item() ** 2
        total_norm = total_norm ** (1. / 2)

        # - Start the training for this epoch
        for batch in progress(range(epochsize), min_delay=0.5,
                              desc='Epoch %d/%d: Batch ' % (epoch + 1,
                                                            epochs)):
            data = next(batches)
            if options.input_type == 'audio' or options.input_type == 'stft':
                input_data = data[0]
            else:
                input_data = np.transpose(data[0][:, :, :, np.newaxis],
                                          (0, 3, 1, 2))
            labels = data[1][:, np.newaxis].astype(np.float32)
            input_data_loss = input_data

            if lossgradient != 'None':
                # rank time-frequency bins by the loss gradient of the
                # pre-trained model and occlude the top-ranked ones
                g = loss_grad(mdl_lossgrad,
                              torch.from_numpy(input_data_loss).to(
                                      device).requires_grad_(True),
                              torch.from_numpy(labels).to(device), criterion)
                g = np.squeeze(g)
                input_data = (input_data - mean) * istd
                for i in range(batchsize):
                    if options.lossgrad_algorithm == 'grad':
                        rank_matrix = np.abs(g[i])
                    elif options.lossgrad_algorithm == 'gradxinp':
                        rank_matrix = np.squeeze(g[i] *
                                                 input_data[i, :, :, :])
                    elif options.lossgrad_algorithm == 'gradorig':
                        rank_matrix = g[i]
                    v = np.argsort(rank_matrix, axis=None)[-cfg['occlude']:]
                    input_data[i, :, v // 80, v % 80] = 0
            else:
                # occlude randomly chosen time-frequency bins instead
                for i in range(batchsize):
                    v = np.random.choice(115 * 80, cfg['occlude'],
                                         replace=False)
                    input_data[i, :, v // 80, v % 80] = 0

            input_data = input_data.astype(floatX)
            # map labels to make them softer
            labels = (0.02 + 0.96 * labels)

            optimizer.zero_grad()
            outputs = mdl(torch.from_numpy(input_data).to(device))
            loss = criterion(outputs, torch.from_numpy(labels).to(device))
            loss.backward()
            optimizer.step()
            loss_accum += loss.item()

        # - Compute validation loss and error if desired
        if options.validate:
            from eval import evaluate
            mdl.train(False)
            val_loss = 0
            preds = []
            labs = []
            max_len = fps
            num_iter = 0

            for spect, label, g in zip(mel_spects_val, labels_val,
                                       loss_grad_val):
                num_excerpts = len(spect) - blocklen + 1
                excerpts = np.lib.stride_tricks.as_strided(
                        spect,
                        shape=(num_excerpts, blocklen, spect.shape[1]),
                        strides=(spect.strides[0], spect.strides[0],
                                 spect.strides[1]))
                # - Pass mini-batches through the network and concatenate
                #   results
                for pos in range(0, num_excerpts, batchsize):
                    input_data = np.transpose(
                            excerpts[pos:pos + batchsize, :, :, np.newaxis],
                            (0, 3, 1, 2))
                    if pos + batchsize > num_excerpts:
                        label_batch = label[
                                pos:num_excerpts,
                                np.newaxis].astype(np.float32)
                    else:
                        label_batch = label[
                                pos:pos + batchsize,
                                np.newaxis].astype(np.float32)

                    if lossgradient != 'None':
                        input_data = (input_data - mean) * istd
                        for i in range(input_data.shape[0]):
                            if options.lossgrad_algorithm == 'grad':
                                rank_matrix = np.abs(g[i])
                            elif options.lossgrad_algorithm == 'gradxinp':
                                rank_matrix = np.squeeze(
                                        g[i] * input_data[i, :, :, :])
                            elif options.lossgrad_algorithm == 'gradorig':
                                rank_matrix = g[i]
                            v = np.argsort(np.abs(rank_matrix),
                                           axis=None)[-cfg['occlude']:]
                            input_data[i, :, v // 80, v % 80] = 0
                    else:
                        for i in range(input_data.shape[0]):
                            v = np.random.choice(115 * 80, cfg['occlude'],
                                                 replace=False)
                            input_data[i, :, v // 80, v % 80] = 0

                    input_data = input_data.astype(floatX)
                    pred = mdl(torch.from_numpy(input_data).to(device))
                    e = criterion(pred,
                                  torch.from_numpy(label_batch).to(device))
                    preds = np.append(preds,
                                      pred[:, 0].cpu().detach().numpy())
                    labs = np.append(labs, label_batch)
                    val_loss += e.item()
                    num_iter += 1

            print("Validation loss: %.3f" % (val_loss / num_iter))
            _, results = evaluate(preds, labs)
            print("Validation error: %.3f" % (1 - results['accuracy']))
            if 1 - results['accuracy'] < best_val_error:
                torch.save(mdl.state_dict(),
                           os.path.join(modelfile, 'model.pth'))
                best_val_loss = val_loss / num_iter
                best_val_error = 1 - results['accuracy']
                print('New saved model', best_val_loss, best_val_error)

        # Update the learning rate
        scheduler.step()
        print('Training Loss per epoch', loss_accum / epochsize)

        # - Save parameters for examining
        writer.add_scalar('Training Loss', loss_accum / epochsize, epoch)
        writer.add_scalar('Gradient norm', total_norm, epoch)
        if options.validate:
            writer.add_scalar('Validation loss', val_loss / num_iter, epoch)
            writer.add_scalar('Validation error', 1 - results['accuracy'])

    if not options.validate:
        torch.save(mdl.state_dict(), os.path.join(modelfile, 'model.pth'))
    with io.open(os.path.join(modelfile, 'model.vars'), 'w') as f:
        f.writelines('%s=%s\n' % kv for kv in cfg.items())
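# NOTE (illustrative aside): the occlusion step above turns flat indices from
# np.argsort(..., axis=None) into (frame, mel-band) coordinates with
# v // 80 and v % 80, i.e. it assumes excerpts of 115 frames x 80 mel bands
# (hence the hard-coded 115 * 80 for random occlusion). A tiny sketch of that
# index mapping on a toy excerpt:
import numpy as np


def occlude_top_bins(excerpt, rank_matrix, num_occlude):
    """Zero the num_occlude highest-ranked time-frequency bins in place."""
    n_bands = excerpt.shape[1]                      # e.g. 80 mel bands
    flat = np.argsort(rank_matrix, axis=None)[-num_occlude:]
    excerpt[flat // n_bands, flat % n_bands] = 0    # row = frame, col = band
    return excerpt


# e.g. a 3 x 4 excerpt where the two largest saliency values sit at
# (0, 3) and (2, 1); exactly those two bins are zeroed:
# sal = np.array([[0, 1, 2, 9], [3, 0, 1, 2], [1, 8, 0, 1]])
# occlude_top_bins(np.ones((3, 4)), sal, 2)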