コード例 #1
0
def compile_prediction_function_audio(modelfile, input_dim, excerpt_size):
    """
    Build the network, restore its weights and return a compiled Theano
    function that computes predictions for a batch of input excerpts.

    :param modelfile: file readable by ``np.load`` that stores the network
        parameters under the keys ``param0``, ``param1``, ...
    :param input_dim: size of the last input axis handed to
        ``model.architecture``
    :param excerpt_size: size of the second-to-last input axis handed to
        ``model.architecture``
    :return: a ``theano.function`` taking one 3-tensor and returning the
        network output computed with ``deterministic=True``
    """
    # symbolic 3-tensor input; a broadcastable axis is inserted at position 1
    # to serve as the "channels" dimension expected by the architecture
    input_var = T.tensor3('input')
    network = model.architecture(
        input_var.dimshuffle(0, 'x', 1, 2),
        (None, 1, excerpt_size, input_dim))

    # restore the stored parameters in their numbered order
    with np.load(modelfile) as f:
        param_values = [f['param%d' % idx] for idx in range(len(f.files))]
        lasagne.layers.set_all_param_values(network, param_values)

    # deterministic forward pass, compiled into a callable
    predictions = lasagne.layers.get_output(network, deterministic=True)
    return theano.function([input_var], predictions)
コード例 #2
0
def main():
    """
    Train the network on the 'train' file list of the dataset named on the
    command line and save the final weights to ``options.modelfile``.

    Steps, in order: compute (or load cached) spectrograms, load the
    ``.lab`` segment annotations and align them to the spectrogram frames,
    build the mel filterbank, load or compute the per-dataset mean/std,
    set up a mini-batch generator (optionally with augmentation running in
    background threads), compile the Theano training function and run the
    training loop. The process exits with status 1 if the accumulated loss
    becomes non-finite.
    """
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    sample_rate = 22050
    frame_len = 1024
    fps = 70
    mel_bands = 80
    mel_min = 27.5
    mel_max = 8000
    blocklen = 115
    batchsize = 32

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__),
                           os.path.pardir, 'datasets', options.dataset)

    # - load filelist
    with io.open(os.path.join(datadir, 'filelists', 'train')) as f:
        filelist = [l.rstrip() for l in f if l.rstrip()]

    # - compute spectra
    print("Computing%s spectra..." %
          (" or loading" if options.cache_spectra else ""))
    spects = []
    for fn in progress(filelist, 'File '):
        # cache_fn is falsy when no cache directory was given
        cache_fn = (options.cache_spectra and
                    os.path.join(options.cache_spectra, fn + '.npy'))
        spects.append(cached(cache_fn,
                             audio.extract_spect,
                             os.path.join(datadir, 'audio', fn),
                             sample_rate, frame_len, fps))

    # - load and convert corresponding labels
    print("Loading labels...")
    labels = []
    for fn, spect in zip(filelist, spects):
        fn = os.path.join(datadir, 'labels', fn.rsplit('.', 1)[0] + '.lab')
        with io.open(fn) as f:
            segments = [l.rstrip().split() for l in f if l.rstrip()]
        segments = [(float(start), float(end), label == 'sing')
                    for start, end, label in segments]
        timestamps = np.arange(len(spect)) / float(fps)
        # `np.bool` was a plain alias of the builtin `bool`; it was deprecated
        # in NumPy 1.20 and removed in 1.24, so use the builtin directly.
        labels.append(create_aligned_targets(segments, timestamps, bool))

    # - prepare mel filterbank
    filterbank = audio.create_mel_filterbank(sample_rate, frame_len, mel_bands,
                                             mel_min, mel_max)
    filterbank = filterbank[:bin_mel_max].astype(floatX)

    # - precompute mel spectra, if needed, otherwise just define a generator
    mel_spects = (np.log(np.maximum(np.dot(spect[:, :bin_mel_max], filterbank),
                                    1e-7))
                  for spect in spects)
    if not options.augment:
        mel_spects = list(mel_spects)
        del spects

    # - load mean/std or compute it, if not computed yet
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % options.dataset)
    try:
        with np.load(meanstd_file) as f:
            mean = f['mean']
            std = f['std']
    except (IOError, KeyError):
        print("Computing mean and standard deviation...")
        mean, std = znorm.compute_mean_std(mel_spects)
        np.savez(meanstd_file, mean=mean, std=std)
    mean = mean.astype(floatX)
    istd = np.reciprocal(std).astype(floatX)

    # - prepare training data generator
    print("Preparing training data feed...")
    if not options.augment:
        # Without augmentation, we just precompute the normalized mel spectra
        # and create a generator that returns mini-batches of random excerpts
        mel_spects = [(spect - mean) * istd for spect in mel_spects]
        batches = augment.grab_random_excerpts(
            mel_spects, labels, batchsize, blocklen)
    else:
        # For time stretching and pitch shifting, it pays off to preapply the
        # spline filter to each input spectrogram, so it does not need to be
        # applied to each mini-batch later.
        spline_order = 2
        if spline_order > 1:
            from scipy.ndimage import spline_filter
            spects = [spline_filter(spect, spline_order).astype(floatX)
                      for spect in spects]

        # We define a function to create the mini-batch generator. This allows
        # us to easily create multiple generators for multithreading if needed.
        def create_datafeed(spects, labels):
            # With augmentation, as we want to apply random time-stretching,
            # we request longer excerpts than we finally need to return.
            max_stretch = .3
            batches = augment.grab_random_excerpts(
                    spects, labels, batchsize=batchsize,
                    frames=int(blocklen / (1 - max_stretch)))

            # We wrap the generator in another one that applies random time
            # stretching and pitch shifting, keeping a given number of frames
            # and bins only.
            max_shift = .3
            batches = augment.apply_random_stretch_shift(
                    batches, max_stretch, max_shift,
                    keep_frames=blocklen, keep_bins=bin_mel_max,
                    order=spline_order, prefiltered=True)

            # We transform the excerpts to mel frequency and log magnitude.
            batches = augment.apply_filterbank(batches, filterbank)
            batches = augment.apply_logarithm(batches)

            # We apply random frequency filters
            batches = augment.apply_random_filters(batches, filterbank,
                                                   mel_max, max_db=10)

            # We apply normalization
            batches = augment.apply_znorm(batches, mean, istd)

            return batches

        # We start the mini-batch generator and augmenter in one or more
        # background threads or processes (unless disabled).
        bg_threads = 3
        bg_processes = 0
        if not bg_threads and not bg_processes:
            # no background processing: just create a single generator
            batches = create_datafeed(spects, labels)
        elif bg_threads:
            # multithreading: create a separate generator per thread
            batches = augment.generate_in_background(
                    [create_datafeed(spects, labels)
                     for _ in range(bg_threads)],
                    num_cached=bg_threads * 5)
        elif bg_processes:
            # multiprocessing: single generator is forked along with processes
            batches = augment.generate_in_background(
                    [create_datafeed(spects, labels)] * bg_processes,
                    num_cached=bg_processes * 25,
                    in_processes=True)


    print("Preparing training function...")
    # instantiate neural network
    input_var = T.tensor3('input')
    inputs = input_var.dimshuffle(0, 'x', 1, 2)  # insert "channels" dimension
    network = model.architecture(inputs, (None, 1, blocklen, mel_bands))

    # create cost expression
    target_var = T.vector('targets')
    targets = (0.02 + 0.96 * target_var)  # map 0 -> 0.02, 1 -> 0.98
    targets = targets.dimshuffle(0, 'x')  # turn into column vector
    outputs = lasagne.layers.get_output(network, deterministic=False)
    cost = T.mean(lasagne.objectives.binary_crossentropy(outputs, targets))

    # prepare and compile training function
    params = lasagne.layers.get_all_params(network, trainable=True)
    initial_eta = 0.01
    eta_decay = 0.85
    momentum = 0.95
    eta = theano.shared(lasagne.utils.floatX(initial_eta))
    updates = lasagne.updates.nesterov_momentum(cost, params, eta, momentum)
    print("Compiling training function...")
    train_fn = theano.function([input_var, target_var], cost, updates=updates)

    # run training loop
    print("Training:")
    epochs = 20
    epochsize = 2000
    batches = iter(batches)
    for epoch in range(epochs):
        err = 0
        for batch in progress(
                range(epochsize), min_delay=.5,
                desc='Epoch %d/%d: Batch ' % (epoch + 1, epochs)):
            err += train_fn(*next(batches))
            # the running sum turns non-finite as soon as one batch loss does
            if not np.isfinite(err):
                print("\nEncountered NaN loss in training. Aborting.")
                sys.exit(1)
        print("Train loss: %.3f" % (err / epochsize))
        # decay the learning rate once per epoch
        eta.set_value(eta.get_value() * lasagne.utils.floatX(eta_decay))

    # save final network
    print("Saving final model")
    np.savez(modelfile, **{'param%d' % i: p for i, p in enumerate(
            lasagne.layers.get_all_param_values(network))})
コード例 #3
0
def main():
    """
    Train a network as described by the command line and configuration
    files, and save the final model to ``options.modelfile``.

    Steps, in order: merge the configuration from ``options.vars`` files and
    ``options.var`` assignments, prepare the training data feed (optionally
    plotting or benchmarking it), optionally prepare a validation feed,
    build the network and cost expression (optionally with a
    "born-again-network" pseudo label and pre-trained initial weights),
    compile the training and validation functions, optionally restore a
    previous training state, run the training loop with learning rate
    scheduling and patience-based early stopping, and finally save the
    model and, if requested, the recorded errors.

    The process exits with status 1 if the accumulated training loss becomes
    non-finite. When ``cfg['benchmark_datafeed']`` is set, the function
    returns after the benchmark without training.
    """
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile

    # read configuration files and immediate settings
    cfg = {}
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__), os.path.pardir,
                           'datasets', options.dataset)

    print("Preparing training data feed...")
    with io.open(os.path.join(datadir, 'filelists', 'train')) as f:
        filelist = [l.rstrip() for l in f if l.rstrip()]
    train_feed, train_formats = data.prepare_datafeed(filelist, datadir,
                                                      'train', cfg)

    # If told so, we plot some mini-batches on screen.
    if cfg.get('plot_datafeed'):
        import matplotlib.pyplot as plt
        for batch in data.run_datafeed(train_feed, cfg):
            plt.matshow(np.log(batch['spect'][0]).T,
                        aspect='auto',
                        origin='lower',
                        cmap='hot',
                        interpolation='nearest')
            plt.colorbar()
            plt.title(str(batch['label'][0]))
            plt.show()

    # We start the mini-batch generator and augmenter in one or more
    # background threads or processes (unless disabled).
    bg_threads = cfg['bg_threads']
    bg_processes = cfg['bg_processes']
    if not bg_threads and not bg_processes:
        # no background processing: just create a single generator
        batches = data.run_datafeed(train_feed, cfg)
    elif bg_threads:
        # multithreading: create a separate generator per thread
        batches = augment.generate_in_background([
            data.run_datafeed(feed, cfg)
            for feed in data.split_datafeed(train_feed, bg_threads, cfg)
        ],
                                                 num_cached=bg_threads * 2)
    elif bg_processes:
        # multiprocessing: single generator is forked along with processes
        batches = augment.generate_in_background(
            [data.run_datafeed(train_feed, cfg)] * bg_processes,
            num_cached=bg_processes * 25,
            in_processes=True)

    # If told so, we benchmark the creation of a given number of mini-batches.
    if cfg.get('benchmark_datafeed'):
        print("Benchmark: %d mini-batches of %d items " %
              (cfg['benchmark_datafeed'], cfg['batchsize']),
              end='')
        if bg_threads:
            print("(in %d threads): " % bg_threads)
        elif bg_processes:
            print("(in %d processes): " % bg_processes)
        else:
            print("(in main thread): ")
        import time
        import itertools
        t0 = time.time()
        # islice(it, n, n) consumes n items and yields nothing; next() with a
        # default drives it without raising StopIteration
        next(
            itertools.islice(batches, cfg['benchmark_datafeed'],
                             cfg['benchmark_datafeed']), None)
        t1 = time.time()
        print(t1 - t0)
        return

    # - prepare validation data generator
    if options.validate:
        print("Preparing validation data feed...")
        with io.open(os.path.join(datadir, 'filelists', 'valid')) as f:
            filelist_val = [l.rstrip() for l in f if l.rstrip()]
        val_feed, val_formats = data.prepare_datafeed(filelist_val, datadir,
                                                      'valid', cfg)
        if bg_threads or bg_processes:
            multi = bg_threads or bg_processes
            val_feed = data.split_datafeed(val_feed, multi, cfg)

        def run_val_datafeed():
            # returns a fresh iterator over the validation mini-batches
            if bg_threads or bg_processes:
                return augment.generate_in_background(
                    [data.run_datafeed(feed, cfg) for feed in val_feed],
                    num_cached=multi,
                    in_processes=bool(bg_processes))
            else:
                return data.run_datafeed(val_feed, cfg)

    print("Preparing training function...")
    # instantiate neural network
    # one symbolic input variable per entry of the data feed's format spec
    input_vars = {
        name: T.TensorType(str(np.dtype(dtype)), (False, ) * len(shape))(name)
        for name, (dtype, shape) in train_formats.items()
    }
    input_shapes = {
        name: shape
        for name, (dtype, shape) in train_formats.items()
    }
    network = model.architecture(input_vars, input_shapes, cfg)
    print(
        "- %d layers (%d with weights), %f mio params" %
        (len(lasagne.layers.get_all_layers(network)),
         sum(hasattr(l, 'W') for l in lasagne.layers.get_all_layers(network)),
         lasagne.layers.count_params(network, trainable=True) / 1e6))
    print("- weight shapes: %r" % [
        l.W.get_value().shape for l in lasagne.layers.get_all_layers(network)
        if hasattr(l, 'W') and hasattr(l.W, 'get_value')
    ])
    cost_vars = dict(input_vars)

    # prepare for born-again-network, if needed
    if cfg.get('ban'):
        # a second copy of the architecture with fixed, loaded weights whose
        # deterministic output is handed to the cost as 'pseudo_label'
        network2 = model.architecture(input_vars, input_shapes, cfg)
        with np.load(cfg['ban'], encoding='latin1') as f:
            lasagne.layers.set_all_param_values(
                network2, [f['param%d' % i] for i in range(len(f.files))])
        cost_vars['pseudo_label'] = lasagne.layers.get_output(
            network2, deterministic=True)

    # load pre-trained weights, if needed
    if cfg.get('init_from'):
        # 'init_from' may name several files separated by ':'; their
        # parameter lists are concatenated in the given order
        param_values = []
        for fn in cfg['init_from'].split(':'):
            with np.load(fn, encoding='latin1') as f:
                param_values.extend(f['param%d' % i]
                                    for i in range(len(f.files)))
        lasagne.layers.set_all_param_values(network, param_values)
        del param_values

    # create cost expression
    outputs = lasagne.layers.get_output(network, deterministic=False)
    cost = T.mean(model.cost(outputs, cost_vars, 'train', cfg))
    if cfg.get('l2_decay', 0):
        cost_l2 = lasagne.regularization.regularize_network_params(
            network, lasagne.regularization.l2) * cfg['l2_decay']
    else:
        cost_l2 = 0

    # prepare and compile training function
    params = lasagne.layers.get_all_params(network, trainable=True)
    initial_eta = cfg['initial_eta']
    eta_decay = cfg['eta_decay']
    eta_decay_every = cfg.get('eta_decay_every', 1)
    eta_cycle = tuple(map(float, str(cfg['eta_cycle']).split(':')))
    if eta_cycle == (0, ):
        eta_cycle = (1, )  # so eta_cycle=0 equals disabling it
    patience = cfg.get('patience', 0)
    trials_of_patience = cfg.get('trials_of_patience', 1)
    patience_criterion = cfg.get(
        'patience_criterion',
        'valid_loss' if options.validate else 'train_loss')
    momentum = cfg['momentum']
    first_params = params[:cfg['first_params']]
    first_params_eta_scale = cfg['first_params_eta_scale']
    if cfg['learn_scheme'] == 'nesterov':
        learn_scheme = lasagne.updates.nesterov_momentum
    elif cfg['learn_scheme'] == 'momentum':
        # the update rules live in `lasagne.updates`; the former
        # `lasagne.update` raised an AttributeError for this scheme
        learn_scheme = lasagne.updates.momentum
    elif cfg['learn_scheme'] == 'adam':
        learn_scheme = lasagne.updates.adam
    else:
        raise ValueError('Unknown learn_scheme=%s' % cfg['learn_scheme'])
    eta = theano.shared(lasagne.utils.floatX(initial_eta))
    if not first_params or first_params_eta_scale == 1:
        updates = learn_scheme(cost + cost_l2, params, eta, momentum)
    else:
        # separate update rules so the first parameters can use a scaled
        # learning rate (or be left untouched if the scale is not positive)
        grads = theano.grad(cost + cost_l2, params)
        updates = learn_scheme(grads[len(first_params):],
                               params[len(first_params):], eta, momentum)
        if first_params_eta_scale > 0:
            updates.update(
                learn_scheme(grads[:len(first_params)], first_params,
                             eta * first_params_eta_scale, momentum))
    print("Compiling training function...")
    train_fn = theano.function(list(input_vars.values()),
                               cost,
                               updates=updates,
                               on_unused_input='ignore')

    # prepare and compile validation function, if requested
    if options.validate:
        print("Compiling validation function...")
        outputs_test = lasagne.layers.get_output(network, deterministic=True)
        cost_test = T.mean(model.cost(outputs_test, input_vars, 'valid', cfg))
        if isinstance(outputs_test, (list, tuple)):
            outputs_test = outputs_test[0]
        val_fn = theano.function([input_vars[k] for k in val_formats],
                                 [cost_test, outputs_test],
                                 on_unused_input='ignore')

    # restore previous training state, or create fresh training state
    state = {}
    if options.keep_state:
        statefile = modelfile[:-len('.npz')] + '.state'
        if os.path.exists(statefile):
            print("Restoring training state...")
            # The state file is written with pickle.dump() below, so np.load
            # must be allowed to unpickle (the default is False since NumPy
            # 1.16.3). NOTE: unpickling executes arbitrary code; only resume
            # from state files you created yourself.
            state = np.load(statefile, encoding='latin1', allow_pickle=True)
            restore_state(network, updates, state['network'])
    epochs = cfg['epochs']
    epochsize = cfg['epochsize']
    batches = iter(batches)
    if options.save_errors:
        errors = state.get('errors', [])
    if first_params and cfg['first_params_log']:
        first_params_hist = []
        if options.keep_state and os.path.exists(modelfile[:-4] + '.hist.npz'):
            with np.load(modelfile[:-4] + '.hist.npz') as f:
                first_params_hist = list(
                    zip(*(f['param%d' % i] for i in range(len(first_params)))))
    if patience > 0:
        best_error = state.get('best_error', np.inf)
        best_state = state.get('best_state') or get_state(network, updates)
        patience = state.get('patience', patience)
        trials_of_patience = state.get('trials_of_patience',
                                       trials_of_patience)
    epoch = state.get('epoch', 0)
    del state

    # run training loop
    print("Training:")
    for epoch in range(epoch, epochs):
        # actual training
        err = 0
        for batch in progress(range(epochsize),
                              min_delay=.5,
                              desc='Epoch %d/%d: Batch ' %
                              (epoch + 1, epochs)):
            err += train_fn(**next(batches))
            # the running sum turns non-finite as soon as one batch loss does
            if not np.isfinite(err):
                print("\nEncountered NaN loss in training. Aborting.")
                sys.exit(1)
            # every `first_params_log` batches, record the current values of
            # the first parameters and rewrite the history file
            if first_params and cfg['first_params_log'] and (
                    batch % cfg['first_params_log'] == 0):
                first_params_hist.append(
                    tuple(param.get_value() for param in first_params))
                np.savez(
                    modelfile[:-4] + '.hist.npz', **{
                        'param%d' % i: param
                        for i, param in enumerate(zip(*first_params_hist))
                    })

        # report training loss
        print("Train loss: %.3f" % (err / epochsize))
        if options.save_errors:
            errors.append(err / epochsize)

        # compute and report validation loss, if requested
        if options.validate:
            import time
            t0 = time.time()
            # predict in mini-batches
            val_err = 0
            val_batches = 0
            preds = []
            truth = []
            for batch in run_val_datafeed():
                e, p = val_fn(**batch)
                val_err += np.sum(e)
                val_batches += 1
                preds.append(p)
                truth.append(batch['label'])
            t1 = time.time()
            # join mini-batches
            preds = np.concatenate(preds) if len(preds) > 1 else preds[0]
            truth = np.concatenate(truth) if len(truth) > 1 else truth[0]
            # show results
            print("Validation loss: %.3f" % (val_err / val_batches))
            from eval import evaluate
            results = evaluate(preds, truth)
            print("Validation error: %.3f" % (1 - results['accuracy']))
            print("Validation MAP: %.3f" % results['map'])
            print("(took %.2f seconds)" % (t1 - t0))
            if options.save_errors:
                errors.append(val_err / val_batches)
                errors.append(1 - results['accuracy'])
                errors.append(results['map'])

        # update learning rate and/or apply early stopping, if needed
        if patience > 0:
            if patience_criterion == 'train_loss':
                cur_error = err / epochsize
            elif patience_criterion == 'valid_loss':
                cur_error = val_err / val_batches
            elif patience_criterion == 'valid_error':
                cur_error = 1 - results['accuracy']
            elif patience_criterion == 'valid_map':
                cur_error = 1 - results['map']
            if cur_error <= best_error:
                # improvement: remember this state and reset the countdown
                best_error = cur_error
                best_state = get_state(network, updates)
                patience = cfg['patience']
            else:
                patience -= 1
                if patience == 0:
                    # out of patience: optionally decay the learning rate,
                    # roll back to the best state and use up one trial
                    if eta_decay_every == 'trial_of_patience' and eta_decay != 1:
                        eta.set_value(eta.get_value() *
                                      lasagne.utils.floatX(eta_decay))
                    restore_state(network, updates, best_state)
                    patience = cfg['patience']
                    trials_of_patience -= 1
                    print("Lost patience (%d remaining trials)." %
                          trials_of_patience)
                    if trials_of_patience == 0:
                        break
        if eta_decay_every != 'trial_of_patience' and eta_decay != 1 and \
                (epoch + 1) % eta_decay_every == 0:
            eta.set_value(eta.get_value() * lasagne.utils.floatX(eta_decay))
        if eta_cycle[epoch % len(eta_cycle)] != 1:
            eta.set_value(
                eta.get_value() *
                lasagne.utils.floatX(eta_cycle[epoch % len(eta_cycle)]))

        # store current training state, if needed
        if options.keep_state:
            state = {}
            state['epoch'] = epoch + 1
            state['network'] = get_state(network, updates)
            if options.save_errors:
                state['errors'] = errors
            if patience > 0:
                state['best_error'] = best_error
                state['best_state'] = best_state
                state['patience'] = patience
                state['trials_of_patience'] = trials_of_patience
            with open(statefile, 'wb') as f:
                pickle.dump(state, f, -1)
            del state

        # for debugging: print memory use and break into debugger
        #import resource, psutil
        #print("Memory usage: %.3f MiB / %.3f MiB" %
        #      (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.,
        #       psutil.Process().memory_info()[0] / float(1024**2)))
        #import pdb; pdb.set_trace()

    # save final network
    print("Saving final model")
    save_model(modelfile, network, cfg)
    if options.save_errors:
        # one row per trained epoch, one column per recorded error measure
        np.savez(modelfile[:-len('.npz')] + '.err.npz',
                 np.asarray(errors).reshape(epoch + 1, -1))
コード例 #4
0
ファイル: predict.py プロジェクト: remidefleurian/ismir2015
def main():
    """
    Compute network predictions for all files of the 'valid' and 'test'
    file lists of the dataset named on the command line and save them to
    ``options.outfile``.

    Spectrograms are produced lazily through a chain of generators
    (extraction or cache lookup, optional pitch shift, mel filterbank and
    logarithm, z-scoring with the stored mean/std, padding by
    ``blocklen // 2`` frames on each side) that runs in a background
    thread. Depending on ``options.mem_use``, each spectrogram is passed
    through the network at once ('high'), in overlapping chunks ('mid'), or
    as mini-batches of training-sized excerpts (any other value). With
    ``options.plot``, each spectrogram is shown together with its
    predictions. The output file maps each file name to its predictions.
    """
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    outfile = options.outfile
    sample_rate = 22050
    frame_len = 1024
    fps = 70
    mel_bands = 80
    mel_min = 27.5
    mel_max = 8000
    blocklen = 115
    batchsize = 32

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    print("Preparing data reading...")
    datadir = os.path.join(os.path.dirname(__file__),
                           os.path.pardir, 'datasets', options.dataset)

    # - load filelist
    with io.open(os.path.join(datadir, 'filelists', 'valid')) as f:
        filelist = [l.rstrip() for l in f if l.rstrip()]
    with io.open(os.path.join(datadir, 'filelists', 'test')) as f:
        filelist += [l.rstrip() for l in f if l.rstrip()]

    # - create generator for spectra
    spects = (cached(options.cache_spectra and
                     os.path.join(options.cache_spectra, fn + '.npy'),
                     audio.extract_spect,
                     os.path.join(datadir, 'audio', fn),
                     sample_rate, frame_len, fps)
              for fn in filelist)

    # - pitch-shift if needed
    if options.pitchshift:
        import scipy.ndimage
        spline_order = 2
        # Only the first `bin_mel_max` bins are used below, so we request
        # exactly that many output bins. (Formerly `mel_max`, a frequency in
        # Hz, was used as the bin count; the retained bins are unaffected by
        # the output width, it merely computed thousands of discarded ones.)
        spects = (scipy.ndimage.affine_transform(
                    spect, (1, 1 / (1 + options.pitchshift / 100.)),
                    output_shape=(len(spect), bin_mel_max),
                    order=spline_order)
                  for spect in spects)

    # - prepare mel filterbank
    filterbank = audio.create_mel_filterbank(sample_rate, frame_len, mel_bands,
                                             mel_min, mel_max)
    filterbank = filterbank[:bin_mel_max].astype(floatX)

    # - define generator for mel spectra
    spects = (np.log(np.maximum(np.dot(spect[:, :bin_mel_max], filterbank),
                                1e-7))
              for spect in spects)

    # - load mean/std
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % options.dataset)
    with np.load(meanstd_file) as f:
        mean = f['mean']
        std = f['std']
    mean = mean.astype(floatX)
    istd = np.reciprocal(std).astype(floatX)

    # - define generator for Z-scoring
    spects = ((spect - mean) * istd for spect in spects)

    # - define generator for silence-padding
    # (log(1e-7) is the floor applied to the mel spectra above, so the padding
    # frames equal normalized all-floor frames)
    pad = np.tile((np.log(1e-7) - mean) * istd, (blocklen // 2, 1))
    spects = (np.concatenate((pad, spect, pad), axis=0) for spect in spects)

    # - we start the generator in a background thread (not required)
    spects = augment.generate_in_background([spects], num_cached=1)


    print("Preparing prediction function...")
    # instantiate neural network
    input_var = T.tensor3('input')
    inputs = input_var.dimshuffle(0, 'x', 1, 2)  # insert "channels" dimension
    network = model.architecture(inputs, (None, 1, blocklen, mel_bands))

    # load saved weights
    with np.load(modelfile) as f:
        lasagne.layers.set_all_param_values(
                network, [f['param%d' % i] for i in range(len(f.files))])

    # performant way: convert to fully-convolutional network
    if options.mem_use != 'low':
        import model_to_fcn
        network = model_to_fcn.model_to_fcn(network, allow_unlink=True)

    # create output expression
    outputs = lasagne.layers.get_output(network, deterministic=True)

    # prepare and compile prediction function
    print("Compiling prediction function...")
    test_fn = theano.function([input_var], outputs)

    # run prediction loop
    print("Predicting:")
    predictions = []
    for spect in progress(spects, total=len(filelist), desc='File '):
        if options.mem_use == 'high':
            # fastest way: pass full spectrogram through network at once
            preds = test_fn(spect[np.newaxis])  # insert batch dimension
        elif options.mem_use == 'mid':
            # performant way: pass spectrogram in equal chunks of up to one
            # minute, taking care to overlap by `blocklen // 2` frames and to
            # not pass a chunk shorter than `blocklen` frames
            chunks = np.ceil(len(spect) / (fps * 60.))
            hopsize = int(np.ceil(len(spect) / chunks))
            chunksize = hopsize + blocklen - 1
            # np.vstack needs a sequence; passing a generator is deprecated
            # since NumPy 1.16 and rejected by newer releases
            preds = np.vstack([test_fn(spect[np.newaxis, pos:pos + chunksize])
                               for pos in range(0, len(spect), hopsize)])
        else:
            # naive way: pass excerpts of the size used during training
            # - view spectrogram memory as a 3-tensor of overlapping excerpts
            num_excerpts = len(spect) - blocklen + 1
            excerpts = np.lib.stride_tricks.as_strided(
                    spect, shape=(num_excerpts, blocklen, spect.shape[1]),
                    strides=(spect.strides[0], spect.strides[0], spect.strides[1]))
            # - pass mini-batches through the network and concatenate results
            preds = np.vstack([test_fn(excerpts[pos:pos + batchsize])
                               for pos in range(0, num_excerpts, batchsize)])
        predictions.append(preds)
        if options.plot:
            if spect.ndim == 3:
                spect = spect[0]  # remove channel axis
            # remove the padding: exactly `blocklen // 2` frames per side.
            # (`-blocklen//2` would parse as `(-blocklen)//2` and cut one
            # frame too many from the end for odd `blocklen`.)
            spect = spect[blocklen // 2:-(blocklen // 2)]
            import matplotlib.pyplot as plt
            fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
            ax1.imshow(spect.T[::-1], vmin=-3, cmap='hot', aspect='auto',
                       interpolation='nearest')
            ax2.plot(preds)
            ax2.set_ylim(0, 1.1)
            plt.show()

    # save predictions
    print("Saving predictions")
    np.savez(outfile, **{fn: pred for fn, pred in zip(filelist, predictions)})
コード例 #5
0
ファイル: invert.py プロジェクト: saum25/ISMIR-2018
def main():
    """Invert classifier features for one random excerpt of every test file.

    Parses the command line, computes normalised log-mel spectrograms for all
    files in the dataset's ``test`` filelist, and compiles two sets of Theano
    functions: the pre-trained classifier (returning the activations of
    ``args.layer`` and the final ``fc9`` prediction) and the pre-trained
    inverter from ``upconv`` that matches ``args.layer``.

    For each file, a random start time between ``start_offset`` and
    ``end_offset`` seconds is drawn, a mini-batch of ``batchsize`` consecutive
    excerpts starting there is passed through classifier and inverter, and the
    first excerpt is handed to ``plots.plot_figures`` together with its
    inversion and prediction.

    Files that cannot provide a full mini-batch at the drawn position are
    reported and skipped; processing continues with the next file.
    """
    # parse the command line arguments
    parser = utils.argument_parser()
    args = parser.parse_args()

    print("-------------------------------")
    print("classifier:%s" % args.classifier)
    print("inverter:%s" % args.inverter)
    print("dataset_path:%s" % args.dataset_path)
    print("dataset name:%s" % args.dataset)
    print("results path:%s" % args.results_dir)
    print("inverting from: %s" % args.layer)
    print("-------------------------------")

    # default parameters
    sample_rate = 22050
    frame_len = 1024
    fps = 70
    mel_bands = 80
    mel_min = 27.5
    mel_max = 8000
    blocklen = 115
    batchsize = 32
    start_offset = 10  # secs
    end_offset = 20  # secs

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__), args.dataset_path,
                           'datasets', args.dataset)

    # load filelist (blank lines are ignored)
    with io.open(os.path.join(datadir, 'filelists', 'test')) as f:
        filelist = [l.rstrip() for l in f if l.rstrip()]

    # compute spectra
    print("Computing%s spectra..." %
          (" or loading" if args.cache_spectra else ""))

    # one spectrogram per file; indexed as (frames, bins) further below
    spects = []
    for fn in progress(filelist, 'File '):
        # cache_fn is falsy when no cache directory was given
        cache_fn = (args.cache_spectra
                    and os.path.join(args.cache_spectra, fn + '.npy'))
        spects.append(
            cached(cache_fn, audio.extract_spect,
                   os.path.join(datadir, 'audio', fn), sample_rate, frame_len,
                   fps))

    # prepare mel filterbank
    filterbank = audio.create_mel_filterbank(sample_rate, frame_len, mel_bands,
                                             mel_min, mel_max)
    filterbank = filterbank[:bin_mel_max].astype(floatX)

    # lazily compute log-mel spectra; materialised during normalisation below
    mel_spects = (np.log(
        np.maximum(np.dot(spect[:, :bin_mel_max], filterbank), 1e-7))
                  for spect in spects)

    # load the precomputed mean/std used for normalisation
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % args.dataset)
    with np.load(meanstd_file) as f:
        mean = f['mean']
        std = f['std']
    mean = mean.astype(floatX)
    istd = np.reciprocal(std).astype(floatX)

    print("Preparing training data feed...")
    # normalised mel spects, without data augmentation
    mel_spects = [(spect - mean) * istd for spect in mel_spects]

    # we create two sets of theano functions
    # the first uses the pre-trained classifier to generate features and
    # predictions, the second uses the pre-trained inverter to generate mel
    # spectrograms from input features

    # classifier (discriminator) model
    input_var = T.tensor3('input')
    # insert "channels" dimension: (batch, blocklen, mel_bands) becomes
    # (batch, 1, blocklen, mel_bands), which is what the CNN is fed
    inputs = input_var.dimshuffle(0, 'x', 1, 2)

    network = model.architecture(inputs, (None, 1, blocklen, mel_bands))

    # load saved weights
    with np.load(args.classifier) as f:
        lasagne.layers.set_all_param_values(
            network['fc9'], [f['param%d' % i] for i in range(len(f.files))])

    # create output expressions
    outputs_score = lasagne.layers.get_output(network[args.layer],
                                              deterministic=True)
    outputs_pred = lasagne.layers.get_output(network['fc9'],
                                             deterministic=True)

    # prepare and compile prediction functions
    print("Compiling classifier function...")
    pred_fn_score = theano.function([input_var],
                                    outputs_score,
                                    allow_input_downcast=True)
    pred_fn = theano.function([input_var],
                              outputs_pred,
                              allow_input_downcast=True)

    # inverter (generator) model: dense layers are inverted from a matrix of
    # features, all other layers from a 4-tensor of feature maps
    layer_shape = lasagne.layers.get_output_shape(network[args.layer])
    if args.layer in ('fc8', 'fc7'):
        input_var_deconv = T.matrix('input_var_deconv')
        build_inverter = getattr(upconv,
                                 'architecture_upconv_' + args.layer)
        gen_network = build_inverter(input_var_deconv,
                                     (batchsize, layer_shape[1]))
    else:
        input_var_deconv = T.tensor4('input_var_deconv')
        # any layer name not listed here falls back to the conv1 inverter
        known_layers = ('mp6', 'conv5', 'conv4', 'mp3', 'conv2')
        layer_name = args.layer if args.layer in known_layers else 'conv1'
        build_inverter = getattr(upconv, 'architecture_upconv_' + layer_name)
        gen_network = build_inverter(
            input_var_deconv,
            (batchsize, layer_shape[1], layer_shape[2], layer_shape[3]),
            args.n_conv_layers, args.n_conv_filters)

    # load saved weights
    with np.load(args.inverter) as f:
        lasagne.layers.set_all_param_values(
            gen_network, [f['param%d' % i] for i in range(len(f.files))])

    # create inverter output expression
    outputs = lasagne.layers.get_output(gen_network, deterministic=True)
    print("Compiling inverter function...")
    test_fn = theano.function([input_var_deconv],
                              outputs,
                              allow_input_downcast=True)

    # instance-based feature inversion
    # (1) pick a file from the dataset (2) select a time index to read the
    # instance from
    file_idx = np.arange(0, len(filelist))
    hop_size = sample_rate / fps  # samples

    for file_instance in file_idx:
        print("<<<<Analysis for the file: %d>>>>" % (file_instance + 1))
        # random integer start position in [start_offset, end_offset) secs
        time_idx = np.random.randint(start_offset, end_offset, 1)[0]

        mel_spect = mel_spects[file_instance]
        num_excerpts = len(mel_spect) - blocklen + 1
        print("Number of excerpts in the file :%d" % num_excerpts)

        # convert the time_idx to the excerpt index
        excerpt_idx = int(np.round((time_idx * sample_rate) / (hop_size)))
        print("Time_idx: %f secs, Excerpt_idx: %d" % (time_idx, excerpt_idx))
        if excerpt_idx + batchsize > num_excerpts:
            print(
                "------------------Number of excerpts are less for file: %d--------------------"
                % (file_instance + 1))
            # skip only this file; checking before building the strided view
            # also keeps files shorter than blocklen from raising on a
            # negative shape
            continue

        # view the spectrogram memory as a 3-d array of overlapping excerpts
        # of shape: num_excerpts x blocklen x mel_spect.shape[1]
        excerpts = np.lib.stride_tricks.as_strided(
            mel_spect,
            shape=(num_excerpts, blocklen, mel_spect.shape[1]),
            strides=(mel_spect.strides[0], mel_spect.strides[0],
                     mel_spect.strides[1]))

        # generate feature representations for the selected excerpt.
        # CAUTION: the inverter needs a full mini-batch (a consequence of how
        # it was trained), so the (batchsize - 1) following excerpts are fed
        # as well, but are not analysed
        batch = excerpts[excerpt_idx:excerpt_idx + batchsize]
        scores = pred_fn_score(batch)
        predictions = pred_fn(batch)

        # drop the channel axis: batchsize x blocklen x n_mels
        mel_predictions = np.squeeze(test_fn(scores), axis=1)

        # plot the input mel spectrogram and its inverted representation,
        # both passed through utils.normalise first
        plots.plot_figures(utils.normalise(excerpts[excerpt_idx]),
                           utils.normalise(mel_predictions[0]),
                           predictions[0][0], file_instance, excerpt_idx,
                           args.results_dir, args.layer)
コード例 #6
0
def main():
    """Compute network predictions for all files of a dataset and save them.

    Reads the configuration (``options.vars``, an optional
    ``<modelfile>.vars`` and ``options.var`` assignments), builds a lazy
    pipeline of spectrograms for every file in the requested filelists
    (optionally pitch-shifted and loudness-adjusted, cropped to
    ``bin_mel_max`` bins and padded with ``blocklen // 2`` zero frames on both
    ends), compiles the prediction function and runs it on each spectrogram
    using the strategy selected by ``options.mem_use``.

    The predictions are stored per file name in ``options.outfile``, pickled
    if the name ends in ``.pkl`` and via ``np.savez`` otherwise.
    """
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    outfile = options.outfile

    # read configuration files and immediate settings
    cfg = {}
    if os.path.exists(modelfile + '.vars'):
        options.vars.insert(1, modelfile + '.vars')
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    # read some settings into local variables
    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']
    blocklen = cfg['blocklen']
    batchsize = cfg['batchsize']

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    print("Preparing data reading...")
    datadir = os.path.join(os.path.dirname(__file__), os.path.pardir,
                           'datasets', options.dataset)

    # - load filelist (several comma-separated lists are concatenated)
    filelist = []
    for d in options.filelists.split(','):
        with io.open(os.path.join(datadir, 'filelists', d)) as f:
            filelist.extend(l.rstrip() for l in f if l.rstrip())

    # - create generator for spectra (the cache file name is falsy when no
    #   cache directory was given)
    spects = (cached(
        options.cache_spectra
        and os.path.join(options.cache_spectra, fn + '.npy'),
        audio.extract_spect, os.path.join(datadir, 'audio',
                                          fn), sample_rate, frame_len, fps)
              for fn in filelist)

    # - pitch-shift if needed
    if options.pitchshift:
        import scipy.ndimage
        spline_order = 2
        # Only the first bin_mel_max bins survive the crop below, and every
        # output bin is interpolated independently, so there is no point in
        # resampling to `mel_max` (a frequency, not a bin count) columns.
        spects = (scipy.ndimage.affine_transform(
            spect, (1, 1 / (1 + options.pitchshift / 100.)),
            output_shape=(len(spect), bin_mel_max),
            order=spline_order) for spect in spects)

    # - define generator for cropped spectra
    spects = (spect[:, :bin_mel_max] for spect in spects)

    # - adjust loudness if needed
    if options.loudness:
        spects = (spect * float(10.**(options.loudness / 10.))
                  for spect in spects)

    # - define generator for silence-padding
    pad = np.zeros((blocklen // 2, bin_mel_max), dtype=floatX)
    spects = (np.concatenate((pad, spect, pad), axis=0) for spect in spects)

    # - we start the generator in a background thread (not required)
    spects = augment.generate_in_background([spects], num_cached=1)

    print("Preparing prediction function...")
    # instantiate neural network
    input_var = T.tensor3('input')
    inputs = input_var.dimshuffle(0, 'x', 1, 2)  # insert "channels" dimension
    network = model.architecture(inputs, (None, 1, blocklen, bin_mel_max), cfg)

    # load saved weights
    with np.load(modelfile) as f:
        lasagne.layers.set_all_param_values(
            network, [f['param%d' % i] for i in range(len(f.files))])

    # performant way: convert to fully-convolutional network
    if options.mem_use != 'low':
        import model_to_fcn
        network = model_to_fcn.model_to_fcn(network, allow_unlink=True)

    # create output expression
    outputs = lasagne.layers.get_output(network, deterministic=True)

    # prepare and compile prediction function
    print("Compiling prediction function...")
    test_fn = theano.function([input_var], outputs)

    # run prediction loop
    print("Predicting:")
    predictions = []
    for spect in progress(spects, total=len(filelist), desc='File '):
        if options.mem_use == 'high':
            # fastest way: pass full spectrogram through network at once
            preds = test_fn(spect[np.newaxis])  # insert batch dimension
        elif options.mem_use == 'mid':
            # performant way: pass spectrogram in equal chunks of up to about
            # one minute; consecutive chunks overlap by `blocklen - 1` frames
            chunks = np.ceil(len(spect) / (fps * 60.))
            hopsize = int(np.ceil(len(spect) / chunks))
            chunksize = hopsize + blocklen - 1
            # np.vstack needs a real sequence: generators were deprecated in
            # NumPy 1.16 and are rejected by current releases
            preds = np.vstack([
                test_fn(spect[np.newaxis, pos:pos + chunksize])
                for pos in range(0, len(spect), hopsize)])
        else:
            # naive way: pass excerpts of the size used during training
            # - view spectrogram memory as a 3-tensor of overlapping excerpts
            num_excerpts = len(spect) - blocklen + 1
            excerpts = np.lib.stride_tricks.as_strided(
                spect,
                shape=(num_excerpts, blocklen, spect.shape[1]),
                strides=(spect.strides[0], spect.strides[0], spect.strides[1]))
            # - pass mini-batches through the network and concatenate results
            preds = np.vstack([
                test_fn(excerpts[pos:pos + batchsize])
                for pos in range(0, num_excerpts, batchsize)])
        predictions.append(preds)
        if options.plot:
            if spect.ndim == 3:
                spect = spect[0]  # remove channel axis
            # remove zero padding; `-blocklen // 2` would parse as
            # `(-blocklen) // 2` and cut one frame too many for odd blocklen
            pad_len = len(pad)
            spect = spect[pad_len:len(spect) - pad_len]
            import matplotlib.pyplot as plt
            fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
            ax1.imshow(spect.T[::-1],
                       vmin=-3,
                       cmap='hot',
                       aspect='auto',
                       interpolation='nearest')
            ax2.plot(preds)
            ax2.set_ylim(0, 1.1)
            plt.show()

    # save predictions, keyed by file name
    print("Saving predictions")
    data = dict(zip(filelist, predictions))
    if outfile.endswith('.pkl'):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        with io.open(outfile, 'wb') as f:
            pickle.dump(data, f, protocol=-1)
    else:
        np.savez(outfile, **data)
コード例 #7
0
        # reduce the raw box values and class labels to their distinct values
        temp_box = set(bbox)
        temp_cls = set(img_class)
        final_box = [float(value) for value in temp_box]
        final_class = [int(label) for label in temp_cls]

        separator = (
            "------------------------------------------------------------------------"
        )
        print(separator)
        print(final_box, len(final_box), final_class, len(final_class))
        print(separator)

        # only proceed when exactly four distinct box values remain
        if len(final_box) != 4:
            print(len(final_box), " is more than or less than 4")
        else:
            with tf.Session() as sess:
                n_classes = len(final_class)
                n_box_values = len(final_box)
                my_net = architecture(sess, 1, 512, 3, n_classes,
                                      n_box_values)
                my_net.initialise(sess)
                my_net.param(images, segmented, final_class,
                             np.asarray(final_box))
                # terminate the program once the network has been set up
                sys.exit()
コード例 #8
0
def main():
    """Compute network predictions for a dataset and save them to a file.

    Reads the configuration (``options.vars``, an optional
    ``<modelfile>.vars`` and ``options.var`` assignments), prepares the test
    data feed for the requested filelists, builds the network and loads its
    weights, and compiles the prediction function. Depending on the options
    it also sets up a saliency output (``--saliency``) or splits the network
    at the layer named ``before_pool`` (``--split-pool``); the two cannot be
    combined.

    Spectrograms longer than ``len_max`` seconds are predicted in overlapping
    segments whose outputs are concatenated. With ``options.plot`` set, the
    spectrogram and the top-5 class curves are shown per item. Predictions are
    stored per file name in ``options.outfile``, pickled if the name ends in
    ``.pkl`` and via ``np.savez`` otherwise.
    """
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    outfile = options.outfile
    if options.split_pool and options.saliency:
        parser.error("--split-pool and --saliency cannot be combined.")

    # read configuration files and immediate settings
    cfg = {}
    if os.path.exists(modelfile + '.vars'):
        options.vars.insert(1, modelfile + '.vars')
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    # read some settings into local variables
    fps = cfg['fps']
    len_min = cfg['len_min']
    len_max = cfg['len_max']

    # prepare dataset
    print("Preparing data reading...")
    datadir = os.path.join(os.path.dirname(__file__),
                           os.path.pardir, 'datasets', options.dataset)

    # - load filelists (several comma-separated lists are concatenated)
    filelist = []
    for d in options.filelists.split(','):
        with io.open(os.path.join(datadir, 'filelists', d)) as f:
            filelist.extend(l.rstrip() for l in f if l.rstrip())

    # - create data feed
    feed, input_formats = data.prepare_datafeed(filelist, datadir, 'test', cfg)

    # - we start the generator in a background thread
    if not options.plot:
        batches = augment.generate_in_background([data.run_datafeed(feed, cfg)],
                                                 num_cached=1)
    else:
        # unless we're plotting; this would mess up the progress counter
        batches = data.run_datafeed(feed, cfg)

    print("Preparing prediction function...")
    # instantiate neural network: one symbolic input per entry of the data
    # feed, with dtype and number of dimensions taken from its format
    input_vars = {name: T.TensorType(str(np.dtype(dtype)),
                                     (False,) * len(shape))(name)
                  for name, (dtype, shape) in input_formats.items()}
    input_shapes = {name: shape
                    for name, (dtype, shape) in input_formats.items()}
    network = model.architecture(input_vars, input_shapes, cfg)
    if isinstance(network, list) and not options.include_side_outputs:
        network = network[0]  # only use the main output

    # load saved weights
    with np.load(modelfile, encoding='latin1') as f:
        lasagne.layers.set_all_param_values(
                network, [f['param%d' % i] for i in range(len(f.files))])

    # insert guided backprop, if needed for saliency
    # NOTE(review): a saliency index of 0 is falsy and would be ignored here
    # and below -- confirm against the option's default in opts_parser()
    if options.saliency:
        from gbprop import replace_nonlinearities
        replace_nonlinearities(network, lasagne.nonlinearities.leaky_rectify)

    # create output expression(s)
    if options.split_pool:
        # predict up to the layer named 'before_pool' only; the remainder of
        # the network is compiled into a separate function below
        network_end = network
        network = next(l for l in lasagne.layers.get_all_layers(network)[::-1]
                       if l.name == 'before_pool')
    outputs = lasagne.layers.get_output(network, deterministic=True)
    if options.split_pool:
        split_input_var = T.tensor4('input2')
        split_outputs = lasagne.layers.get_output(
            network_end, {network: split_input_var}, deterministic=True)
        # collect the free inputs of the second part, i.e. everything that is
        # neither a shared variable nor a constant
        split_input_vars = [v for v in theano.gof.graph.inputs([split_outputs])
                            if not isinstance(v, theano.compile.SharedVariable)
                            and not isinstance(v, theano.tensor.Constant)]

    # create saliency map expression, if needed
    if options.saliency:
        saliency = theano.grad(outputs[:, options.saliency].sum(),
                               input_vars['spect'])
        outputs = (outputs + [saliency] if isinstance(outputs, list)
                   else [outputs, saliency])

    # prepare and compile prediction function
    print("Compiling prediction function...")
    test_fn = theano.function(list(input_vars.values()), outputs,
                              on_unused_input='ignore')
    if options.split_pool:
        pool_fn = theano.function(split_input_vars, split_outputs,
                                  on_unused_input='ignore')

    # prepare plotting, if needed
    if options.plot:
        import matplotlib
        if os.environ.get('MPLBACKEND'):
            matplotlib.use(os.environ['MPLBACKEND'])  # for old versions
        import matplotlib.pyplot as plt
        # read the labels as text: in binary mode the lines are bytes on
        # Python 3, where stripping with a str argument raises a TypeError
        with io.open(os.path.join(datadir, 'labels', 'labelset')) as f:
            labelset = [l.rstrip('\r\n') for l in f]

    # run prediction loop
    print("Predicting:")
    predictions = []
    for batch in batches:
        spect = batch.pop('spect')
        if spect.shape[-2] <= len_max * fps or len_max == 0:
            # predict on full spectrogram at once
            preds = test_fn(spect=spect, **batch)
        else:
            # predict in segments of len_max, with overlap len_min
            # drop any remainder shorter than len_min (len_max if len_min == 0)
            last_start = spect.shape[-2] + 1 - (len_min or len_max) * fps
            step = (len_max - len_min) * fps
            preds = [test_fn(spect=spect[..., pos:pos + len_max * fps, :],
                             **batch)
                     for pos in range(0, last_start, step)]
            # join the segment outputs again: along axis 2 for outputs with
            # more than two dimensions, along axis 0 otherwise
            if isinstance(preds[0], list):
                preds = [np.concatenate(p, axis=2 if p[0].ndim > 2 else 0)
                         for p in zip(*preds)]
            else:
                preds = np.concatenate(preds,
                                       axis=2 if preds[0].ndim > 2 else 0)
        if cfg['arch.pool'] == 'none' or '_nopool' in cfg['arch.pool']:
            # unpooled 4-d outputs: keep the first item and the first index
            # of the last axis, then transpose the remaining two axes
            if isinstance(preds, list):
                preds = [p[0, :, :, 0].T if p.ndim == 4 else p for p in preds]
            else:
                preds = preds[0, :, :, 0].T
        elif options.split_pool:
            preds = pool_fn(preds, **batch)
        predictions.append(preds)
        if options.plot:
            if spect.ndim == 4:
                spect = spect[0]  # remove batch axis
            if spect.ndim == 3:
                spect = spect[0]  # remove channel axis
            if isinstance(preds, list):
                preds, sides = preds[0], preds[1:]
            else:
                sides = []
            fig, axs = plt.subplots(2 + len(sides), 1, sharex=True)
            axs[0].imshow(np.log1p(1e-3 * spect).T[::-1], cmap='hot',
                          aspect='auto', interpolation='nearest')
            # pick the K classes with the largest lme() score over axis 0,
            # in descending order
            K = 5
            top_k = lme(preds, axis=0).argpartition(preds.shape[1] - 1 -
                                                    np.arange(K))[::-1][:K]
            preds = softmax(preds, axis=-1)
            # stretch the prediction time axis to the spectrogram length
            x = np.arange(len(preds)) * (len(spect) / float(len(preds)))
            for k in top_k:
                axs[1].plot(x, preds[:, k], label=labelset[k])
            axs[1].legend(loc='best')
            for side, ax in zip(sides, axs[2:]):
                side = softmax(side, axis=0)
                ax.plot(x, side)
            plt.show()

    # save predictions, keyed by file name
    print("Saving predictions")
    predictions = dict(zip(filelist, predictions))
    if outfile.endswith('.pkl'):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        with io.open(outfile, 'wb') as f:
            pickle.dump(predictions, f, protocol=-1)
    else:
        np.savez(outfile, **predictions)
コード例 #9
0
def main():
    # parse the command line arguments
    parser = utils.argument_parser()
    args = parser.parse_args()

    print("-------------------------------")
    print("classifier:%s" % args.classifier)
    print("inverter:%s" % args.inverter)
    print("dataset_path:%s" % args.dataset_path)
    print("dataset name:%s" % args.dataset)
    print("results path:%s" % args.results_dir)
    print("quantitative analysis:%s" % args.quant_analysis)
    print("mask inversion flag: %r" % args.mask_inv_flag)
    print("plot quant results case: %r" % args.plot_quant_res)
    print("-------------------------------")

    # just plots the quantitative analysis results and exits
    if args.plot_quant_res:
        # jamendo results
        exp_loss_jamendo_case1 = [
            0, 6.48, 10.87, 13.33, 15.51, 19.15, 25.94, 37.56, 49.11, 56.85,
            57.77
        ]  #, 57.77]
        exp_loss_jamendo_case2 = [
            57.77, 59.19, 58.11, 51.81, 43.1, 31.87, 22.84, 15.51, 11.03, 5.86,
            0.03
        ]  #, 0]
        rel_area_jamendo_case1 = [100, 96, 87, 77, 65, 53, 39, 26, 14, 4,
                                  0]  #, 0]
        rel_area_jamendo_case2 = [0, 4, 13, 23, 35, 47, 61, 74, 86, 96,
                                  100]  #, 100]
        exp_losses_jamendo = [exp_loss_jamendo_case1, exp_loss_jamendo_case2]
        rel_areas_jamendo = [rel_area_jamendo_case1, rel_area_jamendo_case2]

        # rwc results
        exp_loss_rwc_case1 = [
            0, 6.52, 10.9, 13.39, 15.87, 21.28, 30.92, 43.22, 53.41, 60.85,
            63.66
        ]  #, 63.66]
        exp_loss_rwc_case2 = [
            63.66, 64.5, 61.01, 52.55, 39.39, 26.27, 16.13, 9.55, 5.05, 2.26,
            0.03
        ]  #, 0]
        rel_area_rwc_case1 = [100, 96, 87, 75, 61, 47, 33, 20, 10, 3, 0]  #, 0]
        rel_area_rwc_case2 = [0, 4, 13, 25, 39, 53, 67, 80, 90, 97,
                              100]  #, 100]
        exp_losses_rwc = [exp_loss_rwc_case1, exp_loss_rwc_case2]
        rel_areas_rwc = [rel_area_rwc_case1, rel_area_rwc_case2]

        plots.quant_eval(exp_losses_jamendo, rel_areas_jamendo, exp_losses_rwc,
                         rel_areas_rwc, args.results_dir)
        exit(0)

    # default parameters
    sample_rate = 22050
    frame_len = 1024
    fps = 70
    mel_bands = 80
    mel_min = 27.5
    mel_max = 8000
    blocklen = 115
    batchsize = 32

    # single instance inversion/quantitative analysis parameters
    preds_before = []
    if not args.quant_analysis:
        time_index = 10
        masking_threshold = [0.7]
        duration = 0  # no use
        increment = 0.5
    else:
        preds_after = []
        area_per_instance = []
        result = []
        start_offset = 5
        end_offset = 20
        duration = 200
        increment = 0.5
        masking_threshold = np.arange(0.0, 1.2, 0.1)
        class_threshold = 0.66  # Calculated over Jamendo validation dataset

    # printing and plotting parameters
    df = True
    #inp = []
    #expns =[]

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__), args.dataset_path,
                           'datasets', args.dataset)

    # load filelist
    with io.open(os.path.join(datadir, 'filelists', 'test')) as f:
        filelist = [l.rstrip() for l in f if l.rstrip()]

    # compute spectra
    print("Computing%s spectra..." %
          (" or loading" if args.cache_spectra else ""))

    spects = [
    ]  # list of tuples, where each tuple has magnitude and phase information for one audio file
    for fn in progress(filelist, 'File '):
        cache_fn = (args.cache_spectra
                    and os.path.join(args.cache_spectra, fn + '.npy'))
        spects.append(
            cached(cache_fn, audio.extract_spect,
                   os.path.join(datadir, 'audio', fn), sample_rate, frame_len,
                   fps))

    # prepare mel filterbank
    filterbank = audio.create_mel_filterbank(sample_rate, frame_len, mel_bands,
                                             mel_min, mel_max)
    filterbank = filterbank[:bin_mel_max].astype(floatX)

    # precompute mel spectra, if needed, otherwise just define a generator
    mel_spects = (np.log(
        np.maximum(np.dot(spect[:, :bin_mel_max], filterbank), 1e-7))
                  for spect in spects)

    # load mean/std or compute it, if not computed yet
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % args.dataset)
    with np.load(meanstd_file) as f:
        mean = f['mean']
        std = f['std']
    mean = mean.astype(floatX)
    istd = np.reciprocal(std).astype(floatX)

    print("Preparing training data feed...")
    # normalised mel spects, without data augmentation
    mel_spects = [(spect - mean) * istd for spect in mel_spects]

    # we create two theano functions
    # the first one uses pre-trained classifier to generate features and predictions
    # the second one uses pre-trained inverter to generate mel spectrograms from input features

    # classifier (discriminator) model
    input_var = T.tensor3('input')
    inputs = input_var.dimshuffle(
        0, 'x', 1, 2
    )  # insert "channels" dimension, changes a 32 x 115 x 80 input to 32 x 1 x 115 x 80 input which is fed to the CNN

    network = model.architecture(inputs, (None, 1, blocklen, mel_bands))

    # load saved weights
    with np.load(args.classifier) as f:
        lasagne.layers.set_all_param_values(
            network['fc9'], [f['param%d' % i] for i in range(len(f.files))])

    # create output expression
    outputs_score = lasagne.layers.get_output(network['fc8'],
                                              deterministic=True)
    outputs_pred = lasagne.layers.get_output(network['fc9'],
                                             deterministic=True)

    # prepare and compile prediction function
    print("Compiling classifier function...")
    pred_fn_score = theano.function([input_var],
                                    outputs_score,
                                    allow_input_downcast=True)
    pred_fn = theano.function([input_var],
                              outputs_pred,
                              allow_input_downcast=True)

    # inverter (generator) model
    input_var_deconv = T.matrix('input_var_deconv')

    # inverter (generator) model
    gen_network = upconv.architecture_upconv_fc8(
        input_var_deconv,
        (batchsize, lasagne.layers.get_output_shape(network['fc8'])[1]))

    # load saved weights
    with np.load(args.inverter) as f:
        lasagne.layers.set_all_param_values(
            gen_network, [f['param%d' % i] for i in range(len(f.files))])

    # create cost expression
    outputs = lasagne.layers.get_output(gen_network, deterministic=True)
    print("Compiling inverter function...")
    test_fn = theano.function([input_var_deconv],
                              outputs,
                              allow_input_downcast=True)

    # instance-based feature inversion
    # (1) pick a file from a dataset (e.g., dataset: Jamendo test) (2) select a time index to read the instance
    file_idx = np.arange(0, len(filelist))
    hop_size = sample_rate / fps  # samples

    for mt in masking_threshold:

        np.random.seed(0)

        print("\n ++++++ Masking threshold: %f +++++\n " % (mt))

        for file_instance in file_idx:

            print("<<<<Analysis for the file: %d>>>>" % (file_instance + 1))

            if args.quant_analysis:
                time_idx = np.random.randint(
                    start_offset, end_offset, 1
                )[0]  # provides a random integer start position between start and end offsets
            else:
                time_idx = time_index

            td = time_idx  # no use for the single instance inversion case.

            # generate excerpts for the selected file_idx
            # excerpts is a 3-d array of shape: num_excerpts x blocklen x mel_spects_dimensions
            num_excerpts = len(mel_spects[file_instance]) - blocklen + 1
            print("Number of excerpts in the file :%d" % num_excerpts)
            excerpts = np.lib.stride_tricks.as_strided(
                mel_spects[file_instance],
                shape=(num_excerpts, blocklen,
                       mel_spects[file_instance].shape[1]),
                strides=(mel_spects[file_instance].strides[0],
                         mel_spects[file_instance].strides[0],
                         mel_spects[file_instance].strides[1]))

            while (time_idx <= td + duration):
                # convert the time_idx to the excerpt index
                excerpt_idx = int(
                    np.round((time_idx * sample_rate) / (hop_size)))
                print("Time_idx: %.2f secs, Excerpt_idx: %d" %
                      (time_idx, excerpt_idx))
                if ((excerpt_idx + batchsize) > num_excerpts):
                    print(
                        "------------------Number of excerpts are less for file: %d--------------------"
                        % (file_instance + 1))
                    break

                # generating feature representations for the select excerpt.
                # CAUTION: Need to feed mini-batch to the pre-trained model, so (mini_batch-1) following excerpts are also fed, but are not analysed
                # classifier can have less than minibatch data, but the inverter needs a batch of data to make prediction (comes from how the inverter was trained)
                scores = pred_fn_score(excerpts[excerpt_idx:excerpt_idx +
                                                batchsize])
                #print("Feature"),
                #print(scores[file_idx])

                predictions = pred_fn(excerpts[excerpt_idx:excerpt_idx +
                                               batchsize])
                print("Prediction score for the excerpt without masking:%f" %
                      (predictions[0][0]))
                preds_before.append(predictions[0][0])

                mel_predictions = np.squeeze(
                    test_fn(scores), axis=1
                )  # mel_predictions is a 3-d array of shape batch_size x blocklen x n_mels

                # normalising the inverted mel to create a map, and use the map to cut the section in the input mel
                norm_inv = utils.normalise(mel_predictions[0])
                norm_inv[norm_inv < mt] = 0  # Binary mask-----
                norm_inv[norm_inv >= mt] = 1

                if args.quant_analysis:
                    # calculate area
                    area = utils.area_calculation(norm_inv, debug_flag=df)

                    # reversing the mask to keep the portions that seem not useful for the current instance prediction
                    norm_inv, area = utils.invert_mask(
                        mask=norm_inv,
                        mask_inv_flag=args.mask_inv_flag,
                        area_mask=area,
                        debug_flag=df)

                # masking out the input based on the mask created above
                masked_input = np.zeros((batchsize, blocklen, mel_bands))
                masked_input[0] = norm_inv * excerpts[excerpt_idx]

                if args.quant_analysis:
                    # save the area enabled
                    area_per_instance.append(area)
                    # feed the updated input to regenerate prediction
                    # just changing the first input.
                    predictions = pred_fn(masked_input)
                    print(
                        "Predictions score for the excerpt after masking:%f\n"
                        % (predictions[0][0]))
                    preds_after.append(predictions[0][0])

                if not args.quant_analysis:  # save results
                    # saves plots for the input Mel spectrogram and its inverted representation
                    # all plots are normalised in [0, 1] range
                    plots.single_inst_inv(
                        utils.normalise(excerpts[excerpt_idx]),
                        utils.normalise(mel_predictions[0]), norm_inv,
                        utils.normalise(masked_input[0]), preds_before[0],
                        file_instance, excerpt_idx, args.results_dir, 'FC8')

                time_idx += increment

            # plotting figure 6.4 in thesis
            #plots.input_mels()

            # plotting figure 6.6 in thesis
            '''inp.append(excerpts[excerpt_idx])
            expns.append(masked_input[0])
            preds_before.append(predictions[0][0])
        plots.special_cases(utils.normalise(inp[0]), utils.normalise(expns[0]), utils.normalise(inp[1]), utils.normalise(expns[1]), preds_before[0], preds_before[1], file_instance, excerpt_idx, args.results_dir)'''

        if args.quant_analysis:
            res_tuple = utils.quant_result_analysis(preds_before,
                                                    preds_after,
                                                    area_per_instance,
                                                    mt,
                                                    class_threshold,
                                                    debug_flag=df)
            result.append(res_tuple)  # one result per threshold value

        # clearing the lists for the next iteration
        preds_before = []
        preds_after = []
        area_per_instance = []

    if args.quant_analysis:
        # save the quantitative analysis results
        quant_result_columns = [
            'threshold', 'total instances', 'total fails',
            'explanation loss [%]', 'average area'
        ]
        with open(args.results_dir + '/' + 'quant_analysis_result.csv',
                  'w') as fp:
            results_writer = csv.writer(fp, delimiter=',')
            results_writer.writerow(quant_result_columns)
            for result_th in result:
                results_writer.writerow(result_th)
コード例 #10
0
ファイル: train.py プロジェクト: lvaleriu/ismir2018-1
def main():
    """
    Trains a network on the spectrograms of a dataset and saves the result.

    Everything is controlled by the command line (see `opts_parser()`) and by
    the configuration assembled from `options.vars` and `options.var`.
    Labels are read from `.lab` files; segments labelled 'sing' become
    positive targets. On completion, the network parameters are written to
    `options.modelfile`, the configuration to `options.modelfile + '.vars'`,
    and, if `options.save_errors` is set, the per-epoch errors to a
    `.err.npz` file next to the model.
    """
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    if options.load_spectra != 'memory' and not options.cache_spectra:
        parser.error('option --load-spectra=%s requires --cache-spectra' %
                     options.load_spectra)

    # read configuration files and immediate settings
    # (later sources override earlier ones, --var assignments come last)
    cfg = {}
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    # read some settings into local variables
    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']
    blocklen = cfg['blocklen']
    batchsize = cfg['batchsize']

    # number of spectrogram bins to keep: all of them for a learned
    # filterbank, otherwise only those up to `mel_max`
    bin_nyquist = frame_len // 2 + 1
    if cfg['filterbank'] == 'mel_learn':
        bin_mel_max = bin_nyquist
    else:
        bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__), os.path.pardir,
                           'datasets', options.dataset)

    # - load filelist
    with io.open(
            os.path.join(datadir, 'filelists',
                         cfg.get('filelist.train', 'train'))) as f:
        filelist = [l.rstrip() for l in f if l.rstrip()]
    if options.validate:
        with io.open(
                os.path.join(datadir, 'filelists',
                             cfg.get('filelist.valid', 'valid'))) as f:
            filelist_val = [l.rstrip() for l in f if l.rstrip()]
        # validation files are appended so spectra and labels are computed
        # in one pass; they are split off again further below
        filelist.extend(filelist_val)
    else:
        filelist_val = []

    # - compute spectra
    print("Computing%s spectra..." %
          (" or loading" if options.cache_spectra else ""))
    spects = []
    for fn in progress(filelist, 'File '):
        # cache_fn is falsy when no cache directory was given
        cache_fn = (options.cache_spectra
                    and os.path.join(options.cache_spectra, fn + '.npy'))
        spects.append(
            cached(cache_fn,
                   audio.extract_spect,
                   os.path.join(datadir, 'audio', fn),
                   sample_rate,
                   frame_len,
                   fps,
                   loading_mode=options.load_spectra))

    # - load and convert corresponding labels
    print("Loading labels...")
    labels = []
    for fn, spect in zip(filelist, spects):
        fn = os.path.join(datadir, 'labels', fn.rsplit('.', 1)[0] + '.lab')
        with io.open(fn) as f:
            segments = [l.rstrip().split() for l in f if l.rstrip()]
        segments = [(float(start), float(end), label == 'sing')
                    for start, end, label in segments]
        timestamps = np.arange(len(spect)) / float(fps)
        # builtin `bool` instead of the `np.bool` alias, which was removed
        # from recent NumPy releases (it referred to the same type)
        labels.append(create_aligned_targets(segments, timestamps, bool))

    # - split off validation data, if needed
    if options.validate:
        spects_val = spects[-len(filelist_val):]
        spects = spects[:-len(filelist_val)]
        labels_val = labels[-len(filelist_val):]
        labels = labels[:-len(filelist_val)]

    # - prepare training data generator
    print("Preparing training data feed...")
    if not options.augment:
        # Without augmentation, we just create a generator that returns
        # mini-batches of random excerpts
        batches = augment.grab_random_excerpts(spects, labels, batchsize,
                                               blocklen, bin_mel_max)
        batches = augment.generate_in_background([batches], num_cached=15)
    else:
        # For time stretching and pitch shifting, it pays off to preapply the
        # spline filter to each input spectrogram, so it does not need to be
        # applied to each mini-batch later.
        spline_order = cfg['spline_order']
        if spline_order > 1 and options.load_spectra == 'memory':
            from scipy.ndimage import spline_filter
            spects = [
                spline_filter(spect, spline_order).astype(floatX)
                for spect in spects
            ]
            prefiltered = True
        else:
            prefiltered = False

        # We define a function to create the mini-batch generator. This allows
        # us to easily create multiple generators for multithreading if needed.
        def create_datafeed(spects, labels):
            # With augmentation, as we want to apply random time-stretching,
            # we request longer excerpts than we finally need to return.
            max_stretch = cfg['max_stretch']
            batches = augment.grab_random_excerpts(
                spects,
                labels,
                batchsize=batchsize,
                frames=int(blocklen / (1 - max_stretch)))

            # We wrap the generator in another one that applies random time
            # stretching and pitch shifting, keeping a given number of frames
            # and bins only.
            max_shift = cfg['max_shift']
            batches = augment.apply_random_stretch_shift(
                batches,
                max_stretch,
                max_shift,
                keep_frames=blocklen,
                keep_bins=bin_mel_max,
                order=spline_order,
                prefiltered=prefiltered)

            # We apply random frequency filters
            max_db = cfg['max_db']
            batches = augment.apply_random_filters(batches, mel_max, max_db)

            # We apply random loudness changes
            max_loudness = cfg['max_loudness']
            if max_loudness:
                batches = augment.apply_random_loudness(batches, max_loudness)

            return batches

        # We start the mini-batch generator and augmenter in one or more
        # background threads or processes (unless disabled).
        bg_threads = cfg['bg_threads']
        bg_processes = cfg['bg_processes']
        if not bg_threads and not bg_processes:
            # no background processing: just create a single generator
            batches = create_datafeed(spects, labels)
        elif bg_threads:
            # multithreading: create a separate generator per thread
            batches = augment.generate_in_background(
                [create_datafeed(spects, labels) for _ in range(bg_threads)],
                num_cached=bg_threads * 5)
        elif bg_processes:
            # multiprocessing: single generator is forked along with processes
            batches = augment.generate_in_background(
                [create_datafeed(spects, labels)] * bg_processes,
                num_cached=bg_processes * 25,
                in_processes=True)

    print("Preparing training function...")
    # instantiate neural network
    input_var = T.tensor3('input')
    inputs = input_var.dimshuffle(0, 'x', 1, 2)  # insert "channels" dimension
    network = model.architecture(inputs, (None, 1, blocklen, bin_mel_max), cfg)
    print(
        "- %d layers (%d with weights), %f mio params" %
        (len(lasagne.layers.get_all_layers(network)),
         sum(hasattr(l, 'W') for l in lasagne.layers.get_all_layers(network)),
         lasagne.layers.count_params(network, trainable=True) / 1e6))
    print("- weight shapes: %r" % [
        l.W.get_value().shape for l in lasagne.layers.get_all_layers(network)
        if hasattr(l, 'W') and hasattr(l.W, 'get_value')
    ])

    # create cost expression
    target_var = T.vector('targets')
    targets = (0.02 + 0.96 * target_var)  # map 0 -> 0.02, 1 -> 0.98
    targets = targets.dimshuffle(0, 'x')  # turn into column vector
    outputs = lasagne.layers.get_output(network, deterministic=False)
    cost = T.mean(lasagne.objectives.binary_crossentropy(outputs, targets))
    if cfg.get('l2_decay', 0):
        cost_l2 = lasagne.regularization.regularize_network_params(
            network, lasagne.regularization.l2) * cfg['l2_decay']
    else:
        cost_l2 = 0

    # prepare and compile training function
    params = lasagne.layers.get_all_params(network, trainable=True)
    initial_eta = cfg['initial_eta']
    eta_decay = cfg['eta_decay']
    eta_decay_every = cfg.get('eta_decay_every', 1)
    patience = cfg.get('patience', 0)
    trials_of_patience = cfg.get('trials_of_patience', 1)
    patience_criterion = cfg.get(
        'patience_criterion',
        'valid_loss' if options.validate else 'train_loss')
    momentum = cfg['momentum']
    first_params = params[:cfg['first_params']]
    first_params_eta_scale = cfg['first_params_eta_scale']
    if cfg['learn_scheme'] == 'nesterov':
        learn_scheme = lasagne.updates.nesterov_momentum
    elif cfg['learn_scheme'] == 'momentum':
        # the update rules live in `lasagne.updates` (plural)
        learn_scheme = lasagne.updates.momentum
    elif cfg['learn_scheme'] == 'adam':
        learn_scheme = lasagne.updates.adam
    else:
        raise ValueError('Unknown learn_scheme=%s' % cfg['learn_scheme'])
    # the learning rate is a shared variable so it can be decayed in place
    eta = theano.shared(lasagne.utils.floatX(initial_eta))
    if not first_params or first_params_eta_scale == 1:
        updates = learn_scheme(cost + cost_l2, params, eta, momentum)
    else:
        # the first `first_params` parameters get their own learning rate
        # (scaled by `first_params_eta_scale`), or no updates at all if the
        # scale is not positive
        grads = theano.grad(cost + cost_l2, params)
        updates = learn_scheme(grads[len(first_params):],
                               params[len(first_params):], eta, momentum)
        if first_params_eta_scale > 0:
            updates.update(
                learn_scheme(grads[:len(first_params)], first_params,
                             eta * first_params_eta_scale, momentum))
    print("Compiling training function...")
    train_fn = theano.function([input_var, target_var], cost, updates=updates)

    # prepare and compile validation function, if requested
    if options.validate:
        print("Compiling validation function...")
        import model_to_fcn
        network_test = model_to_fcn.model_to_fcn(network, allow_unlink=False)
        outputs_test = lasagne.layers.get_output(network_test,
                                                 deterministic=True)
        cost_test = T.mean(
            lasagne.objectives.binary_crossentropy(outputs_test, targets))
        val_fn = theano.function([input_var, target_var],
                                 [cost_test, outputs_test])

    # run training loop
    print("Training:")
    epochs = cfg['epochs']
    epochsize = cfg['epochsize']
    batches = iter(batches)
    if options.save_errors:
        errors = []
    if first_params and cfg['first_params_log']:
        first_params_hist = []
    if patience > 0:
        best_error = np.inf
        best_state = get_state(network, updates)
    for epoch in range(epochs):
        # actual training
        err = 0
        for batch in progress(range(epochsize),
                              min_delay=.5,
                              desc='Epoch %d/%d: Batch ' %
                              (epoch + 1, epochs)):
            err += train_fn(*next(batches))
            if not np.isfinite(err):
                print("\nEncountered NaN loss in training. Aborting.")
                sys.exit(1)
            if first_params and cfg['first_params_log'] and (
                    batch % cfg['first_params_log'] == 0):
                # log the current values of the first parameters and rewrite
                # the complete history file
                first_params_hist.append(
                    tuple(param.get_value() for param in first_params))
                np.savez(
                    modelfile[:-4] + '.hist.npz', **{
                        'param%d' % i: param
                        for i, param in enumerate(zip(*first_params_hist))
                    })

        # report training loss
        print("Train loss: %.3f" % (err / epochsize))
        if options.save_errors:
            errors.append(err / epochsize)

        # compute and report validation loss, if requested
        if options.validate:
            val_err = 0
            preds = []
            max_len = int(fps * cfg.get('val.max_len', 30))
            for spect, label in zip(spects_val, labels_val):
                # pick excerpt of val.max_len seconds in center of file
                excerpt = slice(max(0, (len(spect) - max_len) // 2),
                                (len(spect) + max_len) // 2)
                # crop to maximum length and required spectral bins
                spect = spect[None, excerpt, :bin_mel_max]
                # crop to maximum length and remove edges lost in the network
                label = label[excerpt][blocklen // 2:-(blocklen // 2)]
                e, pred = val_fn(spect, label)
                val_err += e
                preds.append((pred[:, 0], label))
            print("Validation loss: %.3f" % (val_err / len(filelist_val)))
            from eval import evaluate
            _, results = evaluate(*zip(*preds))
            print("Validation error: %.3f" % (1 - results['accuracy']))
            if options.save_errors:
                errors.append(val_err / len(filelist_val))
                errors.append(1 - results['accuracy'])

        # update learning rate and/or apply early stopping, if needed
        if patience > 0:
            if patience_criterion == 'train_loss':
                cur_error = err / epochsize
            elif patience_criterion == 'valid_loss':
                cur_error = val_err / len(filelist_val)
            elif patience_criterion == 'valid_error':
                cur_error = 1 - results['accuracy']
            else:
                # fail with a clear message instead of an unbound `cur_error`
                raise ValueError('Unknown patience_criterion=%s' %
                                 patience_criterion)
            if cur_error <= best_error:
                # new best epoch: remember its state, reset the countdown
                best_error = cur_error
                best_state = get_state(network, updates)
                patience = cfg['patience']
            else:
                patience -= 1
                if patience == 0:
                    if eta_decay_every == 'trial_of_patience' and eta_decay != 1:
                        eta.set_value(eta.get_value() *
                                      lasagne.utils.floatX(eta_decay))
                    # go back to the best state seen so far and try again
                    restore_state(network, updates, best_state)
                    patience = cfg['patience']
                    trials_of_patience -= 1
                    print("Lost patience (%d remaining trials)." %
                          trials_of_patience)
                    if trials_of_patience == 0:
                        break
        if eta_decay_every != 'trial_of_patience' and eta_decay != 1 and \
                (epoch + 1) % eta_decay_every == 0:
            eta.set_value(eta.get_value() * lasagne.utils.floatX(eta_decay))

    # save final network
    print("Saving final model")
    np.savez(
        modelfile, **{
            'param%d' % i: p
            for i, p in enumerate(lasagne.layers.get_all_param_values(network))
        })
    with io.open(modelfile + '.vars', 'wb') as f:
        # the file is opened in binary mode, so each line is encoded
        # explicitly (writing str to a binary file fails on Python 3)
        f.writelines(('%s=%s\n' % kv).encode('utf-8') for kv in cfg.items())
    if options.save_errors:
        # one row per completed epoch (training may have stopped early)
        np.savez(modelfile[:-len('.npz')] + '.err.npz',
                 np.asarray(errors).reshape(epoch + 1, -1))
コード例 #11
0
ファイル: test2.py プロジェクト: soumil505/lexie-2.0
import tensorflow as tf
from model import architecture
from preprocessing import word2mat
import numpy as np


# Settings handed to `architecture` and `word2mat` below.
# NOTE(review): presumably these must match the values the checkpoint was
# trained with -- confirm against the training script.
allowed_chars="qwertyuiopasdfghjklzxcvbnm'-_1234567890 "

sequence_length=30

hidden_units=16




word1,word2,target,output,loss=architecture(allowed_chars,sequence_length,hidden_units)
# NOTE(review): the optimizer is never run in this script; presumably it is
# built so the Saver's variable set matches the checkpoint -- confirm.
optimizer = tf.train.AdamOptimizer().minimize(loss)
saver=tf.train.Saver()
# Build the sigmoid op once. Creating it inside the interactive loop would
# add a new node to the graph for every query.
prediction=tf.nn.sigmoid(output)
print("model loaded\n\n")


with tf.Session() as sess:
    saver.restore(sess,"/saved/pretrained.ckpt")
    print("session restored")
    # interactive loop: keep asking for word pairs until the user answers "n"
    while input("continue(y/n) ").lower()!="n":
        w1=input("word1:")
        w2=input("word2:")
        # wrap each encoded word in a list to form a batch of size one
        w1=np.asarray([word2mat(w1,allowed_chars,sequence_length)])
        w2=np.asarray([word2mat(w2,allowed_chars,sequence_length)])
        print(prediction.eval({word1:w1,word2:w2}))