Beispiel #1
0
def analyze(cli_params):
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=True)
    ladder = setup_model(p)

    # Analyze activations
    dset, indices, calc_batchnorm = {
        'train': (data.train, data.train_ind, False),
        'valid': (data.valid, data.valid_ind, True),
        'test':  (data.test, data.test_ind, True),
    }[p.data_type]

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error.clean]
                    + ladder.costs.denois.values(),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # We want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization paramameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _,  outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.vstack(v) for k, v in inputs.iteritems()}

    return inputs['targets_labeled'], res[0]
Beispiel #2
0
def analyze(cli_params):
    """
    called when evaluating
    :return: inputs, result
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations
    if p.data_type == 'train':
        dset, indices, calc_batchnorm = data.train, data.train_ind, False
    elif p.data_type == 'valid':
        dset, indices, calc_batchnorm = data.valid, data.valid_ind, True
    elif p.data_type == 'test':
        dset, indices, calc_batchnorm = data.test, data.test_ind, True
    else:
        raise Exception("Unknown data-type %s" % p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(extensions=[
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean] +
                ladder.costs.denois.values(),
                make_datastream(
                    data.train,
                    data.train_ind,
                    # These need to match with the training
                    p.batch_size,
                    n_labeled=p.labeled_samples,
                    n_unlabeled=len(data.train_ind),
                    cnorm=cnorm,
                    whiten=whiten,
                    scheme=ShuffledScheme),
                make_datastream(data.valid,
                                data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                n_unlabeled=len(data.valid_ind),
                                cnorm=cnorm,
                                whiten=whiten,
                                scheme=ShuffledScheme),
                prefix="valid_final",
                before_training=True),
            ShortPrinting(
                {
                    "valid_final":
                    OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_C_de', [
                            ladder.costs.denois.get(0),
                            ladder.costs.denois.get(1),
                            ladder.costs.denois.get(2),
                            ladder.costs.denois.get(3)
                        ]),
                    ]),
                },
                after_training=True,
                use_log=False),
        ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset,
                         indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # If layer=-1 we want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization paramameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _, outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {
        'features_labeled': [],
        'targets_labeled': [],
        'features_unlabeled': []
    }
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    inputs = {k: numpy.concatenate(v) for k, v in inputs.iteritems()}

    return inputs['targets_labeled'], res[0]
Beispiel #3
0
def dump_unlabeled_encoder(cli_params):
    """
    called when dumping
    :return: inputs, result
    """
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test'))
    ladder = setup_model(p)

    # Analyze activations
    if p.data_type == 'train':
        dset, indices, calc_batchnorm = data.train, data.train_ind, False
    elif p.data_type == 'valid':
        dset, indices, calc_batchnorm = data.valid, data.valid_ind, True
    elif p.data_type == 'test':
        dset, indices, calc_batchnorm = data.test, data.test_ind, True
    else:
        raise Exception("Unknown data-type %s"%p.data_type)

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error.clean, ladder.oos.clean]
                    + ladder.costs.denois.values(),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    balanced_classes=p.balanced_classes,
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    balanced_classes=p.balanced_classes,
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_O', ladder.oos.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

    all_ind = numpy.arange(dset.num_examples)
    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, all_ind,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(all_ind),
                         n_unlabeled=len(all_ind),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # If layer=-1 we want out the values after softmax
    if p.layer < 0:
        # ladder.act.clean.unlabeled.h is a dict not a list
        outputs = ladder.act.clean.labeled.h[len(ladder.layers) + p.layer]
    else:
        outputs = ladder.act.clean.labeled.h[p.layer]

    # Replace the batch normalization paramameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _,  outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []

    # Loop over one epoch
    for d in it:
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]

    return res[0]
Beispiel #4
0
def analyze(cli_params):
    p, _ = load_and_log_params(cli_params)
    _, data, whiten, cnorm = setup_data(p, test_set=True)
    ladder = setup_model(p)
    # p.data_type = 'train' ### RUN the evaluation on the TRAIN dataset ... 
    # Analyze activations
    print("------------------------------------------ Batch Sz = " + str(p.batch_size) + " ---------")
    dset, indices, calc_batchnorm = {
        'train': (data.train, data.train_ind, False),
        'valid': (data.valid, data.valid_ind, True),
        'test':  (data.test, data.test_ind, True),
    }[p.data_type]

    print("data.train_ind " + str(data.train_ind))
    print("data.train_ind " + str(data.train_ind.shape))
    print("data - keys " + str((data.keys())))
    for k in data.keys():
        if "_ind" in k:
            print(k + " --> SZ: " + str(len(data[k])))
        else:
            print(k + " " + str(data[k].num_examples))

    if calc_batchnorm:
        logger.info('Calculating batch normalization for clean.labeled path')
        main_loop = DummyLoop(
            extensions=[
                FinalTestMonitoring(
                    [ladder.costs.class_clean, ladder.error.clean]
                    + ladder.costs.denois.values(),
                    make_datastream(data.train, data.train_ind,
                                    # These need to match with the training
                                    p.batch_size,
                                    n_labeled=p.labeled_samples,
                                    n_unlabeled=len(data.train_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme), #, balanced_classes=False),
                    make_datastream(data.valid, data.valid_ind,
                                    p.valid_batch_size,
                                    n_labeled=len(data.valid_ind),
                                    n_unlabeled=len(data.valid_ind),
                                    cnorm=cnorm,
                                    whiten=whiten, scheme=ShuffledScheme),
                    prefix="valid_final", before_training=True),
                ShortPrinting({
                    "valid_final": OrderedDict([
                        ('VF_C_class', ladder.costs.class_clean),
                        ('VF_E', ladder.error.clean),
                        ('VF_C_de', [ladder.costs.denois.get(0),
                                     ladder.costs.denois.get(1),
                                     ladder.costs.denois.get(2),
                                     ladder.costs.denois.get(3)]),
                    ]),
                }, after_training=True, use_log=False),
            ])
        main_loop.run()

    # Make a datastream that has all the indices in the labeled pathway
    ds = make_datastream(dset, indices,
                         batch_size=p.get('batch_size'),
                         n_labeled=len(indices),
                         n_unlabeled=len(indices),
                         balanced_classes=False,
                         whiten=whiten,
                         cnorm=cnorm,
                         scheme=SequentialScheme)

    # We want out the values after softmax
    outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1]

    # Replace the batch normalization paramameters with the shared variables
    if calc_batchnorm:
        outputreplacer = TestMonitoring()
        _, _,  outputs = outputreplacer._get_bn_params(outputs)

    cg = ComputationGraph(outputs)
    f = cg.get_theano_function()

    it = ds.get_epoch_iterator(as_dict=True)
    res = []
    inputs = {'features_labeled': [],
              'targets_labeled': [],
              'features_unlabeled': []}
    # Loop over one epoch
    for d in it:
        # Store all inputs
        for k, v in d.iteritems():
            inputs[k] += [v]
        # Store outputs
        res += [f(*[d[str(inp)] for inp in cg.inputs])]

    # Concatenate all minibatches
    res = [numpy.vstack(minibatches) for minibatches in zip(*res)]
    # print("res = " + str(res))
    print("res.shape = " + str(res[0].shape))
    print("input shapes = " + str([(k,len(v)) for k,v in inputs.iteritems()]))
    # print("inputs = " + str(inputs))
    inputs = {k: numpy.vstack(v) for k, v in inputs.iteritems()}

    return inputs['targets_labeled'], res[0]