def analyze(cli_params): """ called when evaluating :return: inputs, result """ p, _ = load_and_log_params(cli_params) _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test')) ladder = setup_model(p) # Analyze activations if p.data_type == 'train': dset, indices, calc_batchnorm = data.train, data.train_ind, False elif p.data_type == 'valid': dset, indices, calc_batchnorm = data.valid, data.valid_ind, True elif p.data_type == 'test': dset, indices, calc_batchnorm = data.test, data.test_ind, True else: raise Exception("Unknown data-type %s" % p.data_type) if calc_batchnorm: logger.info('Calculating batch normalization for clean.labeled path') main_loop = DummyLoop(extensions=[ FinalTestMonitoring( [ladder.costs.class_clean, ladder.error.clean] + ladder.costs.denois.values(), make_datastream( data.train, data.train_ind, # These need to match with the training p.batch_size, n_labeled=p.labeled_samples, n_unlabeled=len(data.train_ind), cnorm=cnorm, whiten=whiten, scheme=ShuffledScheme), make_datastream(data.valid, data.valid_ind, p.valid_batch_size, n_labeled=len(data.valid_ind), n_unlabeled=len(data.valid_ind), cnorm=cnorm, whiten=whiten, scheme=ShuffledScheme), prefix="valid_final", before_training=True), ShortPrinting( { "valid_final": OrderedDict([ ('VF_C_class', ladder.costs.class_clean), ('VF_E', ladder.error.clean), ('VF_C_de', [ ladder.costs.denois.get(0), ladder.costs.denois.get(1), ladder.costs.denois.get(2), ladder.costs.denois.get(3) ]), ]), }, after_training=True, use_log=False), ]) main_loop.run() # Make a datastream that has all the indices in the labeled pathway ds = make_datastream(dset, indices, batch_size=p.get('batch_size'), n_labeled=len(indices), n_unlabeled=len(indices), balanced_classes=False, whiten=whiten, cnorm=cnorm, scheme=SequentialScheme) # If layer=-1 we want out the values after softmax outputs = ladder.act.clean.labeled.h[len(ladder.layers) - 1] # Replace the batch normalization paramameters with the shared variables if calc_batchnorm: outputreplacer = TestMonitoring() _, _, outputs = outputreplacer._get_bn_params(outputs) cg = ComputationGraph(outputs) f = cg.get_theano_function() it = ds.get_epoch_iterator(as_dict=True) res = [] inputs = { 'features_labeled': [], 'targets_labeled': [], 'features_unlabeled': [] } # Loop over one epoch for d in it: # Store all inputs for k, v in d.iteritems(): inputs[k] += [v] # Store outputs res += [f(*[d[str(inp)] for inp in cg.inputs])] # Concatenate all minibatches res = [numpy.vstack(minibatches) for minibatches in zip(*res)] inputs = {k: numpy.concatenate(v) for k, v in inputs.iteritems()} return inputs['targets_labeled'], res[0]
def dump_unlabeled_encoder(cli_params): """ called when dumping :return: inputs, result """ p, _ = load_and_log_params(cli_params) _, data, whiten, cnorm = setup_data(p, test_set=(p.data_type == 'test')) ladder = setup_model(p) # Analyze activations if p.data_type == 'train': dset, indices, calc_batchnorm = data.train, data.train_ind, False elif p.data_type == 'valid': dset, indices, calc_batchnorm = data.valid, data.valid_ind, True elif p.data_type == 'test': dset, indices, calc_batchnorm = data.test, data.test_ind, True else: raise Exception("Unknown data-type %s"%p.data_type) if calc_batchnorm: logger.info('Calculating batch normalization for clean.labeled path') main_loop = DummyLoop( extensions=[ FinalTestMonitoring( [ladder.costs.class_clean, ladder.error.clean, ladder.oos.clean] + ladder.costs.denois.values(), make_datastream(data.train, data.train_ind, # These need to match with the training p.batch_size, n_labeled=p.labeled_samples, n_unlabeled=len(data.train_ind), balanced_classes=p.balanced_classes, cnorm=cnorm, whiten=whiten, scheme=ShuffledScheme), make_datastream(data.valid, data.valid_ind, p.valid_batch_size, n_labeled=len(data.valid_ind), n_unlabeled=len(data.valid_ind), balanced_classes=p.balanced_classes, cnorm=cnorm, whiten=whiten, scheme=ShuffledScheme), prefix="valid_final", before_training=True), ShortPrinting({ "valid_final": OrderedDict([ ('VF_C_class', ladder.costs.class_clean), ('VF_E', ladder.error.clean), ('VF_O', ladder.oos.clean), ('VF_C_de', [ladder.costs.denois.get(0), ladder.costs.denois.get(1), ladder.costs.denois.get(2), ladder.costs.denois.get(3)]), ]), }, after_training=True, use_log=False), ]) main_loop.run() all_ind = numpy.arange(dset.num_examples) # Make a datastream that has all the indices in the labeled pathway ds = make_datastream(dset, all_ind, batch_size=p.get('batch_size'), n_labeled=len(all_ind), n_unlabeled=len(all_ind), balanced_classes=False, whiten=whiten, cnorm=cnorm, scheme=SequentialScheme) # If layer=-1 we want out the values after softmax if p.layer < 0: # ladder.act.clean.unlabeled.h is a dict not a list outputs = ladder.act.clean.labeled.h[len(ladder.layers) + p.layer] else: outputs = ladder.act.clean.labeled.h[p.layer] # Replace the batch normalization paramameters with the shared variables if calc_batchnorm: outputreplacer = TestMonitoring() _, _, outputs = outputreplacer._get_bn_params(outputs) cg = ComputationGraph(outputs) f = cg.get_theano_function() it = ds.get_epoch_iterator(as_dict=True) res = [] # Loop over one epoch for d in it: # Store outputs res += [f(*[d[str(inp)] for inp in cg.inputs])] # Concatenate all minibatches res = [numpy.vstack(minibatches) for minibatches in zip(*res)] return res[0]