def gen_classifier_data(index=None):
    """Generate the classifier train/test/validation JSON files.

    The files are named ``<split>-<suffix>.txt`` inside the classifier data
    directory, where the suffix is ``index`` or, when ``index`` is ``None``,
    the value of ``constants.run_index()``. Nothing is generated when all
    three files already exist.

    Args:
        index: optional explicit index used as the file-name suffix.
    """
    target_dir = util.get_classifier_data_dir()
    util.prepare_dir(target_dir, hard=False)
    suffix = str(constants.run_index()) if index is None else str(index)

    def split_path(split):
        # Full path of the JSON file that holds the given split.
        return os.path.join(target_dir, split + '-' + suffix + '.txt')

    # Skip generation only when every split is already on disk.
    if all(os.path.isfile(split_path(split))
           for split in ('test', 'validation', 'train')):
        return

    TOTAL = 50000
    # (split name, percentage of TOTAL, log message), in write order.
    plan = (
        ('train', TRAIN_RATIO, 'Train data written.'),
        ('test', TEST_RATIO, 'Test data written.'),
        ('validation', VALIDATION_RATIO, 'Validation data written.'),
    )
    for split, ratio, message in plan:
        with open(split_path(split), 'w') as outfile:
            json.dump(create_balanced_dataset(int(TOTAL * ratio / 100)),
                      outfile)
        logger.info(message)
def __init__(self, autoencoder_test_data, classifier_train_json,
             classifier_test_json, classifier_validation_json, index=None):
    """Store the datasets and set up empty metric histories.

    Args:
        autoencoder_test_data: iterable of images; each one is passed through
            ``draw_util.compress_bits`` and collected into ``compressed_set``.
        classifier_train_json: classifier training data, stored as given.
        classifier_test_json: classifier test data, stored as given.
        classifier_validation_json: classifier validation data, stored as
            given.
        index: optional run index; ``constants.run_index()`` is used when it
            is ``None``.
    """
    self.autoencoder_test_data = autoencoder_test_data
    self.classifier_train_json = classifier_train_json
    self.classifier_test_json = classifier_test_json
    self.classifier_validation_json = classifier_validation_json

    # Compressed form of every autoencoder test image, for set membership.
    self.compressed_set = {
        draw_util.compress_bits(image) for image in autoencoder_test_data
    }

    # Metric histories, all starting empty.
    self.classifier_accuracy_all = []
    self.cost_all = []
    self.reconstruction_loss_all = []
    self.kl_divergence_all = []
    self.reconstruction_accuracy_all = []

    results_dir = util.get_results_dir()
    util.prepare_dir(results_dir, hard=False)
    if index is None:
        self.index = constants.run_index()
    else:
        self.index = index
    summary_name = 'classifier_accuracy_summary_' + str(self.index) + '.txt'
    self.classifier_summary_file = os.path.join(results_dir, summary_name)
def go_mnist(architecture, run_index=None):
    """Train MNIST autoencoders over a grid of label percentages and betas.

    For each label percentage in 20, 40, ..., 100 the same percentage of
    training images is randomly sampled from every digit class, the chosen
    indices are written to a file in the logs directory, and one autoencoder
    is trained per beta, starting at 0.0 and stepping by 0.1 up to 4.0.

    Args:
        architecture: architecture selector; when it equals
            ``constants.CONV`` a ``-conv`` suffix is added to generated names.
        run_index: explicit run index. When ``None``, the value of
            ``constants.run_index()`` is used for all generated names and the
            counter is advanced with ``constants.increase_index()`` if the
            returned flag is true.

    Returns:
        The success flag of the last label percentage processed: ``False``
        if one of its trainings reported failure, ``True`` otherwise.
    """
    mnist = input_data.read_data_sets(
        os.path.join(os.path.expanduser('~'), 'MNIST_data'), one_hot=True)
    logger.info('Start fetching.')
    train_all, validation = mnist.train.images, mnist.validation.images

    # Group training-set positions by digit (labels are one-hot encoded).
    class_indices = [[] for _ in range(10)]
    for position, label in enumerate(mnist.train.labels):
        class_indices[np.argmax(label)].append(position)
    logger.info('Fetching completed.')

    # Resolve the run index once so the indices file and the sequence index
    # agree; previously the file name used str(run_index) and therefore
    # started with "None" whenever run_index was omitted.
    effective_index = constants.run_index() if run_index is None else run_index
    is_conv = architecture == constants.CONV

    for labels_percentage in range(20, 101, 20):
        training_set_indices = []
        for indices in class_indices:
            # Floor division: random.sample needs an integer sample size,
            # and "/" yields a float under Python 3.
            sample_size = len(indices) * labels_percentage // 100
            training_set_indices += random.sample(indices, sample_size)
        train = train_all[training_set_indices]

        training_set_indices_file_name = (
            str(effective_index) + '-' + str(labels_percentage) +
            '-training-indices')
        if is_conv:
            training_set_indices_file_name += '-conv'
        training_set_indices_file_name += '.txt'
        util.write_list_to_file(
            training_set_indices,
            os.path.join(util.get_logs_dir(), training_set_indices_file_name))

        # NOTE(review): the flag is reset for every label percentage, so a
        # failure in an earlier percentage is not reflected in the return
        # value - confirm this is intended.
        flag = True
        beta = 0.0
        # The 1e-3 tolerance keeps beta ~= 4.0 in the sweep despite float
        # accumulation error from repeatedly adding 0.1.
        while beta - 4.0 < 1e-3:
            seq_index = (str(effective_index) + '-' + str(labels_percentage) +
                         '-' + str(beta) + '-mnist')
            if is_conv:
                seq_index += '-conv'
            logger.info(
                'Start building the variational autoencoder architecture.')
            logger.info('Beta = {0}, Seq index = {1}'.format(beta, seq_index))
            autoencoder = get_autoencoder.mnist(architecture, beta, True,
                                                seq_index=seq_index)
            is_training_successful = train_and_log_autoencoder(
                autoencoder, train, validation)
            if not is_training_successful:
                flag = False
                break
            beta += 0.1

    # Only advance the shared counter when the caller did not pin an index.
    if flag and run_index is None:
        constants.increase_index()
    return flag
def gen_autoencoder_data(gen_unique=True, reduced=False):
    """Generate and save the autoencoder train/test/validation arrays.

    Files are saved as ``<split>-<suffix>.npy`` in the autoencoder data
    directory, where the suffix is the current ``constants.run_index()``
    prefixed with ``unique-`` when ``gen_unique`` is true. Nothing is
    generated when all three files already exist.

    Args:
        gen_unique: forwarded to ``split_for_shape``; also selects the
            ``unique-`` file-name prefix.
        reduced: when true only the ``square`` shape is used; also forwarded
            to ``split_for_shape``.
    """
    target_dir = util.get_autoencoder_data_dir()
    prefix = 'unique-' if gen_unique else ''
    suffix = prefix + str(constants.run_index())

    # Nothing to do when every split has already been saved.
    if all(os.path.isfile(os.path.join(target_dir, name + '-' + suffix + '.npy'))
           for name in ('test', 'validation', 'train')):
        return
    util.prepare_dir(target_dir, hard=False)

    train, test, validation = [], [], []
    tot = -1
    shapes = ['square'] if reduced else ['square', 'ellipse', 'triangle']
    for shape in shapes:
        shape_train, shape_test, shape_validation = split_for_shape(
            shape, tot, gen_unique, reduced)
        if tot == -1:
            # The first shape's total is passed to the remaining shapes.
            tot = len(shape_train) + len(shape_test) + len(shape_validation)
        for part in (shape_train, shape_test, shape_validation):
            random.shuffle(part)
        train.extend(shape_train)
        test.extend(shape_test)
        validation.extend(shape_validation)

    for merged in (train, test, validation):
        random.shuffle(merged)
    statistics(train, test, validation)
    logger.info('Separation done.')

    # Convert and save in the order test, validation, train.
    conversions = (
        ('test', test, 'Test dataset converted.'),
        ('validation', validation, 'Validation dataset converted.'),
        ('train', train, 'Train dataset converted.'),
    )
    for name, encoded_images, message in conversions:
        bits = np.array([
            draw_util.encoded_image_to_flattened_bits(encoded)
            for encoded in encoded_images
        ])
        bits = np.random.permutation(bits)
        # np.save appends the ".npy" extension to the given path.
        np.save(os.path.join(target_dir, name + '-' + suffix), bits)
        logger.info(message)
def get_autoencoder_data(index=None, get_unique=True):
    """Load the autoencoder clean data.

    By default the index in run_counter.txt is used - useful for training.

    Args:
        index: optional explicit index; ``constants.run_index()`` is used
            when it is ``None``.
        get_unique: when true the ``unique-`` prefixed files are loaded.

    Returns:
        A ``(train, test, validation)`` tuple of arrays loaded with
        ``np.load``.
    """
    run_id = constants.run_index() if index is None else index
    suffix = ('unique-' if get_unique else '') + str(run_id)
    data_dir = get_autoencoder_data_dir()

    train, test, validation = (
        np.load(os.path.join(data_dir, split + '-' + suffix + '.npy'))
        for split in ('train', 'test', 'validation')
    )
    return train, test, validation
def get_classifier_data(index=None):
    """Load the classifier data.

    Args:
        index: optional explicit index used as the file-name suffix;
            ``constants.run_index()`` is used when it is ``None``.

    Returns:
        A ``(train, test, validation)`` tuple of the decoded JSON contents
        of the three ``<split>-<suffix>.txt`` files.
    """
    suffix = str(constants.run_index()) if index is None else str(index)
    data_dir = get_classifier_data_dir()

    loaded = []
    for split in ('train', 'test', 'validation'):
        path = os.path.join(data_dir, split + '-' + suffix + '.txt')
        with open(path) as data_file:
            loaded.append(json.load(data_file))

    train_data, test_data, validation_data = loaded
    return train_data, test_data, validation_data
def go_shapes(denoising, architecture, reduced=False):
    """Train one shapes autoencoder per beta in the configured range.

    Generates the autoencoder data if needed, loads the train and validation
    sets (validation capped at 40000 items) and trains an autoencoder for
    every beta from ``constants.BETA_LOW`` to ``constants.BETA_HIGH`` in
    steps of ``constants.BETA_STEP``.

    Args:
        denoising: when true a ``-denoising`` suffix is added to the sequence
            index; the value is also passed to ``get_autoencoder.shapes_set``.
        architecture: architecture selector; ``constants.CONV`` adds a
            ``-conv`` suffix to the sequence index.
        reduced: forwarded to ``generate_data.gen_autoencoder_data``.

    Returns:
        ``True`` when every training succeeded (the run index is then
        increased), ``False`` as soon as one training fails.
    """
    # Make sure the datasets exist, then read them.
    generate_data.gen_autoencoder_data(reduced=reduced)

    logger.info('Start fetching.')
    train, _, validation = util.get_autoencoder_data()
    if len(validation) > 40000:
        validation = validation[:40000]
    logger.info('Train and validation data are read.')
    logger.info('Train set: {0}'.format(len(train)))
    logger.info('Validation set: {0}'.format(len(validation)))

    # Sweep beta; the 1e-6 tolerance absorbs float accumulation error.
    current_beta = constants.BETA_LOW
    while not (current_beta - constants.BETA_HIGH > 1e-6):
        # Name of this experiment.
        name_parts = [str(constants.run_index()), '-', str(current_beta)]
        if denoising:
            name_parts.append('-denoising')
        if architecture == constants.CONV:
            name_parts.append('-conv')
        seq_index = ''.join(name_parts)

        logger.info('Start building the variational autoencoder architecture.')
        logger.info('Beta = {0}, Seq index = {1}'.format(
            current_beta, seq_index))

        # A smaller learning rate is used when beta is (near) zero.
        if current_beta < 1e-3:
            learning_rate = 0.00003
        else:
            learning_rate = 0.001
        autoencoder = get_autoencoder.shapes_set(
            architecture, current_beta, learning_rate, seq_index, denoising)

        if not train_and_log_autoencoder(autoencoder, train, validation):
            # Abort the sweep without advancing the run index.
            return False
        current_beta += constants.BETA_STEP

    constants.increase_index()
    return True