def gen_classifier_data(index=None):
    """Generates the train/test/validation JSON splits for the classifier.

    By default the index in run_counter.txt is used.
    """
    target_dir = util.get_classifier_data_dir()
    util.prepare_dir(target_dir, hard=False)
    suffix = str(constants.run_index()) if index is None else str(index)

    # Skip regeneration if all three splits already exist for this suffix.
    if all(
            os.path.isfile(
                os.path.join(target_dir, name + '-' + suffix + '.txt'))
            for name in ('train', 'test', 'validation')):
        return

    TOTAL = 50000
    with open(os.path.join(target_dir,
                           'train-' + suffix + '.txt'), 'w') as outfile:
        json.dump(create_balanced_dataset(int(TOTAL * TRAIN_RATIO / 100)),
                  outfile)
        logger.info('Train data written.')
    with open(os.path.join(target_dir,
                           'test-' + suffix + '.txt'), 'w') as outfile:
        json.dump(create_balanced_dataset(int(TOTAL * TEST_RATIO / 100)),
                  outfile)
        logger.info('Test data written.')
    with open(os.path.join(target_dir,
                           'validation-' + suffix + '.txt'), 'w') as outfile:
        json.dump(create_balanced_dataset(int(TOTAL * VALIDATION_RATIO / 100)),
                  outfile)
        logger.info('Validation data written.')
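
# Sanity-check sketch: gen_classifier_data assumes TRAIN_RATIO, TEST_RATIO and
# VALIDATION_RATIO are module-level percentages that together cover TOTAL.
# This assertion is an illustrative assumption, not part of the original code.
assert TRAIN_RATIO + TEST_RATIO + VALIDATION_RATIO == 100, \
    'Split ratios must sum to 100 so the three splits partition the dataset.'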
def __init__(self,
             autoencoder_test_data,
             classifier_train_json,
             classifier_test_json,
             classifier_validation_json,
             index=None):
        """Stores the datasets, initializes the metric accumulators, and
        sets up the results summary file.
        """

        self.autoencoder_test_data = autoencoder_test_data
        self.classifier_train_json = classifier_train_json
        self.classifier_test_json = classifier_test_json
        self.classifier_validation_json = classifier_validation_json
        
        self.compressed_set = set()
        for img in autoencoder_test_data:
            self.compressed_set.add(draw_util.compress_bits(img))
        self.classifier_accuracy_all = []
        self.cost_all = []
        self.reconstruction_loss_all = []
        self.kl_divergence_all = []
        self.reconstruction_accuracy_all = []

        results_dir = util.get_results_dir()
        util.prepare_dir(results_dir, hard=False)

        self.index = constants.run_index() if index is None else index
        self.classifier_summary_file = os.path.join(results_dir,
            'classifier_accuracy_summary_' + str(self.index) + '.txt')
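
# Sketch (assumption, not part of the original class): compressed_set enables
# O(1) membership tests, e.g. to flag a reconstruction that exactly matches an
# autoencoder test image. draw_util.compress_bits is assumed to map a binary
# image to a hashable key, as in __init__ above.
def is_memorized(self, img):
        """Returns True if `img` appeared in the autoencoder test data."""
        return draw_util.compress_bits(img) in self.compressed_set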
def go_mnist(architecture, run_index=None):
    """Trains MNIST autoencoders over a beta sweep per label percentage."""
    mnist = input_data.read_data_sets(os.path.join(os.path.expanduser('~'),
                                                   'MNIST_data'),
                                      one_hot=True)

    logger.info('Start fetching.')
    train_all, _, validation = (mnist.train.images, mnist.test.images,
                                mnist.validation.images)
    class_indices = [[] for _ in range(10)]
    class_counts = [0] * 10
    for index, label in enumerate(mnist.train.labels):
        digit = np.argmax(label)
        class_indices[digit].append(index)
        class_counts[digit] += 1
    logger.info('Fetching completed.')

    flag = True
    for labels_percentage in range(20, 101, 20):
        training_set_indices = []
        for digit in range(10):
            # Integer division: random.sample requires an int sample size.
            training_set_indices += random.sample(
                class_indices[digit],
                class_counts[digit] * labels_percentage // 100)
        train = train_all[training_set_indices]
        training_set_indices_file_name = str(run_index) + '-' + str(labels_percentage) + \
            '-training-indices'
        if architecture == constants.CONV:
            training_set_indices_file_name += '-conv'
        training_set_indices_file_name += '.txt'
        util.write_list_to_file(
            training_set_indices,
            os.path.join(util.get_logs_dir(), training_set_indices_file_name))
        beta = 0.0
        while beta - 4.0 < 1e-3:
            seq_index = str(constants.run_index() if run_index is None else run_index) + \
                '-' + str(labels_percentage) + '-' + str(beta) + '-mnist'
            if architecture == constants.CONV:
                seq_index += '-conv'

            logger.info(
                'Start building the variational autoencoder architecture.')
            logger.info('Beta = {0}, Seq index = {1}'.format(beta, seq_index))

            autoencoder = get_autoencoder.mnist(architecture,
                                                beta,
                                                True,
                                                seq_index=seq_index)
            is_training_successful = train_and_log_autoencoder(
                autoencoder, train, validation)
            if not is_training_successful:
                flag = False
                break
            beta += 0.1
        if not flag:
            # Stop the percentage sweep on the first failed training run.
            break
        if run_index is None:
            constants.increase_index()

    return flag
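
# Sketch (assumption): the `while beta - 4.0 < 1e-3` sweep above accumulates
# floating-point error, since 0.1 is not exactly representable. A drift-free
# alternative precomputes the schedule from integer multiples of the step:
def beta_schedule(low=0.0, high=4.0, step=0.1):
    """Returns [low, low + step, ..., high] without cumulative float drift."""
    steps = int(round((high - low) / step))
    return [low + i * step for i in range(steps + 1)]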
def gen_autoencoder_data(gen_unique=True, reduced=False):
    """Generates the train/test/validation .npy splits for the autoencoder.

    The file suffix is derived from the index in run_counter.txt.
    """
    target_dir = util.get_autoencoder_data_dir()
    suffix = ('' if not gen_unique else 'unique-') + str(constants.run_index())
    # Skip regeneration if all three splits already exist for this suffix.
    if all(
            os.path.isfile(
                os.path.join(target_dir, name + '-' + suffix + '.npy'))
            for name in ('train', 'test', 'validation')):
        return
    util.prepare_dir(target_dir, hard=False)

    train, test, validation = [], [], []
    tot = -1  # Sentinel; set to the per-shape total after the first shape.
    shapes = ['square', 'ellipse', 'triangle'] if not reduced else ['square']
    for shape in shapes:
        _train, _test, _validation = split_for_shape(shape, tot, gen_unique,
                                                     reduced)

        if tot == -1:
            tot = len(_train) + len(_test) + len(_validation)

        train.extend(_train)
        test.extend(_test)
        validation.extend(_validation)

    # A single shuffle of each combined split suffices; np.random.permutation
    # below randomizes the saved arrays once more in any case.
    random.shuffle(train)
    random.shuffle(test)
    random.shuffle(validation)

    statistics(train, test, validation)

    logger.info('Separation done.')

    np_test = np.array([
        draw_util.encoded_image_to_flattened_bits(encoded) for encoded in test
    ])
    np_test = np.random.permutation(np_test)
    np.save(os.path.join(target_dir, 'test-' + suffix), np_test)
    logger.info('Test dataset converted.')

    np_validation = np.array([
        draw_util.encoded_image_to_flattened_bits(encoded)
        for encoded in validation
    ])
    np_validation = np.random.permutation(np_validation)
    np.save(os.path.join(target_dir, 'validation-' + suffix), np_validation)
    logger.info('Validation dataset converted.')

    np_train = np.array([
        draw_util.encoded_image_to_flattened_bits(encoded) for encoded in train
    ])
    np_train = np.random.permutation(np_train)
    np.save(os.path.join(target_dir, 'train-' + suffix), np_train)
    logger.info('Train dataset converted.')
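
# Usage sketch: generate the .npy splits once, then read them back with
# get_autoencoder_data below. Both derive the file suffix from
# constants.run_index(), so the two calls must run under the same run-counter
# state (an assumption based on the suffix construction above):
#
#   gen_autoencoder_data(gen_unique=True)
#   train, test, validation = get_autoencoder_data(get_unique=True)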
def get_autoencoder_data(index=None, get_unique=True):
    """Loads the clean autoencoder data splits.

    By default the index in run_counter.txt is used - useful for training.
    """
    suffix = (('' if not get_unique else 'unique-') +
              str(constants.run_index() if index is None else index))
    data_dir = get_autoencoder_data_dir()
    return (np.load(os.path.join(data_dir, 'train-' + suffix + '.npy')),
            np.load(os.path.join(data_dir, 'test-' + suffix + '.npy')),
            np.load(os.path.join(data_dir, 'validation-' + suffix + '.npy')))
def get_classifier_data(index=None):
    """Loads the classifier data."""
    suffix = str(constants.run_index()) if index is None else str(index)
    data_dir = get_classifier_data_dir()
    with open(os.path.join(data_dir, 'train-' + suffix + '.txt')) as data_file:
        train_data = json.load(data_file)
    with open(os.path.join(data_dir, 'test-' + suffix + '.txt')) as data_file:
        test_data = json.load(data_file)
    with open(os.path.join(data_dir,
                           'validation-' + suffix + '.txt')) as data_file:
        validation_data = json.load(data_file)
    return train_data, test_data, validation_data
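
# Usage sketch: the classifier splits are JSON, the autoencoder splits are
# .npy arrays; a run that needs both would look roughly like this (same
# run-index assumption as above):
#
#   gen_classifier_data()
#   train_data, test_data, validation_data = get_classifier_data()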
def go_shapes(denoising, architecture, reduced=False):
    """Trains shapes autoencoders for each beta in [BETA_LOW, BETA_HIGH]."""
    # Generate data
    generate_data.gen_autoencoder_data(reduced=reduced)

    # Fetch training data
    logger.info('Start fetching.')
    train, _, validation = util.get_autoencoder_data()
    if len(validation) > 40000:
        validation = validation[:40000]
    logger.info('Train and validation data are read.')

    logger.info('Train set: {0}'.format(len(train)))
    logger.info('Validation set: {0}'.format(len(validation)))

    # Train and log all autoencoders
    current_beta = constants.BETA_LOW
    flag = True
    while current_beta - constants.BETA_HIGH <= 1e-6:

        # Experiment index
        seq_index = str(constants.run_index()) + '-' + str(current_beta)
        if denoising:
            seq_index += '-denoising'
        if architecture == constants.CONV:
            seq_index += '-conv'

        logger.info('Start building the variational autoencoder architecture.')
        logger.info('Beta = {0}, Seq index = {1}'.format(
            current_beta, seq_index))

        # Use a smaller learning rate when beta is effectively zero.
        lr = 0.00003 if current_beta < 1e-3 else 0.001
        autoencoder = get_autoencoder.shapes_set(architecture, current_beta,
                                                 lr, seq_index, denoising)

        is_training_successful = train_and_log_autoencoder(
            autoencoder, train, validation)
        if not is_training_successful:
            flag = False
            break
        current_beta += constants.BETA_STEP
    if flag:
        constants.increase_index()
    return flag
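
# Usage sketch (assuming constants.CONV names the convolutional architecture,
# as referenced above). go_shapes returns False as soon as one training run
# fails, and only advances the run index after a fully successful sweep:
#
#   if not go_shapes(denoising=False, architecture=constants.CONV):
#       logger.error('Beta sweep failed; run index left unchanged.')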