# Example 1
def thumbnail_base_generator(root, batch_size=BATCH_SIZE, input_shape=INPUT_SHAPE,
                             input_label=INPUT_LABEL, output_label=OUTPUT_LABEL,
                             scaling=1):
    """Yield (batch_input, batch_output) random crops from paired folders.

    Files under ``root/input_label`` are paired with files under
    ``root/output_label`` by ``os.listdir`` order, optionally rescaled by
    ``scaling``, and a random ``input_shape[:2]`` window is cut at the same
    offset from both images.  The first ``next()`` yields None (priming
    step); subsequent iterations yield training batches forever.

    NOTE(review): pairing relies on both folders listing in the same order
    with matching files — confirm the dataset guarantees this.  Also, if an
    image is *smaller* than ``input_shape``, ``np.random.randint`` below
    gets a non-positive upper bound and raises — presumably inputs are
    always at least crop-sized; verify.
    """
    # Absolute paths of every file in the input/output folders.
    input_filenames = [os.path.join(root, input_label, filename)
                       for filename in os.listdir(os.path.join(root, input_label))]
    output_filenames = [os.path.join(root, output_label, filename)
                        for filename in os.listdir(os.path.join(root, output_label))]
    # Read one target up front to learn its channel count for the buffer.
    first_output = read(output_filenames[0])
    # Priming yield so callers can advance the generator once before use.
    yield None
    while True:
        batch_input = np.zeros((batch_size, input_shape[0], input_shape[1], input_shape[2]))
        batch_output = np.zeros((batch_size, input_shape[0], input_shape[1], first_output.shape[-1]))

        # Sample file pairs uniformly with replacement.
        filenames_index = np.random.randint(0, len(input_filenames), size=batch_size)
        for i, filename_index in enumerate(filenames_index):
            input_ = read(input_filenames[filename_index])
            output = read(output_filenames[filename_index])
            # Rescale only when the scaled image still exceeds the crop size.
            if scaling != 1 and input_.shape[0] * scaling > input_shape[0] \
                    and input_.shape[1] * scaling > input_shape[1]:
                input_ = resize(input_, (int(input_.shape[0] * scaling), int(input_.shape[1] * scaling)))
                output = resize(output, (int(output.shape[0] * scaling), int(output.shape[1] * scaling)))

            # Random crop offset; 0 when the image exactly matches the crop.
            if input_shape[0] == input_.shape[0]:
                x_offset = 0
            else:
                x_offset = np.random.randint(0, input_.shape[0] - input_shape[0])
            if input_shape[1] == input_.shape[1]:
                y_offset = 0
            else:
                y_offset = np.random.randint(0, input_.shape[1] - input_shape[1])
            # The same window is cut from input and output so they stay aligned.
            batch_input[i] = input_[x_offset:x_offset + input_shape[0], y_offset:y_offset + input_shape[1]]
            batch_output[i] = output[x_offset:x_offset + input_shape[0], y_offset:y_offset + input_shape[1]]
        yield batch_input, batch_output
# Example 2
def run_batch_on_filenames_with_multiple_inputs(filenames,
                                                model,
                                                normalized_inputs,
                                                batch_size=BATCH_SIZE):
    """Run ``model.predict`` over ``filenames`` in batches with two inputs.

    The first model input is the stack of images read from disk at the
    model's input resolution; the second is looked up per file in
    ``normalized_inputs``, keyed by basename.  Returns one array of shape
    ``(len(filenames),) + model.output.shape[1:]``.
    """
    out_shape = model.output.shape[1:]
    in_shape = model.layers[0].input_shape[-3:]
    predictions = np.zeros([len(filenames)] + list(out_shape))

    n_batches = len(filenames) // batch_size + 1
    for index in tqdm.tqdm(range(n_batches)):
        start = index * batch_size
        stop = start + batch_size
        chunk = filenames[start:stop]
        if not chunk:
            # The +1 batch count can produce one empty trailing slice.
            break
        images = np.array([read(name, in_shape) for name in chunk])
        extras = np.array(
            [normalized_inputs[os.path.split(name)[-1]] for name in chunk])
        predictions[start:stop] = model.predict([images, extras])
    return predictions
# Example 3
def saliency_base_generator(root, input_shape=INPUT_SHAPE, batch_size=BATCH_SIZE, output_shape=OUTPUT_SHAPE,
                            folders=None):
    """Yield (image batch, dense label-map batch) pairs for saliency training.

    Each folder is one class; every spatial position of a sample's output
    map is filled with that class's color code (scalar 0/1 for two classes,
    a one-hot row otherwise).  ``folders`` may be None (use
    ``list_dir(root)``) or a string of the form "(a, b, ...)" naming
    sub-folders of ``root``.  The first ``next()`` yields None (priming).

    Raises:
        ValueError: if fewer than 2 folders are available.
    """
    folders = list_dir(root) if folders is None else [os.path.join(root, folder) for folder in
                                                      folders[1:-1].split(', ')]
    if len(folders) == 2:
        # Binary case: a single scalar channel, 0 or 1.
        colors = np.array([[0], [1]])
    elif len(folders) > 2:
        # One one-hot row per class.  BUGFIX: ``np.int`` was deprecated in
        # NumPy 1.20 and removed in 1.24, so the original crashed on modern
        # NumPy; the builtin ``int`` is the documented replacement.  The
        # original's trailing ``* 1`` was a no-op and is dropped.
        colors = np.eye(len(folders), dtype=int)
    else:
        raise ValueError(f'Number of folders (currently {len(folders)}) should be equal or greater than 2')

    # Map each file path to its class color, and each folder to its files.
    mapping = {os.path.join(folder, filename): colors[i]
               for (i, folder) in enumerate(folders)
               for filename in os.listdir(folder)}

    label_to_filename = {folder: [os.path.join(folder, filename)
                                  for filename in os.listdir(folder)]
                         for folder in folders
                         }

    # Resolve Windows shortcut files in place (side effect only).
    for filename in mapping:
        if filename.endswith('.lnk'):
            convert_link(filename)

    yield None
    while True:
        # Sample classes uniformly, then one file from each sampled class —
        # this balances classes regardless of folder sizes.
        batch_labels = np.random.choice(folders, size=batch_size)
        batch_path = [np.random.choice(label_to_filename[label]) for label in batch_labels]

        batch_input = np.array([read(path, input_shape=input_shape) for path in batch_path])
        batch_output = np.zeros((batch_size, output_shape[0], output_shape[1], output_shape[2]))

        for i, input_filename in enumerate(batch_path):
            # Broadcast the class color across the whole spatial map.
            batch_output[i, :, :] = mapping[input_filename]

        yield batch_input, batch_output
# Example 4
def autoencoder_base_generator(root, batch_size=BATCH_SIZE, input_shape=INPUT_SHAPE):
    """Yield (input, target) batches where the target is a copy of the input.

    Collects every file one or two levels under ``root``; the first
    ``next()`` yields None (priming step), then random batches forever.
    """
    patterns = (os.path.join(root, '*', '*.*'), os.path.join(root, '*.*'))
    filenames = [name for pattern in patterns for name in glob.glob(pattern)]

    yield None
    while True:
        chosen = np.random.choice(filenames, size=batch_size)
        images = np.array([read(path, input_shape) for path in chosen])
        # Copy so downstream mutation of the target cannot alter the input.
        yield images, images.copy()
# Example 5
def regressor_base_generator(root,
                             attributes,
                             batch_size=BATCH_SIZE,
                             input_shape=INPUT_SHAPE):
    """Yield (image batch, normalized attribute batch) pairs for regression.

    Attribute values come from ``output.json`` in ``root``'s parent
    directory, keyed by file basename.  Only files whose every requested
    attribute exists and is non-NaN are kept.  The first ``next()`` yields
    ``(means, stds)``; later iterations yield batches whose outputs are
    scaled as ``(value - mean) / (2 * std)``.

    ``attributes`` may be a list of names or a string like "(a, b)".
    """
    if isinstance(attributes, str):
        # FIX: ``str.replace`` removes every occurrence in one call, so the
        # original ``while ' ' in attributes`` loop was redundant.
        attributes = attributes.replace(' ', '')
        if attributes.startswith('(') and attributes.endswith(')'):
            attributes = attributes[1:-1]
        attributes = attributes.split(',')

    filenames = np.array(sorted(glob.glob(os.path.join(root, '*.*'))))
    with open(os.path.join(os.path.split(root)[0],
                           'output.json')) as json_file:
        data = json.load(json_file)

    # Keep only files with a complete, NaN-free record for all attributes.
    checked_filenames = []
    for filename in filenames:
        if filename.endswith('.lnk'):
            filename = convert_link(filename)
        key = os.path.split(filename)[-1]
        if all(key in data
               and attribute in data[key]
               and not np.isnan(data[key][attribute])
               for attribute in attributes):
            checked_filenames.append(filename)
    filenames = np.array(checked_filenames)

    # Build the (n_files, n_attributes) matrix once; the original built the
    # identical nested comprehension twice (for the mean and for the std).
    values = np.array([[data[os.path.split(filename)[-1]][attribute]
                        for attribute in attributes]
                       for filename in filenames])
    means = np.mean(values, axis=0)
    # NOTE(review): an attribute that is constant over the dataset gives
    # std == 0 and a divide-by-zero below — presumably real data always
    # varies; confirm upstream.
    stds = np.std(values, axis=0) * 2

    print(f'The attributes were defined for {len(filenames)} files')
    print('MEANS:', means)
    print('STDS:', stds)
    # First yield hands the normalization constants to the caller.
    yield tuple(means), tuple(stds)
    while True:
        filenames_index = np.random.randint(0, len(filenames), size=batch_size)

        batch_filenames = filenames[filenames_index]
        batch_input = np.array(
            [read(filename, input_shape) for filename in batch_filenames])
        batch_output = np.array([[
            data[os.path.split(filename)[-1]][attribute]
            for attribute in attributes
        ] for filename in batch_filenames])
        batch_output = (batch_output - means) / stds
        yield batch_input, batch_output
# Example 6
def segmenter_base_generator(root, batch_size=BATCH_SIZE, input_shape=INPUT_SHAPE, output_shape=OUTPUT_SHAPE,
                             input_label=INPUT_LABEL, output_label=OUTPUT_LABEL, attributes=None):
    """Yield (image batch, mask batch) pairs for segmentation training.

    Input and target files are matched by sorted order inside
    ``root/input_label`` and ``root/output_label``; both folders must hold
    the same number of files.  The first ``next()`` yields None (priming).
    """
    inputs = np.array(sorted(glob.glob(os.path.join(root, input_label, '*.*'))))
    targets = np.array(sorted(glob.glob(os.path.join(root, output_label, '*.*'))))

    assert len(inputs) == len(targets)

    # Resolve Windows shortcut files in place (side effect only).
    for name in inputs:
        if name.endswith('.lnk'):
            convert_link(name)
    for name in targets:
        if name.endswith('.lnk'):
            convert_link(name)

    add_additional_inputs = get_add_additional_inputs(root, attributes)

    yield None
    while True:
        chosen = np.random.randint(0, len(inputs), size=batch_size)
        in_paths = inputs[chosen]
        out_paths = targets[chosen]
        batch_input = np.array([read(path, input_shape) for path in in_paths])
        batch_output = np.array([read(path, output_shape) for path in out_paths])
        # Optionally append extra per-file inputs derived from attributes.
        batch_input = add_additional_inputs(batch_input, in_paths)
        yield batch_input, batch_output
# Example 7
def categorizer_base_generator(root,
                               batch_size=BATCH_SIZE,
                               input_shape=INPUT_SHAPE,
                               folders=None,
                               attributes=None):
    """Yield (image batch, one-hot label batch) sampled uniformly per class.

    ``folders`` may be None (every directory under ``root`` via
    ``list_dir``) or a string of the form "(a, b, ...)" naming sub-folders
    of ``root``.  The first ``next()`` yields None (priming step).
    """
    if folders is None:
        folders = list_dir(root)
    else:
        folders = [os.path.join(root, name)
                   for name in folders[1:-1].split(', ')]

    # Per-file one-hot labels and per-folder file lists.
    filename_to_hot_label = {}
    label_to_filename = {}
    for folder in folders:
        paths = [os.path.join(folder, name) for name in os.listdir(folder)]
        label_to_filename[folder] = paths
        for path in paths:
            filename_to_hot_label[path] = make_categorizer_output(
                folders.index(folder), len(folders))

    # Resolve Windows shortcut files in place (side effect only).
    for path in filename_to_hot_label:
        if path.endswith('.lnk'):
            convert_link(path)

    add_additional_inputs = get_add_additional_inputs(root, attributes)

    yield None
    while True:
        # Sample classes uniformly, then one file from each sampled class —
        # this balances classes regardless of folder sizes.
        chosen_labels = np.random.choice(folders, size=batch_size)
        chosen_paths = np.array([
            np.random.choice(label_to_filename[label])
            for label in chosen_labels
        ])
        images = np.array(
            [read(path, input_shape=input_shape) for path in chosen_paths])
        labels = np.array(
            [filename_to_hot_label[path] for path in chosen_paths])
        # Optionally append extra per-file inputs derived from attributes.
        images = add_additional_inputs(images, chosen_paths)
        yield images, labels
# Example 8
def run_batch_on_filenames(filenames,
                           model,
                           batch_size=BATCH_SIZE,
                           normalizer=None,
                           use_tqdm=True):
    """Run ``model.predict`` over ``filenames`` in fixed-size batches.

    Images are loaded with ``read`` at the model's input resolution and
    optionally transformed by ``normalizer`` before prediction.  Returns
    one array of shape ``(len(filenames),) + model.output.shape[1:]``.
    """
    out_shape = model.output.shape[1:]
    in_shape = model.layers[0].input_shape[-3:]
    predictions = np.zeros([len(filenames)] + list(out_shape))

    n_batches = len(filenames) // batch_size + 1
    iterator = tqdm.tqdm(range(n_batches)) if use_tqdm else range(n_batches)
    for index in iterator:
        start = index * batch_size
        stop = start + batch_size
        chunk = filenames[start:stop]
        if not chunk:
            # The +1 batch count can produce one empty trailing slice.
            break
        batch = np.array([read(name, in_shape) for name in chunk])
        if normalizer is not None:
            batch = normalizer(batch)
        predictions[start:stop] = model.predict(batch)
    return predictions