Example #1
    def scoring(self, type='pt', save_imgs=False, save_cams=False):
        test_labels, pred_labels = [], []
        total_batches = shutils.get_num_batches(self.num_test, cnt.BATCH_SIZE)

        if type == 'pt':
            encoder = shutils.load_data_pkl(cnt.PT_ENCODER_PATH)
            pred_out_dir = cnt.PT_PREDS_PATH
            cam_dir = cnt.PT_CAMS_PATH
            self.init_pt_model()
            model = self.pt_model
            model.load_weights(cnt.PT_MODEL_PATH)

        else:
            encoder = shutils.load_data_pkl(cnt.COLOR_ENCODER_PATH)
            pred_out_dir = cnt.COLOR_PREDS_PATH
            cam_dir = cnt.COLOR_CAMS_PATH
            self.init_color_model()
            model = self.color_model
            model.load_weights(cnt.COLOR_MODEL_PATH)

        num_batches, start = 0, 0

        for batch_data, batch_labels in self.data_generator(
                self.num_test, 'test', type):
            test_labels += batch_labels.tolist()
            predictions = self.predict(batch_data, type)
            pred_labels += predictions
            num_batches += 1

            indices = [start + i for i in range(len(batch_labels))]

            if save_imgs:
                utils.save_imgs(batch_data, indices, np.array(batch_labels),
                                np.array(predictions), encoder, pred_out_dir)

            if save_cams:
                utils.cam(model, batch_data, indices, np.array(batch_labels),
                          np.array(predictions), encoder, cam_dir)

            start += len(batch_labels)

            if num_batches == total_batches:
                break

        # keep only samples with at least one predicted label; an all-zero
        # row has nothing for encoder.inverse_transform to map back
        h = np.sum(np.array(pred_labels), axis=1)
        idx = np.nonzero(h > 0)[0]

        t_labels = encoder.inverse_transform(np.array(test_labels)[idx])
        p_labels = encoder.inverse_transform(np.array(pred_labels)[idx])

        print(classification_report(t_labels, p_labels))
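A hedged usage sketch for the method above; network stands for an instance of the owning class, whose construction is not shown in this snippet:

# score the 'pt' model, persisting per-image predictions and CAM visualizations
network.scoring(type='pt', save_imgs=True, save_cams=True)

# score the 'color' model without saving artifacts
network.scoring(type='color')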
Example #2
def create_train_test():
    # open before the try block so the finally clause never references an
    # unbound img_arr_file if open_file itself fails
    img_arr_file = tables.open_file(cnt.IMAGE_ARRAY_PATH, mode='r')
    try:
        img_arr = img_arr_file.root.data

        train_indices, test_indices = train_test_split(range(img_arr.shape[0]),
                                                       test_size=0.2)

        encoder = MultiLabelBinarizer()

        labels = shutils.load_data_pkl(cnt.LABELS_PATH)
        labels = [x.strip().split('__') for x in labels]

    transformed_labels = encoder.fit_transform(labels)

    shutils.save_data_pkl(transformed_labels, cnt.TRANSFORMED_LABELS_PATH)
        shutils.save_data_pkl(encoder, cnt.ENCODER_PATH)

        print(len(train_indices), len(test_indices))

        shutils.save_data_pkl(train_indices, cnt.TRAIN_INDICES_PATH)
        shutils.save_data_pkl(test_indices, cnt.TEST_INDICES_PATH)

    finally:
        img_arr_file.close()
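Once create_train_test() has run, the persisted artifacts can be read back with the same helpers; a minimal sketch using the paths saved above:

create_train_test()

# reload the fitted encoder and binarized label matrix for later steps
encoder = shutils.load_data_pkl(cnt.ENCODER_PATH)
transformed_labels = shutils.load_data_pkl(cnt.TRANSFORMED_LABELS_PATH)
print(len(encoder.classes_), transformed_labels.shape)  # label vocabulary size, (num_samples, num_classes)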
Example #3
def get_data_as_generator(num_data, prefix='train'):
    # open before the try block so the finally clause never references an
    # unbound img_arr_file if open_file itself fails
    img_arr_file = tables.open_file(cnt.IMAGE_ARRAY_PATH, mode='r')
    try:
        img_arr = img_arr_file.root.data

        txt_arr = shutils.load_data_pkl(cnt.INPUT_TENSOR_PATH)
        txt_arr = np.array(txt_arr)

        labels = shutils.load_data_pkl(cnt.TRANSFORMED_LABELS_PATH)

        labels = np.array(labels)
        random.seed(42)

        if prefix == 'train':
            indices = shutils.load_data_pkl(cnt.TRAIN_INDICES_PATH)
        else:
            indices = shutils.load_data_pkl(cnt.TEST_INDICES_PATH)

        random.shuffle(indices)
        indices = np.array(indices)

        num_batches = int(math.ceil(float(num_data) / cnt.BATCH_SIZE))

        batch_num = 0

        while True:
            m = batch_num % num_batches

            start, end = m * cnt.BATCH_SIZE, min((m + 1) * cnt.BATCH_SIZE,
                                                 num_data)
            batch_indices = indices[start:end]

            out_img_arr = np.array([img_arr[x] for x in batch_indices])
            out_txt_arr = np.array([txt_arr[x] for x in batch_indices])

            batch_num += 1

            yield [out_img_arr, out_txt_arr], labels[batch_indices]

    finally:
        img_arr_file.close()
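Because this generator loops forever and yields ([image_batch, text_batch], label_batch) tuples, it can be passed straight to Keras training; a sketch, assuming a compiled two-input model and a hypothetical num_train count:

steps = shutils.get_num_batches(num_train, cnt.BATCH_SIZE)  # num_train is an assumption
model.fit(get_data_as_generator(num_train, prefix='train'),
          steps_per_epoch=steps, epochs=10)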
Example #4
def get_data_as_generator(num_samples, type='train'):
    if type == 'train':
        src_tensor_train = shutils.load_data_pkl(cnt.SRC_TENSOR_TRAIN)
        trg_tensor_train = shutils.load_data_pkl(cnt.TRG_TENSOR_TRAIN)

        n = len(src_tensor_train)

        dataset = tf.data.Dataset.from_tensor_slices(
            (src_tensor_train, trg_tensor_train)).shuffle(n)

    else:
        src_tensor_valid = shutils.load_data_pkl(cnt.SRC_TENSOR_VALID)
        trg_tensor_valid = shutils.load_data_pkl(cnt.TRG_TENSOR_VALID)

        n = len(src_tensor_valid)

        dataset = tf.data.Dataset.from_tensor_slices(
            (src_tensor_valid, trg_tensor_valid)).shuffle(n)

    dataset = dataset.batch(cnt.BATCH_SIZE, drop_remainder=False)

    return iter(dataset)
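Unlike the endless generators above, this variant returns iter(...) over a batched tf.data.Dataset, so one pass exhausts it (note that num_samples is never used inside the function body). A sketch of one epoch, where train_step is a hypothetical training function:

train_iter = get_data_as_generator(num_train, type='train')  # num_train is an assumption; the argument is ignored
for src_batch, trg_batch in train_iter:
    loss = train_step(src_batch, trg_batch)  # train_step is an assumption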
Example #5
def get_data_as_generator(num_data, prefix='train'):
    random.seed(42)

    word_vector_model = utils.get_vector_model(cnt.VECTOR_MODEL,
                                               char_tokens=False)
    char_vector_model = utils.get_vector_model(cnt.VECTOR_MODEL,
                                               char_tokens=True)

    data_pairs = shutils.load_data_pkl(
        os.path.join(cnt.PERSISTENCE_PATH, prefix + "_data_pairs.pkl"))
    random.shuffle(data_pairs)

    num_batches = shutils.get_num_batches(num_data, cnt.BATCH_SIZE)

    batch_num = 0

    while True:
        m = batch_num % num_batches

        start, end = m * cnt.BATCH_SIZE, min((m + 1) * cnt.BATCH_SIZE,
                                             num_data)

        word_tokens1, word_tokens2, char_tokens1, char_tokens2, labels = zip(
            *data_pairs[start:end])
        labels = np.array(labels)
        labels = np.expand_dims(labels, -1)

        word_data_1 = shutils.get_vectors(word_vector_model, word_tokens1,
                                          cnt.WORD_VECTOR_DIM)
        word_data_2 = shutils.get_vectors(word_vector_model, word_tokens2,
                                          cnt.WORD_VECTOR_DIM)

        char_data_1 = np.array([
            shutils.get_vectors(char_vector_model, x, cnt.CHAR_VECTOR_DIM)
            for x in char_tokens1
        ])
        char_data_2 = np.array([
            shutils.get_vectors(char_vector_model, x, cnt.CHAR_VECTOR_DIM)
            for x in char_tokens2
        ])

        batch_num += 1

        yield [word_data_1, word_data_2, char_data_1, char_data_2], labels
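Each yielded batch pairs word-level and character-level vectors for two texts with a binary label; a quick shape check, assuming a hypothetical num_train:

gen = get_data_as_generator(num_train, prefix='train')
(word_1, word_2, char_1, char_2), labels = next(gen)
print(labels.shape)  # (batch_size, 1) after expand_dims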
Example #6
    def scoring(self):
        test_labels, pred_labels = [], []
        total_batches = shutils.get_num_batches(self.num_test, cnt.BATCH_SIZE)
        encoder = shutils.load_data_pkl(cnt.ENCODER_PATH)

        num_batches = 0

        for batch_data, batch_labels in self.data_generator(
                self.num_test, 'test'):
            test_labels += batch_labels.tolist()
            predictions = self.predict(batch_data)
            pred_labels += predictions
            num_batches += 1

            if num_batches == total_batches:
                break

        t_labels = encoder.inverse_transform(np.array(test_labels))
        p_labels = encoder.inverse_transform(np.array(pred_labels))

        print(
            classification_report(t_labels,
                                  p_labels,
                                  target_names=encoder.classes_))
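Unlike Example #1, this variant reports on every test sample and names the report rows via target_names=encoder.classes_. A one-line usage sketch; the construction of network is assumed, as the class is not shown here:

network.scoring()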
Example #7
# print("Reading input...")
# utils.read_input_file()

# print("Downloading images...")
# utils.download_images()

# print("Creating image data...")
# utils.create_image_data()

# print("Creating text data...")
# utils.create_text_data()

# print("Creating train test...")
# utils.create_train_test()

n = len(shutils.load_data_pkl(cnt.TRAIN_INDICES_PATH))
m = len(shutils.load_data_pkl(cnt.TEST_INDICES_PATH))

transf_labels = shutils.load_data_pkl(cnt.TRANSFORMED_LABELS_PATH)
num_classes = transf_labels.shape[1]

vocab_size = shutils.load_data_pkl(cnt.VOCAB_SIZE_PATH)

print(n, m)

# print("Training model...")
# network = AttributeExtractionNetwork(dg.get_data_as_generator, n, m, num_classes, vocab_size)
# network.fit()

print("Scoring model...")
network = AttributeExtractionNetwork(dg.get_data_as_generator, n, m,
                                     num_classes, vocab_size)
network.scoring()
Example #8
# print("Getting train test tokens...")
# train_indices, test_indices = train_test_split(range(len(items_train)), test_size=0.2)

# shutils.save_data_pkl(train_indices, os.path.join(cnt.PERSISTENCE_PATH, "train_indices.pkl"))
# shutils.save_data_pkl(test_indices, os.path.join(cnt.PERSISTENCE_PATH, "test_indices.pkl"))

# train_data_pairs, test_data_pairs = utils.get_tokens_indices(items_train, train_indices), utils.get_tokens_indices(items_train, test_indices)

# train_data_pairs, test_data_pairs = utils.get_tokens_indices(items_train, range(len(items_train))), utils.get_tokens_indices(items_test, range(len(items_test)))

# shutils.save_data_pkl(train_data_pairs, os.path.join(cnt.PERSISTENCE_PATH, "train_data_pairs.pkl"))
# shutils.save_data_pkl(test_data_pairs, os.path.join(cnt.PERSISTENCE_PATH, "test_data_pairs.pkl"))

n = len(
    shutils.load_data_pkl(
        os.path.join(cnt.PERSISTENCE_PATH, "train_data_pairs.pkl")))
m = len(
    shutils.load_data_pkl(
        os.path.join(cnt.PERSISTENCE_PATH, "test_data_pairs.pkl")))

print("Training model...")
network = DeepMatchingNetwork(dg.get_data_as_generator, n, m)
network.fit()

# print("Scoring model...")
# network = DeepMatchingNetwork(dg.get_data_as_generator, n, m)
# network.scoring()

# network = DeepMatchingNetwork(dg.get_data_as_generator, n, m)
# network.init_model()
# network.load()
Example #9
# src_tensor_train, src_tensor_valid, trg_tensor_train, trg_tensor_valid = train_test_split(src_tensor, trg_tensor, test_size=0.2)

# shutils.save_data_pkl(src_tensor_train, cnt.SRC_TENSOR_TRAIN)
# shutils.save_data_pkl(src_tensor_valid, cnt.SRC_TENSOR_VALID)
# shutils.save_data_pkl(trg_tensor_train, cnt.TRG_TENSOR_TRAIN)
# shutils.save_data_pkl(trg_tensor_valid, cnt.TRG_TENSOR_VALID)

# shutils.save_data_pkl(src_tensor, cnt.SRC_TENSOR)
# shutils.save_data_pkl(trg_tensor, cnt.TRG_TENSOR)

# shutils.save_data_pkl(src_lang, cnt.SRC_LANG)
# shutils.save_data_pkl(trg_lang, cnt.TRG_LANG)

# print(len(src_tensor_train), len(trg_tensor_train), len(src_tensor_valid), len(trg_tensor_valid))

src_tensor_train = shutils.load_data_pkl(cnt.SRC_TENSOR_TRAIN)
trg_tensor_train = shutils.load_data_pkl(cnt.TRG_TENSOR_TRAIN)
src_tensor_valid = shutils.load_data_pkl(cnt.SRC_TENSOR_VALID)
trg_tensor_valid = shutils.load_data_pkl(cnt.TRG_TENSOR_VALID)

src_lang = shutils.load_data_pkl(cnt.SRC_LANG)
trg_lang = shutils.load_data_pkl(cnt.TRG_LANG)

src_tensor = shutils.load_data_pkl(cnt.SRC_TENSOR)
trg_tensor = shutils.load_data_pkl(cnt.TRG_TENSOR)

max_length_src = utils.max_length(src_tensor)
max_length_trg = utils.max_length(trg_tensor)

n, m = len(src_tensor_train), len(src_tensor_valid)
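The tensors loaded above line up with the get_data_as_generator variant in Example #4, which reads the same SRC_TENSOR_TRAIN/TRG_TENSOR_TRAIN pickles; a hedged sketch of the next step (the seq2seq training loop itself is not part of these snippets):

train_iter = get_data_as_generator(n, type='train')
valid_iter = get_data_as_generator(m, type='valid')  # any value other than 'train' selects the validation split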