def test_batching(self):
     """Check ``batching`` over three parallel datasets of ten items each.

     Every case builds a batcher, regroups its output into one sequence of
     batches per dataset, and verifies both the number of batches and that
     flattening them gives back the expected leading items in order.
     """
     numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
     letters = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
     numerals = [
         "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X"
     ]

     # (batch size, keep incomplete batches, expected batches, expected items)
     cases = (
         (2, False, 5, 10),  # Case 1: size divides the data evenly
         (3, True, 4, 10),   # Case 2: trailing short batch is kept
         (3, False, 3, 9),   # Case 3: trailing short batch is dropped
     )
     for size, keep_incomplete, n_batches, n_items in cases:
         batcher = batching(list_of_iterables=[numbers, letters, numerals],
                            n=size,
                            infinite=False,
                            return_incomplete_batches=keep_incomplete)
         first, second, third = zip(*batcher)
         grouped = (first, second, third)
         for batches in grouped:
             self.assertEqual(n_batches, len(batches))
         for source, batches in zip((numbers, letters, numerals), grouped):
             self.assertListEqual(source[:n_items], flatten(batches))
 def predict(self, x, batch_size):
     """Score (user, item) index pairs in mini-batches.

     Args:
         x: Iterable of two-element ``(user, item)`` pairs. The first
             element indexes ``self.u_emb`` / ``self.u_bias`` and the second
             indexes ``self.i_emb`` / ``self.i_bias``.
         batch_size: Passed to ``batching`` as the batch size ``n``;
             incomplete trailing batches are requested as well.

     Returns:
         The per-batch scores stacked vertically, in the order the batches
         were produced.
     """
     batcher = batching([list(x)], n=batch_size, return_incomplete_batches=True)
     preds = []
     for batch in batcher:
         # Only one iterable was given to the batcher; its slice is first.
         users, items = zip(*batch[0])
         # Index with lists: a tuple would be read as a multi-axis index.
         users, items = list(users), list(items)
         # NOTE(review): the bias terms are added before the sum over axis 1,
         # so if they broadcast across that axis they are counted once per
         # column -- confirm this matches how the model was trained.
         scores = (self.u_emb[users] * self.i_emb[items]
                   + self.u_bias[users]
                   + self.i_bias[items])
         preds.append(np.sum(scores, axis=1, keepdims=True))
     # np.vstack replaces np.row_stack, a deprecated alias as of NumPy 2.0.
     return np.vstack(preds)
Exemple #3
0
 def get_batches(self, return_incomplete_batches: bool = False):
     """Yield batches with the audio (and target) entries as torch tensors.

     ``self.audios`` is always batched; ``self.targets`` is batched alongside
     it unless ``self.scoring`` is truthy. Each yielded batch has the audio
     tensor (with a new axis inserted at position 1) at index 0 and, at
     index 1, either the target tensor or ``None`` when scoring.

     Args:
         return_incomplete_batches: Forwarded unchanged to ``batching``.
     """
     if self.scoring:
         sources = [self.audios]
     else:
         sources = [self.audios, self.targets]
     batches = batching(list_of_iterables=sources,
                        n=self.batch_size,
                        return_incomplete_batches=return_incomplete_batches)
     for batch in batches:
         audio = np.expand_dims(np.array(batch[0]), 1)
         batch[0] = torch.from_numpy(audio)
         if not self.scoring:
             batch[1] = torch.from_numpy(batch[1])
         else:
             # No targets while scoring: pad so consumers still get two slots.
             batch += [None]
         yield batch
Exemple #4
0
 def predict(self, x, batch_size):
     """Run ``self.output`` over (user id, item id) pairs in mini-batches.

     Args:
         x: Iterable of two-element ``(user_id, item_id)`` pairs.
         batch_size: Passed to ``batching`` as the batch size ``n``;
             incomplete trailing batches are requested as well.

     Returns:
         The per-batch results of ``self.sess.run`` stacked vertically, in
         the order the batches were produced.
     """
     batcher = batching([list(x)],
                        n=batch_size,
                        return_incomplete_batches=True)
     preds = []
     for batch in batcher:
         # Only one iterable was given to the batcher; its slice is first.
         u_ids, i_ids = zip(*batch[0])
         feed = {
             self.ph_u_ids: u_ids,
             self.ph_i_ids: i_ids,
             # Presumably disables dropout at inference time -- TODO confirm.
             self.ph_keep_prob: 1.0,
         }
         preds.append(self.sess.run(self.output, feed_dict=feed))
     # np.vstack replaces np.row_stack, a deprecated alias as of NumPy 2.0.
     return np.vstack(preds)
Exemple #5
0
def get_batcher(df, b_size=16, train=True):
    """Yield ``(ids, padded_codes, targets)`` batches built from ``df``.

    Args:
        df: Data frame providing ``id`` and ``code`` columns plus the six
            target columns listed in ``columns_target``.
        b_size: Batch size handed to ``batching``.
        train: When falsy, the generator yields nothing.

    Yields:
        A 3-tuple of the batch ids, the code rows right-padded with zeros to
        the longest row in the batch and stacked vertically, and the target
        rows stacked vertically.
    """
    columns_target = [
        "toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"
    ]
    if not train:
        return

    pool = [
        df.id.values.tolist(),
        df.code.map(np.matrix).values.tolist(),
        df[columns_target].values.tolist()
    ]
    for element in batching(pool, b_size):
        codes = element[1]
        # Each code is a matrix; its column count is the sequence length.
        max_len = max(code.shape[1] for code in codes)
        padded_rows = [
            np.pad(np.array(code)[0],
                   (0, max_len - code.shape[1]),
                   mode="constant")
            for code in codes
        ]
        # np.vstack replaces np.row_stack, a deprecated alias as of NumPy 2.0.
        batch = np.vstack(padded_rows)
        targets = np.vstack(element[2])
        yield element[0], batch, targets
 def fit(self, x, y, batch_size):
     """Train on ``x`` / ``y`` one mini-batch at a time.

     Both inputs are materialised as lists and batched together (incomplete
     trailing batches included); every resulting pair is passed to
     ``self.train_on_batch``.
     """
     inputs = list(x)
     labels = list(y)
     pairs = batching([inputs, labels],
                      n=batch_size,
                      return_incomplete_batches=True)
     for features, targets in pairs:
         self.train_on_batch(features, targets)
Exemple #7
0
def get_batches_generator(
    df_time,
    df_static,
    batch_size=128,
    min_history=300,
    forecast_horizon=7,
    shuffle=True,
    shuffle_present=True,
    cuda=False,
):
    """Yield mini-batches of history features and forecast targets as tensors.

    Args:
        df_time: Two-dimensional structured array of time-dependent fields;
            axis 1 is treated as the time axis.
        df_static: Structured array of static fields, row-aligned with
            ``df_time`` on ``store_nbr`` / ``item_nbr``.
        batch_size: Batch size handed to ``batching``; incomplete batches
            are not requested.
        min_history: Smallest allowed value of the randomly drawn split
            point ``present``.
        forecast_horizon: Number of time steps after ``present`` returned
            as the target.
        shuffle: Shuffle ``df_time`` and ``df_static`` together before
            batching. Previously this flag was ignored and the data was
            always shuffled; the default keeps that behaviour.
        shuffle_present: Draw ``present`` at random for every batch instead
            of always using the last position that leaves a full horizon.
        cuda: Move every yielded tensor to the GPU with ``.cuda()``.

    Yields:
        ``(numeric_time_batch, cat_time_batch, cat_static_batch, target)``.
        The three time-dependent tensors have axes 0 and 1 swapped relative
        to the batch slices (presumably time-major -- TODO confirm against
        ``recarray_to_array``).

    Raises:
        AssertionError: If the two inputs are not aligned row by row.
    """
    # Function-scope import, as in the original; only the used names remain.
    from src.constants import (
        numeric_feats,
        categorical_feats,
        target_name,
    )

    if shuffle:
        logger.info("Shuffling dataframe...")
        df_time, df_static = shuffle_multiple(df_time, df_static)
        logger.info("Shuffle successful!")

    # Assure perfect alignment
    case_static = df_static[["store_nbr", "item_nbr"]]
    case_time = df_time[:, 0][["store_nbr", "item_nbr"]]
    assert (case_static == case_time).all(), (
        "df_time and df_static are not aligned on (store_nbr, item_nbr)"
    )

    time_steps = df_time.shape[1]
    # Latest split point that still leaves a full forecast horizon.
    last_present = time_steps - forecast_horizon

    batcher = batching(
        list_of_iterables=[df_time, df_static],
        n=batch_size,
        return_incomplete_batches=False,
    )

    # Restrict the configured feature lists to fields actually present.
    # Numeric static features are not produced by this generator.
    num_time_feats = np.intersect1d(numeric_feats, df_time.dtype.names)
    cat_time_feats = np.intersect1d(categorical_feats, df_time.dtype.names)
    cat_static_feats = np.intersect1d(categorical_feats, df_static.dtype.names)

    for batch_time, batch_static in batcher:
        if shuffle_present:
            # randint is inclusive on both ends.
            present = random.randint(min_history, last_present)
        else:
            present = last_present

        # Everything before ``present`` is history; the horizon after it is
        # the target.
        numeric_time_batch = batch_time[num_time_feats][:, :present]
        cat_time_batch = batch_time[cat_time_feats][:, :present]
        cat_static_batch = batch_static[cat_static_feats]
        target = batch_time[target_name][:,
                                         present:(present + forecast_horizon)]

        # Convert to arrays
        numeric_time_batch = recarray_to_array(numeric_time_batch,
                                               np.float32).swapaxes(0, 1)
        cat_time_batch = recarray_to_array(cat_time_batch,
                                           np.int32).swapaxes(0, 1)
        cat_static_batch = recarray_to_array(cat_static_batch, np.int32)
        target = target.astype(np.float32).swapaxes(0, 1)

        # Convert to torch tensors
        numeric_time_batch = torch.from_numpy(numeric_time_batch)
        cat_time_batch = torch.from_numpy(cat_time_batch).long()
        cat_static_batch = torch.from_numpy(cat_static_batch).long()
        target = torch.from_numpy(target)

        # Move to cuda if required
        if cuda:
            numeric_time_batch = numeric_time_batch.cuda()
            cat_time_batch = cat_time_batch.cuda()
            cat_static_batch = cat_static_batch.cuda()
            target = target.cuda()
        yield numeric_time_batch, cat_time_batch, cat_static_batch, target