Exemple #1
0
def CSVPrepro(
    vocab_size: int,
    batch_size: int = 512,
    repeat_size: Optional[int] = None,
    prefetch_size: int = 1,
    num_parallel_calls: int = 8,
    num_negatives: Optional[int] = None,
):
    """Build the CSV preprocessing pipeline for MovieLens.

    The steps are chained in this order: a ``SequenceMask`` map writing
    ``inputMask``, a ``SequenceMask`` map writing ``targetMask``, a padded
    batch, an optional ``RandomNegatives`` map, a repeat restricted to
    ``deepr.TRAIN`` and a prefetch.

    Args:
        vocab_size: Passed to the one-hot field constructors and, when
            negatives are requested, to ``RandomNegatives``.
        batch_size: Passed to ``deepr.prepros.PaddedBatch``.
        repeat_size: Passed to ``deepr.prepros.Repeat``, which is created
            with ``modes=[deepr.TRAIN]``.
        prefetch_size: Passed to ``deepr.prepros.Prefetch``.
        num_parallel_calls: Passed to ``deepr.prepros.Serial``.
        num_negatives: When not ``None``, a ``RandomNegatives`` map reading
            ``targetPositives`` and writing ``targetNegatives`` is inserted
            after batching. When ``None``, an empty list is passed in its
            place.

    Returns:
        The ``deepr.prepros.Serial`` object wrapping all steps.
    """
    padded_fields = [
        F.UID,
        F.INPUT_POSITIVES,
        F.INPUT_MASK,
        F.TARGET_POSITIVES,
        F.TARGET_MASK,
        F.INPUT_POSITIVES_ONE_HOT(vocab_size),
        F.TARGET_POSITIVES_ONE_HOT(vocab_size),
    ]

    # Steps are created in pipeline order so construction order is unchanged.
    steps = [
        deepr.prepros.Map(SequenceMask(inputs="inputPositives", outputs="inputMask")),
        deepr.prepros.Map(SequenceMask(inputs="targetPositives", outputs="targetMask")),
        deepr.prepros.PaddedBatch(batch_size=batch_size, fields=padded_fields),
    ]

    if num_negatives is not None:
        negative_sampler = RandomNegatives(
            inputs="targetPositives",
            outputs="targetNegatives",
            num_negatives=num_negatives,
            vocab_size=vocab_size,
        )
        steps.append(deepr.prepros.Map(negative_sampler))
    else:
        # NOTE(review): the empty list is kept as a positional placeholder;
        # presumably Serial flattens nested sequences -- confirm against deepr.
        steps.append([])

    steps.append(deepr.prepros.Repeat(repeat_size, modes=[deepr.TRAIN]))
    steps.append(deepr.prepros.Prefetch(prefetch_size))

    return deepr.prepros.Serial(*steps, num_parallel_calls=num_parallel_calls)
Exemple #2
0
 def __init__(self, path_csv_tr: str, path_csv_te: str, vocab_size: int):
     """Store the two CSV paths and the vocabulary size, and build the field list.

     Args:
         path_csv_tr: Stored as ``self.path_csv_tr`` (presumably the
             training CSV path -- confirm against callers).
         path_csv_te: Stored as ``self.path_csv_te`` (presumably the
             test CSV path -- confirm against callers).
         vocab_size: Stored as ``self.vocab_size`` and passed to the two
             one-hot field constructors.
     """
     self.path_csv_tr, self.path_csv_te = path_csv_tr, path_csv_te
     self.vocab_size = vocab_size

     # Plain fields come first, followed by the one-hot fields built from
     # vocab_size; the resulting order matters to whoever consumes the list.
     plain_fields = [fields.UID, fields.INPUT_POSITIVES, fields.TARGET_POSITIVES]
     one_hot_fields = [
         fields.INPUT_POSITIVES_ONE_HOT(vocab_size),
         fields.TARGET_POSITIVES_ONE_HOT(vocab_size),
     ]
     self.fields = plain_fields + one_hot_fields
Exemple #3
0
 def __init__(self,
              path_csv: str,
              vocab_size: int,
              target_ratio: float = None,
              shuffle: bool = True,
              seed: int = 98765):
     """Record the constructor arguments and build the field list.

     Args:
         path_csv: Stored as ``self.path_csv``.
         vocab_size: Stored as ``self.vocab_size`` and passed to the two
             one-hot field constructors.
         target_ratio: Stored as ``self.target_ratio``; defaults to ``None``.
         shuffle: Stored as ``self.shuffle``; defaults to ``True``.
         seed: Stored as ``self.seed``; defaults to ``98765``.
     """
     self.path_csv, self.vocab_size = path_csv, vocab_size
     self.target_ratio, self.shuffle, self.seed = target_ratio, shuffle, seed

     # Identifier and positives first, then their one-hot counterparts.
     field_list = [fields.UID, fields.INPUT_POSITIVES, fields.TARGET_POSITIVES]
     field_list.append(fields.INPUT_POSITIVES_ONE_HOT(vocab_size))
     field_list.append(fields.TARGET_POSITIVES_ONE_HOT(vocab_size))
     self.fields = field_list