Ejemplo n.º 1
0
def make_dataset(lst: List[Tuple],
                 bias=None,
                 sample=None,
                 shuffle=False) -> tf.data.Dataset:
    """Build a `tf.data.Dataset` from the tuples made by `convert_to_tuples`.

    Args:
        lst: Example tuples to serve from the dataset.
        bias: Optional lookup indexed by the first field of each tuple. When
            truthy, the looked-up value is inserted into every tuple at
            position `len(base_features)` and exposed as a float32 "bias"
            component of shape (None, 2).
        sample: Forwarded unchanged to `build_epoch_fn`.
        shuffle: Forwarded unchanged to `build_epoch_fn`.

    Returns:
        A dataset of dicts keyed by component name. The last
        `n_label_elements` components are grouped into a nested dict stored
        under the "label" key.
    """
    components = list(base_features)
    if bias:
        n_base = len(base_features)
        lst = [
            example[:n_base] + (bias[example[0]], ) + example[n_base:]
            for example in lst
        ]
        components.append(("bias", tf.float32, (None, 2)))
    components.extend(label_structure)

    # Turn the list of (name, dtype, shape) triples into three parallel tuples.
    names, dtypes, shapes = map(tuple, py_utils.transpose_lists(components))

    epoch_fn = build_epoch_fn(lst, sample, shuffle)
    dataset = tf.data.Dataset.from_generator(epoch_fn, dtypes, shapes)

    def to_dict(*args):
        # Everything before the trailing label components is a feature.
        features = dict(
            zip(names[:-n_label_elements], args[:-n_label_elements]))
        # The label components are nested one level down, under "label".
        features["label"] = dict(
            zip(names[-n_label_elements:], args[-n_label_elements:]))
        return features

    return dataset.map(to_dict)
Ejemplo n.º 2
0
def make_dataset_stratify(lst: List[Tuple], bias, n_groups) -> tf.data.Dataset:
    """Convert tuples from `convert_to_tuples` into a tf.data.Dataset,
    while stratifying the bias accuracy.

    Examples are sorted in ascending order of a per-example bias score
    before being handed to `build_stratified_epoch_fn`.

    Args:
        lst: Example tuples to serve from the dataset.
        bias: Lookup indexed by the first field of each tuple. When truthy,
            the looked-up value is inserted into every tuple at position
            `len(base_features)` and exposed as a float32 "bias" component
            of shape (None, 2).
        n_groups: Forwarded unchanged to `build_stratified_epoch_fn`.

    Returns:
        A dataset of dicts keyed by component name. The last
        `n_label_elements` components are grouped into a nested dict stored
        under the "label" key.

    Raises:
        ValueError: If the dataset structure does not contain exactly one
            component named "bias" (e.g. `bias` was empty or None).
    """
    dataset_structure = list(base_features)
    if bias:
        n = len(base_features)
        lst = [x[:n] + (bias[x[0]], ) + x[n:] for x in lst]
        dataset_structure.append(("bias", tf.float32, (None, 2)))

    dataset_structure += label_structure

    ds_names, ds_dtypes, ds_shapes = [
        tuple(x) for x in py_utils.transpose_lists(dataset_structure)
    ]

    bias_indices = [i for i, name in enumerate(ds_names) if name == "bias"]
    if len(bias_indices) != 1:
        raise ValueError(
            "Expected exactly one 'bias' component in the dataset "
            "structure, found %d" % len(bias_indices))
    bias_ix = bias_indices[0]

    # Score each example by the bias mass selected by its second-to-last
    # field (presumably the answer-span positions -- TODO confirm against
    # `convert_to_tuples`). Examples where that field is empty score 0.
    bias_probs = []
    for example in lst:
        example_bias = example[bias_ix]
        valid = example[-2]
        # `len(...) == 0` rather than truthiness: `valid` may be an array,
        # whose truth value is ambiguous.
        if len(valid) == 0:
            bias_probs.append(0)
        else:
            bias_probs.append(example_bias[valid].sum())

    # Reorder the examples by ascending score.
    ix = np.argsort(bias_probs)
    sorted_examples = [lst[i] for i in ix]

    fn = build_stratified_epoch_fn(sorted_examples, n_groups)

    dataset = tf.data.Dataset.from_generator(fn, ds_dtypes, ds_shapes)

    def to_dict(*args):
        # The trailing `n_label_elements` components are the labels; they
        # are nested under the "label" key of the feature dict.
        labels = {
            k: v
            for k, v in zip(ds_names[-n_label_elements:],
                            args[-n_label_elements:])
        }
        features = {
            k: v
            for k, v in zip(ds_names[:-n_label_elements],
                            args[:-n_label_elements])
        }
        features["label"] = labels
        return features

    return dataset.map(to_dict)
Ejemplo n.º 3
0
def make_dataset(data: List[TextPairExample],
                 bias: Optional[Dict] = None,
                 sample=None,
                 shuffle=True) -> tf.data.Dataset:
    """Build a `tf.data.Dataset` of dicts from text-pair examples.

    Args:
        data: Examples to serve from the dataset.
        bias: Optional mapping from example id to a bias value. When truthy,
            each example is converted to a tuple with its bias appended, and
            the dataset gains a float32 "bias" component of shape (3,).
        sample: Forwarded unchanged to `build_epoch_fn`.
        shuffle: Forwarded to `build_epoch_fn` as the `shuffle` keyword.

    Returns:
        A dataset whose elements are dicts keyed by "id", `PREMISE_KEY`,
        `HYPOTHESIS_KEY`, "label" and, when a bias is given, "bias".
    """
    components = [
        ("id", tf.string, ()),
        (PREMISE_KEY, tf.string, (None, )),
        (HYPOTHESIS_KEY, tf.string, (None, )),
        ("label", tf.int32, ()),
    ]
    if bias:
        # Append each example's bias as an extra trailing field.
        data = [tuple(example) + (bias[example.id], ) for example in data]
        components.append(("bias", tf.float32, (3, )))

    epoch_fn = build_epoch_fn(data, sample, shuffle=shuffle)

    # Turn the (name, dtype, shape) triples into three parallel sequences.
    names, dtypes, shapes = py_utils.transpose_lists(components)
    dataset = tf.data.Dataset.from_generator(epoch_fn, tuple(dtypes),
                                             tuple(shapes))

    def to_map(*args):
        return dict(zip(names, args))

    return dataset.map(to_map)