コード例 #1
0
def _standarize_feature_label_dataset(dataset, model):
    """Return a new TFDataset with samples reordered to match ``model``.

    Each ``(features, labels)`` record of ``dataset.rdd`` (and of
    ``dataset.val_rdd`` when it is not None) first has its labels passed
    through ``_process_labels`` and is then handed to ``_training_reorder``
    together with ``model.input_names`` and ``model.output_names``.
    ``dataset.tensor_structure`` goes through the same ``_training_reorder``
    call so that it describes the converted records.

    :param dataset: source dataset; must expose ``rdd``, ``val_rdd``,
        ``tensor_structure``, ``batch_size``, ``hard_code_batch_size`` and
        ``batch_per_thread``.
    :param model: object exposing ``input_names`` and ``output_names``.
    :return: a freshly built ``TFDataset``; ``dataset`` itself is not modified.
    """
    in_names = model.input_names
    out_names = model.output_names

    def _fix_labels(record):
        # Only the label half of the pair is touched; features pass through.
        return (record[0], _process_labels(record[1]))

    def _reorder_sample(sample):
        return _training_reorder(sample, in_names, out_names)

    def _convert(source_rdd):
        # Two separate map stages, same as applying them one after another.
        return source_rdd.map(_fix_labels).map(_reorder_sample)

    train_rdd = _convert(dataset.rdd)

    validation_rdd = None
    if dataset.val_rdd is not None:
        validation_rdd = _convert(dataset.val_rdd)

    structure = _training_reorder(dataset.tensor_structure, in_names, out_names)

    # NOTE(review): the 4th positional argument (-1) is presumably the
    # batch_per_thread slot, which is overwritten right below -- confirm
    # against the TFDataset constructor signature.
    converted = TFDataset(
        train_rdd,
        structure,
        dataset.batch_size,
        -1,
        dataset.hard_code_batch_size,
        validation_rdd,
    )
    converted.batch_per_thread = dataset.batch_per_thread
    return converted
コード例 #2
0
def _standarize_feature_label_dataset(dataset, model):
    """Return a new TFDataset whose samples are flat lists ordered for ``model``.

    Each ``(features, labels)`` record of ``dataset.rdd`` (and of
    ``dataset.val_rdd`` when it is not None) is converted in two steps:

    1. every 0-dimensional label array is expanded to shape ``(1,)``;
    2. features and labels are each turned into a list (dict values are
       picked in the order of ``model.input_names`` / ``model.output_names``)
       and the two lists are concatenated.

    ``dataset.tensor_structure`` is reordered the same way so that it
    describes the converted records.

    :param dataset: source dataset; must expose ``rdd``, ``val_rdd``,
        ``tensor_structure``, ``batch_size``, ``hard_code_batch_size`` and
        ``batch_per_thread``.
    :param model: object exposing ``input_names`` and ``output_names``.
    :return: a freshly built ``TFDataset``; ``dataset`` itself is not modified.
    :raises AssertionError: if a record (or ``dataset.tensor_structure``) is
        not a tuple. For RDD records this surfaces when the RDD is evaluated.
    """
    input_names = model.input_names
    output_names = model.output_names

    def _expand_scalar(y):
        # A 0-d array can only gain a new axis at position 0. The previous
        # axis=1 is out of range and raises AxisError on NumPy >= 1.18
        # (older releases only warned and behaved like axis=0).
        return np.expand_dims(y, axis=0) if y.ndim == 0 else y

    def _process_labels(ys):
        # Labels may arrive as a dict, a list, or a single array.
        if isinstance(ys, dict):
            return {k: _expand_scalar(y) for k, y in ys.items()}
        if isinstance(ys, list):
            return [_expand_scalar(y) for y in ys]
        return _expand_scalar(ys)

    def _reorder(x, names):
        # Normalise to a list: dict values follow `names`, lists are kept
        # as they are, anything else becomes a single-element list.
        if isinstance(x, dict):
            return [x[name] for name in names]
        if isinstance(x, list):
            return x
        return [x]

    def _training_reorder(x, input_names, output_names):
        assert isinstance(x, tuple)

        return _reorder(x[0], input_names) + _reorder(x[1], output_names)

    def _convert(source_rdd):
        return source_rdd.map(lambda x: (x[0], _process_labels(x[1])))\
            .map(lambda sample: _training_reorder(sample, input_names, output_names))

    rdd = _convert(dataset.rdd)
    if dataset.val_rdd is not None:
        val_rdd = _convert(dataset.val_rdd)
    else:
        val_rdd = None
    tensor_structure = _training_reorder(dataset.tensor_structure, input_names,
                                         output_names)
    # NOTE(review): the 4th positional argument (-1) is presumably the
    # batch_per_thread slot, which is overwritten right below -- confirm
    # against the TFDataset constructor signature.
    new_dataset = TFDataset(rdd, tensor_structure, dataset.batch_size, -1,
                            dataset.hard_code_batch_size, val_rdd)
    new_dataset.batch_per_thread = dataset.batch_per_thread
    return new_dataset