Example #1
        def eval_input_fn(batch_size, cache=False):
            feature_types = []
            for name in feature_column_names:
                # NOTE: vector columns like 23,21,3,2,0,0 should use shape None
                if feature_metas[name]["is_sparse"]:
                    # Sparse columns arrive as (indices, values, dense_shape)
                    # triples, mirroring tf.SparseTensor's components.
                    feature_types.append((tf.int64, tf.int32, tf.int64))
                else:
                    feature_types.append(
                        get_dtype(feature_metas[name]["dtype"]))

            gen = db_generator(conn.driver, conn, select, feature_column_names,
                               label_meta["feature_name"], feature_metas)

            dataset = tf.data.Dataset.from_generator(
                gen,
                # Look the label dtype up by name, e.g. "int64" -> tf.int64.
                (tuple(feature_types), getattr(tf, label_meta["dtype"])))
            ds_mapper = functools.partial(
                parse_sparse_feature,
                feature_column_names=feature_column_names,
                feature_metas=feature_metas)
            dataset = dataset.map(ds_mapper).batch(batch_size)
            if cache:
                # Cache to a file under TF 2.x; an empty filename
                # keeps the cache in memory.
                dataset = dataset.cache(
                    "cache/predict" if TF_VERSION_2 else "")
            return dataset
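
For reference, here is a minimal self-contained sketch of the same from_generator -> map -> batch pipeline, with a toy two-column schema standing in for feature_column_names and feature_metas (all names below are illustrative, not part of the example above):

        import tensorflow as tf

        # Stand-in for db_generator: yields ((feature columns...), label)
        # tuples row by row.
        def toy_generator():
            for i in range(4):
                yield (float(i), float(i) * 2.0), i % 2

        dataset = tf.data.Dataset.from_generator(
            toy_generator,
            output_signature=(
                (tf.TensorSpec(shape=(), dtype=tf.float32),
                 tf.TensorSpec(shape=(), dtype=tf.float32)),
                tf.TensorSpec(shape=(), dtype=tf.int64)))

        def to_dict(features, label):
            # Analogous to parse_sparse_feature: name each column so the
            # model can look features up by key.
            return {"x1": features[0], "x2": features[1]}, label

        dataset = dataset.map(to_dict).batch(2)
        for features, labels in dataset:
            print(features, labels)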
Example #2
        def fast_input_fn(generator):
            feature_types = []
            for name in feature_column_names:
                if feature_metas[name]["is_sparse"]:
                    feature_types.append((tf.int64, tf.int32, tf.int64))
                else:
                    feature_types.append(
                        get_dtype(feature_metas[name]["dtype"]))

            def _inner_input_fn():
                dataset = tf.data.Dataset.from_generator(
                    generator,
                    # Look the label dtype up by name, as in Example #1.
                    (tuple(feature_types),
                     getattr(tf, label_meta["dtype"])))
                ds_mapper = functools.partial(
                    parse_sparse_feature,
                    feature_column_names=feature_column_names,
                    feature_metas=feature_metas)
                dataset = dataset.map(ds_mapper).batch(1).cache(
                    filename="dataset_cache_pred.txt")
                # TF 1.x-style iterator; the Estimator consumes the
                # returned tensors as its features.
                iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
                features = iterator.get_next()
                return features

            return _inner_input_fn
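
A stripped-down sketch of the same factory pattern: close over a generator and return a TF 1.x-style input_fn whose one-shot iterator feeds an Estimator (the generator, dtypes, and shapes here are placeholders):

        import tensorflow as tf

        def make_input_fn(generator):
            def _input_fn():
                # Built inside the Estimator's graph when it calls input_fn.
                dataset = tf.data.Dataset.from_generator(
                    generator, ((tf.float32,), tf.int64))
                dataset = dataset.batch(1)
                iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
                return iterator.get_next()
            return _input_fn

        def gen():
            for i in range(3):
                yield (float(i),), i % 2

        # e.g. some_estimator.predict(input_fn=make_input_fn(gen))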
Example #3
        def pai_maxcompute_input_fn():
            table_parts = pai_table.split(".")
            if len(table_parts) == 2:
                database, table_name = table_parts
            elif len(table_parts) == 1:
                # No database prefix on the table name; fall back to the
                # project parsed from the MaxCompute DSN in datasource.
                table_name = pai_table
                driver, dsn = datasource.split("://")
                database = parseMaxComputeDSN(dsn)[-1]
            else:
                raise ValueError("invalid database.table format: %s" % pai_table)

            tables = ["odps://%s/tables/%s" % (database, table_name)]
            record_defaults = []
            for name in feature_column_names:
                dtype = get_dtype(feature_metas[name]["dtype"])
                record_defaults.append(
                    tf.constant(0, dtype=dtype,
                                shape=feature_metas[name]["shape"]))

            dataset = tf.data.TableRecordDataset(
                tables,
                record_defaults=record_defaults,
                selected_cols=",".join(feature_column_names))

            def tensor_to_dict(*args):
                # Name each positional column tensor and flatten it to
                # rank 1 so the model can look features up by key.
                num_features = len(feature_column_names)
                features_dict = dict()
                for idx in range(num_features):
                    name = feature_column_names[idx]
                    features_dict[name] = tf.reshape(args[idx], [-1])
                return features_dict

            return dataset.map(tensor_to_dict)
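
Since tf.data.TableRecordDataset is a PAI TensorFlow extension, the positional-columns-to-feature-dict mapping can be tried out locally with a plain in-memory dataset (column names and values below are made up):

        import tensorflow as tf

        feature_column_names = ["a", "b"]

        # Stand-in for TableRecordDataset: one tensor per selected column.
        dataset = tf.data.Dataset.from_tensor_slices(
            (tf.constant([1.0, 2.0]), tf.constant([3.0, 4.0])))

        def tensor_to_dict(*args):
            # Same shape as the mapper above: name each positional
            # column and flatten it to rank 1.
            return {name: tf.reshape(args[idx], [-1])
                    for idx, name in enumerate(feature_column_names)}

        dataset = dataset.map(tensor_to_dict)
        for features in dataset:
            print(features)  # {"a": [1.], "b": [3.]}, then {"a": [2.], "b": [4.]}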