def eval_input_fn(batch_size, cache=False):
    """Build a batched ``tf.data.Dataset`` of (features, label) for evaluation.

    Rows are produced by ``db_generator`` over the enclosing ``select``
    query/connection, converted into a dataset whose per-feature dtypes are
    derived from ``feature_metas``, mapped through ``parse_sparse_feature``,
    and batched.

    Args:
        batch_size: Number of rows per batch.
        cache: If True, cache the dataset — to the file prefix
            "cache/predict" under TF 2.x, or in memory ("" filename)
            under TF 1.x.

    Returns:
        A ``tf.data.Dataset`` yielding (feature_dict, label) batches.
    """
    feature_types = []
    for name in feature_column_names:
        # NOTE: vector columns like 23,21,3,2,0,0 should use shape None
        if feature_metas[name]["is_sparse"]:
            # Sparse features arrive as (values, indices, dense_shape)
            # triples, hence a tuple of three dtypes.
            feature_types.append((tf.int64, tf.int32, tf.int64))
        else:
            feature_types.append(get_dtype(feature_metas[name]["dtype"]))
    gen = db_generator(conn.driver, conn, select, feature_column_names,
                       label_meta["feature_name"], feature_metas)
    # getattr resolves the dtype name on the tf module directly; this
    # replaces eval("tf.%s" % ...) — same result, no string evaluation.
    dataset = tf.data.Dataset.from_generator(
        gen, (tuple(feature_types), getattr(tf, label_meta["dtype"])))
    ds_mapper = functools.partial(
        parse_sparse_feature,
        feature_column_names=feature_column_names,
        feature_metas=feature_metas)
    dataset = dataset.map(ds_mapper).batch(batch_size)
    if cache:
        dataset = dataset.cache("cache/predict" if TF_VERSION_2 else "")
    return dataset
def fast_input_fn(generator):
    """Wrap a row generator into a TF 1.x estimator ``input_fn``.

    Builds the per-feature dtype tuple from ``feature_metas``, then returns
    an inner zero-argument ``input_fn`` that constructs a dataset from
    ``generator``, applies sparse-feature parsing, batches by 1, caches to
    a local file, and returns the next-element tensors of a one-shot
    iterator (TF 1.x graph-mode API).

    Args:
        generator: Callable yielding (features_tuple, label) rows.

    Returns:
        A zero-argument function suitable as an Estimator ``input_fn``.
    """
    feature_types = []
    for name in feature_column_names:
        if feature_metas[name]["is_sparse"]:
            # Sparse features are (values, indices, dense_shape) triples.
            feature_types.append((tf.int64, tf.int32, tf.int64))
        else:
            feature_types.append(get_dtype(feature_metas[name]["dtype"]))

    def _inner_input_fn():
        # getattr resolves the dtype name on the tf module directly; this
        # replaces eval("tf.%s" % ...) — same result, no string evaluation.
        dataset = tf.data.Dataset.from_generator(
            generator,
            (tuple(feature_types), getattr(tf, label_meta["dtype"])))
        ds_mapper = functools.partial(
            parse_sparse_feature,
            feature_column_names=feature_column_names,
            feature_metas=feature_metas)
        dataset = dataset.map(ds_mapper).batch(1).cache(
            filename="dataset_cache_pred.txt")
        # TF 1.x graph-mode iterator: the returned tensors are re-evaluated
        # each session step.
        iterator = dataset.make_one_shot_iterator()
        features = iterator.get_next()
        return features

    return _inner_input_fn
def pai_maxcompute_input_fn():
    """Build a ``tf.data`` pipeline reading from a PAI MaxCompute table.

    ``pai_table`` may be given as "database.table" or as a bare table name;
    in the latter case the database is recovered from the enclosing
    ``datasource`` DSN. Each selected column is read with a zero-valued
    default of the dtype/shape recorded in ``feature_metas`` and exposed as
    a flattened tensor keyed by its feature name.

    Returns:
        A ``tf.data.Dataset`` yielding a dict of feature-name -> 1-D tensor.
    """
    parts = pai_table.split(".")
    if len(parts) == 2:
        database, table_name = parts
    elif len(parts) == 1:
        table_name = pai_table
        # Database not given inline — recover it from the datasource DSN.
        driver, dsn = datasource.split("://")
        database = parseMaxComputeDSN(dsn)[-1]
    else:
        raise ValueError("error database.table format: %s" % pai_table)

    tables = ["odps://%s/tables/%s" % (database, table_name)]
    # One zero-filled default per selected column, matching its metadata.
    record_defaults = [
        tf.constant(0,
                    dtype=get_dtype(feature_metas[name]["dtype"]),
                    shape=feature_metas[name]["shape"])
        for name in feature_column_names
    ]
    dataset = tf.data.TableRecordDataset(
        tables,
        record_defaults=record_defaults,
        selected_cols=",".join(feature_column_names))

    def tensor_to_dict(*cols):
        # Flatten each column tensor and key it by its feature name.
        return {
            name: tf.reshape(col, [-1])
            for name, col in zip(feature_column_names, cols)
        }

    return dataset.map(tensor_to_dict)