def make_dataset(file_pattern, batch_size, randomize_input=True, num_epochs=1):
  context_feature_columns, example_feature_columns = _create_feature_columns()
  context_feature_spec = fc.make_parse_example_spec(
      context_feature_columns.values())
  label_column = tf.feature_column.numeric_column(
      _LABEL, dtype=tf.int64, default_value=-1)
  example_feature_spec = tf.feature_column.make_parse_example_spec(
      list(example_feature_columns.values()) + [label_column])
  dataset = tfr.data.build_ranking_dataset(
      file_pattern=file_pattern,
      data_format=tfr.data.SEQ,
      batch_size=batch_size,
      context_feature_spec=context_feature_spec,
      example_feature_spec=example_feature_spec,
      list_size=FLAGS.list_size,
      reader=tf.data.TFRecordDataset,
      reader_args=['GZIP', 32],
      shuffle=randomize_input,
      num_epochs=num_epochs,
      size_feature_name=_SIZE)

  def _separate_features_and_label(features):
    label = tf.squeeze(features.pop(_LABEL), axis=2)
    label = tf.cast(label, tf.float32)
    return features, label

  dataset = dataset.map(_separate_features_and_label)
  return dataset
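# A minimal usage sketch for make_dataset, assuming eager execution and that
# FLAGS.list_size, _LABEL, and _SIZE are defined as in the surrounding module.
# The file pattern below is hypothetical.
train_ds = make_dataset("/tmp/train-*.tfrecord", batch_size=32)
for features, label in train_ds.take(1):
  # label has shape [batch_size, list_size] after the squeeze in the map fn.
  print(label.shape)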
def testCalibratedLatticeEnsembleModelInfo(self, lattices, num_lattices,
                                           lattice_rank, parameterization,
                                           separate_calibrators,
                                           output_calibration):
  self._ResetAllBackends()
  feature_configs = copy.deepcopy(self.heart_feature_configs)
  if lattices == 'rtl_layer' or parameterization == 'kronecker_factored':
    # RTL Layer only supports monotonicity and bound constraints.
    for feature_config in feature_configs:
      feature_config.lattice_size = 2
      feature_config.unimodality = 'none'
      feature_config.reflects_trust_in = None
      feature_config.dominates = None
      feature_config.regularizer_configs = None
  model_config = configs.CalibratedLatticeEnsembleConfig(
      feature_configs=feature_configs,
      lattices=lattices,
      num_lattices=num_lattices,
      lattice_rank=lattice_rank,
      parameterization=parameterization,
      separate_calibrators=separate_calibrators,
      output_calibration=output_calibration,
  )
  estimator = estimators.CannedClassifier(
      feature_columns=self.heart_feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=self._GetHeartTrainInputFn(num_epochs=1),
      prefitting_input_fn=self._GetHeartTrainInputFn(num_epochs=5),
      optimizer=tf.keras.optimizers.Adam(0.01),
      prefitting_optimizer=tf.keras.optimizers.Adam(0.01))
  estimator.train(input_fn=self._GetHeartTrainInputFn(num_epochs=20))
  # Serving input fn is used to create saved models.
  serving_input_fn = (
      tf.estimator.export.build_parsing_serving_input_receiver_fn(
          feature_spec=fc.make_parse_example_spec(
              self.heart_feature_columns)))
  saved_model_path = estimator.export_saved_model(estimator.model_dir,
                                                  serving_input_fn)
  logging.info('Model exported to %s', saved_model_path)
  model = estimators.get_model_graph(saved_model_path)

  expected_num_nodes = (
      len(self.heart_feature_columns) +  # Input features
      num_lattices +  # One lattice per submodel
      1 +  # Averaging submodels
      int(output_calibration))  # Output calibration
  if separate_calibrators:
    expected_num_nodes += num_lattices * lattice_rank
  else:
    expected_num_nodes += len(self.heart_feature_columns)
  self.assertLen(model.nodes, expected_num_nodes)
def load_file(self, tf_file, batch_size):
  print_op = tf.print("opening file: ", tf_file)
  with tf.control_dependencies([print_op]):
    dataset = tf.data.TFRecordDataset(tf_file, buffer_size=256 * 1024 * 1024)
  dataset = dataset.shuffle(buffer_size=batch_size * 10,
                            reshuffle_each_iteration=True)
  parse_spec = fc.make_parse_example_spec(self.columns)
  dataset = dataset.map(
      map_func=lambda x: self.parse_example(x, parse_spec),
      num_parallel_calls=8)
  dataset = dataset.batch(batch_size=batch_size)
  dataset = dataset.prefetch(buffer_size=batch_size * 10)
  return dataset
def testCalibratedLatticeEnsembleFix2dConstraintViolations(
    self, feature_names, lattices, num_lattices, lattice_rank,
    expected_lattices):
  self._ResetAllBackends()
  feature_columns = [
      feature_column for feature_column in self.boston_feature_columns
      if feature_column.name in feature_names
  ]
  feature_configs = [
      feature_config for feature_config in self.boston_feature_configs
      if feature_config.name in feature_names
  ]
  model_config = configs.CalibratedLatticeEnsembleConfig(
      feature_configs=feature_configs,
      lattices=lattices,
      num_lattices=num_lattices,
      lattice_rank=lattice_rank,
  )
  estimator = estimators.CannedRegressor(
      feature_columns=feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=self._GetBostonTrainInputFn(num_epochs=1),
      prefitting_input_fn=self._GetBostonTrainInputFn(num_epochs=50),
      optimizer=tf.keras.optimizers.Adam(0.05),
      prefitting_optimizer=tf.keras.optimizers.Adam(0.05))
  estimator.train(input_fn=self._GetBostonTrainInputFn(num_epochs=200))
  # Serving input fn is used to create saved models.
  serving_input_fn = (
      tf.estimator.export.build_parsing_serving_input_receiver_fn(
          feature_spec=fc.make_parse_example_spec(feature_columns)))
  saved_model_path = estimator.export_saved_model(estimator.model_dir,
                                                  serving_input_fn)
  logging.info('Model exported to %s', saved_model_path)
  model = estimators.get_model_graph(saved_model_path)

  lattices = []
  for node in model.nodes:
    if isinstance(node, model_info.LatticeNode):
      lattices.append(
          [input_node.input_node.name for input_node in node.input_nodes])
  self.assertLen(lattices, len(expected_lattices))
  for lattice, expected_lattice in zip(lattices, expected_lattices):
    self.assertCountEqual(lattice, expected_lattice)
def create_feature_columns(dataset, embed_size=32, hash_size=10000):
  n_users = dataset.user.nunique()
  n_items = dataset.item.nunique()
  genre_list = dataset.genre1.unique()
  users = fc.categorical_column_with_vocabulary_list(
      "user", np.arange(n_users), default_value=-1, dtype=tf.int64)
  items = fc.categorical_column_with_vocabulary_list(
      "item", np.arange(n_items), default_value=-1, dtype=tf.int64)
  gender = fc.categorical_column_with_vocabulary_list("gender", ["M", "F"])
  age = fc.categorical_column_with_vocabulary_list(
      "age", [1, 18, 25, 35, 45, 50, 56], dtype=tf.int64)
  occupation = fc.categorical_column_with_vocabulary_list(
      "occupation", np.arange(21), dtype=tf.int64)
  genre1 = fc.categorical_column_with_vocabulary_list("genre1", genre_list)
  genre2 = fc.categorical_column_with_vocabulary_list("genre2", genre_list)
  genre3 = fc.categorical_column_with_vocabulary_list("genre3", genre_list)

  wide_cols = [
      users, items, gender, age, occupation, genre1, genre2, genre3,
      fc.crossed_column([gender, age, occupation], hash_bucket_size=hash_size),
      fc.crossed_column([age, genre1], hash_bucket_size=hash_size)
  ]

  embed_cols = [users, items, age, occupation]
  deep_cols = list()
  for col in embed_cols:
    deep_cols.append(fc.embedding_column(col, embed_size))
  shared_embed_cols = [genre1, genre2, genre3]
  deep_cols.extend(fc.shared_embedding_columns(shared_embed_cols, embed_size))
  deep_cols.append(fc.indicator_column(gender))

  label = fc.numeric_column("label", default_value=0.0, dtype=tf.float32)
  feat_columns = [label]
  feat_columns += wide_cols
  feat_columns += deep_cols
  feat_spec = fc.make_parse_example_spec(feat_columns)
  return wide_cols, deep_cols, feat_spec
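# A hedged usage sketch: the feat_spec returned above can drive
# tf.io.parse_example to decode serialized tf.Example protos into the
# wide/deep input features. The TFRecord path and helper name here are
# hypothetical, not part of the original module.
def make_input_fn(tfrecord_path, feat_spec, batch_size=256):
  def input_fn():
    ds = tf.data.TFRecordDataset(tfrecord_path)
    ds = ds.batch(batch_size)

    def parse(serialized):
      # Parse a batch of serialized Examples with the spec, then split off
      # the "label" feature declared in create_feature_columns.
      parsed = tf.io.parse_example(serialized, feat_spec)
      label = parsed.pop("label")
      return parsed, label

    return ds.map(parse)
  return input_fn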
def testCalibratedLatticeEnsembleModelInfo(self, num_lattices, lattice_rank,
                                           separate_calibrators,
                                           output_calibration):
  self._ResetAllBackends()
  model_config = configs.CalibratedLatticeEnsembleConfig(
      feature_configs=self.heart_feature_configs,
      num_lattices=num_lattices,
      lattice_rank=lattice_rank,
      separate_calibrators=separate_calibrators,
      output_calibration=output_calibration,
  )
  estimator = estimators.CannedClassifier(
      feature_columns=self.heart_feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=self._GetHeartTrainInputFn(num_epochs=1),
      prefitting_input_fn=self._GetHeartTrainInputFn(num_epochs=5),
      optimizer=tf.keras.optimizers.Adam(0.01),
      prefitting_optimizer=tf.keras.optimizers.Adam(0.01))
  estimator.train(input_fn=self._GetHeartTrainInputFn(num_epochs=20))
  # Serving input fn is used to create saved models.
  serving_input_fn = (
      tf.estimator.export.build_parsing_serving_input_receiver_fn(
          feature_spec=fc.make_parse_example_spec(
              self.heart_feature_columns)))
  saved_model_path = estimator.export_saved_model(estimator.model_dir,
                                                  serving_input_fn)
  logging.info('Model exported to %s', saved_model_path)
  model = estimators.get_model_graph(saved_model_path)

  expected_num_nodes = (
      len(self.heart_feature_columns) +  # Input features
      num_lattices +  # One lattice per submodel
      1 +  # Averaging submodels
      int(output_calibration))  # Output calibration
  if separate_calibrators:
    expected_num_nodes += num_lattices * lattice_rank
  else:
    expected_num_nodes += len(self.heart_feature_columns)
  self.assertLen(model.nodes, expected_num_nodes)
def testCalibratedModelInfo(self, model_type, output_calibration):
  self._ResetAllBackends()
  if model_type == 'linear':
    model_config = configs.CalibratedLinearConfig(
        feature_configs=self.heart_feature_configs,
        output_calibration=output_calibration,
    )
  else:
    model_config = configs.CalibratedLatticeConfig(
        feature_configs=self.heart_feature_configs,
        output_calibration=output_calibration,
    )
  estimator = estimators.CannedClassifier(
      feature_columns=self.heart_feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=self._GetHeartTrainInputFn(num_epochs=1),
      prefitting_input_fn=self._GetHeartTrainInputFn(num_epochs=5),
      optimizer=tf.keras.optimizers.Adam(0.01),
      prefitting_optimizer=tf.keras.optimizers.Adam(0.01))
  estimator.train(input_fn=self._GetHeartTrainInputFn(num_epochs=20))
  # Serving input fn is used to create saved models.
  serving_input_fn = (
      tf.estimator.export.build_parsing_serving_input_receiver_fn(
          feature_spec=fc.make_parse_example_spec(
              self.heart_feature_columns)))
  saved_model_path = estimator.export_saved_model(estimator.model_dir,
                                                  serving_input_fn)
  logging.info('Model exported to %s', saved_model_path)
  model = estimators.get_model_graph(saved_model_path)

  expected_num_nodes = (
      2 * len(self.heart_feature_columns) +  # Input features and calibration
      1 +  # Linear or lattice layer
      int(output_calibration))  # Output calibration
  self.assertLen(model.nodes, expected_num_nodes)
#feature_label = fc.numeric_column('label', shape=(1,), dtype=tf.float32)
#env_columns = tf.FixedLenFeature([1, 4], tf.int64)
#exec_time = tf.FixedLenFeature([], tf.float32)
cpu_column = fc.numeric_column('cpu', (1, 1))
env_columns = fc.numeric_column('env', (1, 3))
total_ops = fc.numeric_column('total_ops')
#exec_time = fc.numeric_column('label')
cat_table_size = fc.categorical_column_with_hash_bucket(
    key='table_size', hash_bucket_size=20)
weighted_column_table = fc.weighted_categorical_column(
    categorical_column=cat_table_size,
    weight_feature_key='table_size_weight')

# Note: the weighted column's parse spec covers both 'table_size' and
# 'table_size_weight' keys.
feature_columns = [
    cpu_column, env_columns, total_ops, weighted_column_table
]
fmap = fc.make_parse_example_spec(feature_columns)
#fmap['env'] = env_columns
#fmap['label'] = exec_time
#print(fmap)

#https://jhui.github.io/2017/11/21/TensorFlow-Importing-data/
def parser(serialized_example):
  """Parses a single tf.Example into image and label tensors."""
  features = tf.parse_single_example(
      serialized_example,
      # features={
      #     'env': tf.FixedLenFeature([1, 4], tf.int64),
      #     # 'env_segment_number': tf.FixedLenFeature([], tf.int64),
def main(unused_argv):
  set_tfconfig_environ()
  dataset = pd.read_csv(FLAGS.dataset, header=None, usecols=[0, 1, 3, 4, 5],
                        names=["user", "item", "gender", "age", "occupation"])
  item_unique = np.unique(dataset.item.values)
  print("num items: ", len(item_unique))
  item_id_map = dict(zip(item_unique, np.arange(len(item_unique))))
  dataset["item"] = dataset["item"].map(item_id_map)
  train_data, test_data = train_test_split(dataset)
  feature_columns = create_feature_columns(train_data)
  strategy = tf.distribute.experimental.ParameterServerStrategy()
  classifier = tf.estimator.Estimator(
      model_fn=model_fn,
      params={
          "feature_columns": feature_columns,
          # Wrap map() in list() so the params survive repeated iteration in
          # Python 3, where map() returns a one-shot iterator.
          "hidden_units": list(map(int, FLAGS.hidden_units.split(","))),
          "last_hidden_units": FLAGS.last_hidden_units,
          "lr": FLAGS.learning_rate,
          "use_bn": FLAGS.use_bn,
          "n_classes": FLAGS.n_classes,
          "num_sampled": FLAGS.num_sampled,
          "top_k": FLAGS.top_k,
          "eval_top_n": list(map(int, FLAGS.eval_top_n.split(",")))
      },
      config=tf.estimator.RunConfig(model_dir="youtube_dir",
                                    save_checkpoints_steps=100000,
                                    train_distribute=strategy))
  print("train steps: ", FLAGS.train_steps, "batch size: ", FLAGS.batch_size)
  train_spec = tf.estimator.TrainSpec(
      input_fn=lambda: input_fn(train_data, FLAGS.batch_size, mode="train"),
      max_steps=FLAGS.train_steps)
  eval_spec = tf.estimator.EvalSpec(
      input_fn=lambda: input_fn(test_data, FLAGS.batch_size, mode="eval"),
      steps=None)
  print("before train and evaluate")
  t0 = time.time()
  tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)
  print("after train and evaluate, training time: %.4f" % (time.time() - t0))

  t1 = time.time()
  results = classifier.evaluate(
      input_fn=lambda: input_fn(test_data, FLAGS.batch_size, mode="eval"))
  for key in sorted(results):
    print("%s: %s" % (key, results[key]))
  print("after evaluate, evaluate time: %.4f" % (time.time() - t1))

  print("predict boolean: ", FLAGS.predict)
  if FLAGS.predict:
    pred = list(classifier.predict(
        input_fn=lambda: input_fn(test_data, FLAGS.batch_size, mode="eval")))
    import random
    random.shuffle(pred)
    print("pred result example: ")
    for i in range(2):
      print(pred[i])
  elif FLAGS.job_name == "worker" and FLAGS.task_index == 0:
    print("exporting model...")
    feature_spec = fc.make_parse_example_spec(feature_columns)
    print(feature_spec)
    serving_input_receiver_fn = (
        tf.estimator.export.build_parsing_serving_input_receiver_fn(
            feature_spec))
    classifier.export_saved_model(FLAGS.saved_model_dir,
                                  serving_input_receiver_fn)
    print("save item vector...")
    nce_weights = classifier.get_variable_value("nce_weights")
    nce_biases = classifier.get_variable_value("nce_biases")
    [rows, cols] = nce_weights.shape
    with tf.gfile.FastGFile(FLAGS.output_item_vector, "w") as f:
      for i in range(rows):
        f.write(str(i) + "\t")
        for j in range(cols):
          f.write(str(nce_weights[i, j]))
          f.write(u",")
        f.write(str(nce_biases[i]))
        f.write(u"\n")
  print("quit main")
def main(_):
  # Parse config updates from command line flags.
  config_updates = []
  for update in FLAGS.config_updates:
    config_updates.extend(re.findall(r'(\S*)\s*=\s*(\S*)', update))

  # UCI Statlog (Heart) dataset.
  csv_file = tf.keras.utils.get_file(
      'heart.csv', 'http://storage.googleapis.com/applied-dl/heart.csv')
  df = pd.read_csv(csv_file)
  target = df.pop('target')
  train_size = int(len(df) * 0.8)
  train_x = df[:train_size]
  train_y = target[:train_size]
  test_x = df[train_size:]
  test_y = target[train_size:]

  # feature_analysis_input_fn is used to collect statistics about the input
  # features, so it needs only one pass over the dataset.
  #
  # feature_analysis_input_fn is required if you have at least one
  # FeatureConfig with "pwl_calibration_input_keypoints='quantiles'". Note
  # that 'quantiles' is the default keypoints configuration, so most likely
  # you'll need it.
  feature_analysis_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
      x=train_x,
      y=train_y,
      shuffle=False,
      batch_size=FLAGS.batch_size,
      num_epochs=1,
      num_threads=1)

  # prefitting_input_fn is used to prefit an initial ensemble that is used to
  # estimate feature interactions. This prefitting step does not need to
  # fully converge and thus requires fewer epochs than the main training.
  #
  # prefitting_input_fn is only required if your model_config is a
  # CalibratedLatticeEnsembleConfig with "lattices='crystals'".
  prefitting_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
      x=train_x,
      y=train_y,
      shuffle=True,
      batch_size=FLAGS.batch_size,
      num_epochs=FLAGS.prefitting_num_epochs,
      num_threads=1)

  train_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
      x=train_x,
      y=train_y,
      shuffle=True,
      batch_size=FLAGS.batch_size,
      num_epochs=FLAGS.num_epochs,
      num_threads=1)

  test_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
      x=test_x,
      y=test_y,
      shuffle=False,
      batch_size=FLAGS.batch_size,
      num_epochs=FLAGS.num_epochs,
      num_threads=1)

  # Feature columns.
  # - age
  # - sex
  # - cp        chest pain type (4 values)
  # - trestbps  resting blood pressure
  # - chol      serum cholesterol in mg/dl
  # - fbs       fasting blood sugar > 120 mg/dl
  # - restecg   resting electrocardiographic results (values 0, 1, 2)
  # - thalach   maximum heart rate achieved
  # - exang     exercise induced angina
  # - oldpeak   ST depression induced by exercise relative to rest
  # - slope     the slope of the peak exercise ST segment
  # - ca        number of major vessels (0-3) colored by fluoroscopy
  # - thal      3 = normal; 6 = fixed defect; 7 = reversible defect
  feature_columns = [
      fc.numeric_column('age', default_value=-1),
      fc.categorical_column_with_vocabulary_list('sex', [0, 1]),
      fc.numeric_column('cp'),
      fc.numeric_column('trestbps', default_value=-1),
      fc.numeric_column('chol'),
      fc.categorical_column_with_vocabulary_list('fbs', [0, 1]),
      fc.categorical_column_with_vocabulary_list('restecg', [0, 1, 2]),
      fc.numeric_column('thalach'),
      fc.categorical_column_with_vocabulary_list('exang', [0, 1]),
      fc.numeric_column('oldpeak'),
      fc.categorical_column_with_vocabulary_list('slope', [0, 1, 2]),
      fc.numeric_column('ca'),
      fc.categorical_column_with_vocabulary_list(
          'thal', ['normal', 'fixed', 'reversible']),
  ]

  # Feature configs are used to specify how each feature is calibrated and
  # used.
  feature_configs = [
      configs.FeatureConfig(
          name='age',
          lattice_size=3,
          # By default, input keypoints of pwl are quantiles of the feature.
          pwl_calibration_num_keypoints=5,
          monotonicity='increasing',
          pwl_calibration_clip_max=100,
      ),
      configs.FeatureConfig(
          name='cp',
          pwl_calibration_num_keypoints=4,
          # Keypoints can be uniformly spaced.
          pwl_calibration_input_keypoints='uniform',
          monotonicity='increasing',
      ),
      configs.FeatureConfig(
          name='chol',
          # Explicit input keypoint initialization.
          pwl_calibration_input_keypoints=[126.0, 210.0, 247.0, 286.0, 564.0],
          monotonicity='increasing',
          pwl_calibration_clip_min=130,
          # Calibration can be forced to span the full output range by
          # clamping.
          pwl_calibration_clamp_min=True,
          pwl_calibration_clamp_max=True,
          # Per feature regularization.
          regularizer_configs=[
              configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
          ],
      ),
      configs.FeatureConfig(
          name='fbs',
          # Monotonicity: output for 1 should be larger than output for 0.
          monotonicity=[(0, 1)],
      ),
      configs.FeatureConfig(
          name='trestbps',
          pwl_calibration_num_keypoints=5,
          monotonicity='decreasing',
      ),
      configs.FeatureConfig(
          name='thalach',
          pwl_calibration_num_keypoints=5,
          monotonicity='decreasing',
      ),
      configs.FeatureConfig(
          name='restecg',
          # Categorical monotonicity can be a partial order.
          monotonicity=[(0, 1), (0, 2)],
      ),
      configs.FeatureConfig(
          name='exang',
          monotonicity=[(0, 1)],
      ),
      configs.FeatureConfig(
          name='oldpeak',
          pwl_calibration_num_keypoints=5,
          monotonicity='increasing',
      ),
      configs.FeatureConfig(
          name='slope',
          monotonicity=[(0, 1), (1, 2)],
      ),
      configs.FeatureConfig(
          name='ca',
          pwl_calibration_num_keypoints=4,
          monotonicity='increasing',
      ),
      configs.FeatureConfig(
          name='thal',
          monotonicity=[('normal', 'fixed'), ('normal', 'reversible')],
      ),
  ]

  # Serving input fn is used to create saved models.
  serving_input_fn = (
      tf.estimator.export.build_parsing_serving_input_receiver_fn(
          feature_spec=fc.make_parse_example_spec(feature_columns)))

  # Model config defines the model structure for the estimator.
  # This is a calibrated linear model with output calibration: inputs are
  # calibrated, linearly combined, and the output of the linear layer is
  # calibrated again using a PWL function.
  model_config = configs.CalibratedLinearConfig(
      feature_configs=feature_configs,
      use_bias=True,
      output_calibration=True,
      regularizer_configs=[
          # Regularizer for the output calibrator.
          configs.RegularizerConfig(name='output_calib_hessian', l2=1e-4),
      ])
  # Update model configuration.
  # See tfl.configs.apply_updates for details.
  configs.apply_updates(model_config, config_updates)
  estimator = estimators.CannedClassifier(
      feature_columns=feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=feature_analysis_input_fn,
      optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
  estimator.train(input_fn=train_input_fn)
  results = estimator.evaluate(input_fn=test_input_fn)
  print('Calibrated linear results: {}'.format(results))
  print('Calibrated linear model exported to {}'.format(
      estimator.export_saved_model(estimator.model_dir, serving_input_fn)))

  # This is a calibrated lattice model: inputs are calibrated, then combined
  # non-linearly using a lattice layer.
  model_config = configs.CalibratedLatticeConfig(
      feature_configs=feature_configs,
      regularizer_configs=[
          # Torsion regularizer applied to the lattice to make it more linear.
          configs.RegularizerConfig(name='torsion', l2=1e-4),
          # Globally defined calibration regularizer is applied to all
          # features.
          configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
      ])
  estimator = estimators.CannedClassifier(
      feature_columns=feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=feature_analysis_input_fn,
      optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
  estimator.train(input_fn=train_input_fn)
  results = estimator.evaluate(input_fn=test_input_fn)
  print('Calibrated lattice results: {}'.format(results))
  print('Calibrated lattice model exported to {}'.format(
      estimator.export_saved_model(estimator.model_dir, serving_input_fn)))

  # This is a random lattice ensemble model with separate calibration:
  # the model output is the average output of separately calibrated lattices.
  model_config = configs.CalibratedLatticeEnsembleConfig(
      feature_configs=feature_configs,
      num_lattices=6,
      lattice_rank=5,
      separate_calibrators=True,
      regularizer_configs=[
          # Torsion regularizer applied to the lattice to make it more linear.
          configs.RegularizerConfig(name='torsion', l2=1e-4),
          # Globally defined calibration regularizer is applied to all
          # features.
          configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
      ])
  configs.apply_updates(model_config, config_updates)
  estimator = estimators.CannedClassifier(
      feature_columns=feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=feature_analysis_input_fn,
      optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
  estimator.train(input_fn=train_input_fn)
  results = estimator.evaluate(input_fn=test_input_fn)
  print('Random ensemble results: {}'.format(results))
  print('Random ensemble model exported to {}'.format(
      estimator.export_saved_model(estimator.model_dir, serving_input_fn)))

  # This is a Crystals ensemble model with separate calibration: the model
  # output is the average output of separately calibrated lattices.
  # The Crystals algorithm first trains a prefitting model and uses the
  # interactions between features to form the final lattice ensemble.
  model_config = configs.CalibratedLatticeEnsembleConfig(
      feature_configs=feature_configs,
      # Using the Crystals algorithm.
      lattices='crystals',
      num_lattices=6,
      lattice_rank=5,
      separate_calibrators=True,
      regularizer_configs=[
          # Torsion regularizer applied to the lattice to make it more linear.
          configs.RegularizerConfig(name='torsion', l2=1e-4),
          # Globally defined calibration regularizer is applied to all
          # features.
          configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
      ])
  configs.apply_updates(model_config, config_updates)
  estimator = estimators.CannedClassifier(
      feature_columns=feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=feature_analysis_input_fn,
      # prefitting_input_fn is required to train the prefitting model.
      prefitting_input_fn=prefitting_input_fn,
      optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
  estimator.train(input_fn=train_input_fn)
  results = estimator.evaluate(input_fn=test_input_fn)
  print('Crystals ensemble results: {}'.format(results))
  print('Crystals ensemble model exported to {}'.format(
      estimator.export_saved_model(estimator.model_dir, serving_input_fn)))
def get_feature_spec(self):
  return fc.make_parse_example_spec([self.get_feature_column()])
from .eval_metrics import AverageNClass, HitAtOne

N_CLASS = 3862
BATCH_SIZE = 1024
VOCAB_FILE = "data/vocabulary.csv"

# Exclude the audio feature since we didn't implement audio feature
# extraction. Even if the model can be trained on the audio feature,
# it won't be available for inference on new videos.
FEAT_COL_VIDEO = [
    fc.numeric_column(key="mean_rgb", shape=(1024,), dtype=tf.float32),
    #fc.numeric_column(key="mean_audio", shape=(128,), dtype=tf.float32),
    fc.indicator_column(
        fc.categorical_column_with_identity(key="labels", num_buckets=N_CLASS))
]
FEAT_X = ["mean_rgb"]
FEAT_SPEC_VIDEO = fc.make_parse_example_spec(FEAT_COL_VIDEO)
MULTI_HOT_ENCODER = tf.keras.layers.DenseFeatures(FEAT_COL_VIDEO[-1])

# If we'd like to use a custom serving input function, we need to use the
# estimator API. There is no documentation on how a Keras model can use a
# custom serving input function.
KERAS_TO_ESTIMATOR = True


def calc_class_weight(infile, scale=1):
  """Calculate class weights to re-balance the label distribution.

  The class weight for class i (w_i) is determined by:
  w_i = total no. samples / (n_class * count(class i))
  """
  if infile.startswith("gs://"):
    with file_io.FileIO(infile, "r") as f:
      vocab = pd.read_csv(f)
  else:
    vocab = pd.read_csv(infile)
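# A minimal standalone sketch (not part of the module above) of the weight
# formula in calc_class_weight's docstring, using made-up counts: with 10
# samples over 3 classes counted as [5, 3, 2],
#   w_0 = 10 / (3 * 5) ~= 0.67
#   w_1 = 10 / (3 * 3) ~= 1.11
#   w_2 = 10 / (3 * 2) ~= 1.67
# so rarer classes receive proportionally larger weights.
import numpy as np

counts = np.array([5, 3, 2], dtype=np.float64)  # hypothetical per-class counts
weights = counts.sum() / (len(counts) * counts)
print(weights)  # [0.6667 1.1111 1.6667]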