Code example #1
def make_dataset(file_pattern, batch_size, randomize_input=True, num_epochs=1):
    context_feature_columns, example_feature_columns = _create_feature_columns()
    context_feature_spec = fc.make_parse_example_spec(
        context_feature_columns.values())
    label_column = tf.feature_column.numeric_column(_LABEL,
                                                    dtype=tf.int64,
                                                    default_value=-1)
    example_feature_spec = tf.feature_column.make_parse_example_spec(
        list(example_feature_columns.values()) + [label_column])
    dataset = tfr.data.build_ranking_dataset(
        file_pattern=file_pattern,
        data_format=tfr.data.SEQ,
        batch_size=batch_size,
        context_feature_spec=context_feature_spec,
        example_feature_spec=example_feature_spec,
        list_size=FLAGS.list_size,
        reader=tf.data.TFRecordDataset,
        reader_args=['GZIP', 32],
        shuffle=randomize_input,
        num_epochs=num_epochs,
        size_feature_name=_SIZE)

    def _separate_features_and_label(features):
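        # Labels come in as [batch, list_size, 1]; drop the trailing dimension.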
        label = tf.squeeze(features.pop(_LABEL), axis=2)
        label = tf.cast(label, tf.float32)
        return features, label

    dataset = dataset.map(_separate_features_and_label)
    return dataset
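A minimal usage sketch (hypothetical file pattern; assumes FLAGS.list_size, _LABEL, and _SIZE are defined in the surrounding module, with tfr being tensorflow_ranking):

ds = make_dataset("data/train-*.tfrecord.gz", batch_size=32)
for features, label in ds.take(1):
    print(label.shape)  # [batch_size, list_size] after the squeeze and cast above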
Code example #2
File: estimators_test.py Project: mikechen66/lattice
    def testCalibratedLatticeEnsembleModelInfo(self, lattices, num_lattices,
                                               lattice_rank, parameterization,
                                               separate_calibrators,
                                               output_calibration):
        self._ResetAllBackends()
        feature_configs = copy.deepcopy(self.heart_feature_configs)
        if lattices == 'rtl_layer' or parameterization == 'kronecker_factored':
            # RTL Layer only supports monotonicity and bound constraints.
            for feature_config in feature_configs:
                feature_config.lattice_size = 2
                feature_config.unimodality = 'none'
                feature_config.reflects_trust_in = None
                feature_config.dominates = None
                feature_config.regularizer_configs = None
        model_config = configs.CalibratedLatticeEnsembleConfig(
            feature_configs=feature_configs,
            lattices=lattices,
            num_lattices=num_lattices,
            lattice_rank=lattice_rank,
            parameterization=parameterization,
            separate_calibrators=separate_calibrators,
            output_calibration=output_calibration,
        )
        estimator = estimators.CannedClassifier(
            feature_columns=self.heart_feature_columns,
            model_config=model_config,
            feature_analysis_input_fn=self._GetHeartTrainInputFn(num_epochs=1),
            prefitting_input_fn=self._GetHeartTrainInputFn(num_epochs=5),
            optimizer=tf.keras.optimizers.Adam(0.01),
            prefitting_optimizer=tf.keras.optimizers.Adam(0.01))
        estimator.train(input_fn=self._GetHeartTrainInputFn(num_epochs=20))

        # Serving input fn is used to create saved models.
        serving_input_fn = (
            tf.estimator.export.build_parsing_serving_input_receiver_fn(
                feature_spec=fc.make_parse_example_spec(
                    self.heart_feature_columns)))
        saved_model_path = estimator.export_saved_model(
            estimator.model_dir, serving_input_fn)
        logging.info('Model exported to %s', saved_model_path)
        model = estimators.get_model_graph(saved_model_path)

        expected_num_nodes = (
            len(self.heart_feature_columns) +  # Input features
            num_lattices +  # One lattice per submodel
            1 +  # Averaging submodels
            int(output_calibration))  # Output calibration
        if separate_calibrators:
            expected_num_nodes += num_lattices * lattice_rank
        else:
            expected_num_nodes += len(self.heart_feature_columns)

        self.assertLen(model.nodes, expected_num_nodes)
Code example #3
File: loader.py Project: wuqingzhou828/dl-rerank
    def load_file(self, tf_file, batch_size):
        print_op = tf.print("opening file: ", tf_file)
        with tf.control_dependencies([print_op]):
            dataset = tf.data.TFRecordDataset(tf_file,
                                              buffer_size=256 * 1024 * 1024)
            dataset = dataset.shuffle(buffer_size=batch_size * 10,
                                      reshuffle_each_iteration=True)
            parse_spec = fc.make_parse_example_spec(self.columns)
            dataset = dataset.map(
                map_func=lambda x: self.parse_example(x, parse_spec),
                num_parallel_calls=8)
            dataset = dataset.batch(batch_size=batch_size)
            dataset = dataset.prefetch(buffer_size=batch_size * 10)
            return dataset
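The loader calls a parse_example helper defined elsewhere in the project; a minimal sketch of what it plausibly does (hypothetical body):

    def parse_example(self, serialized, parse_spec):
        # map() runs before batch(), so each element is one serialized Example.
        return tf.io.parse_single_example(serialized, parse_spec)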
Code example #4
File: estimators_test.py Project: mikechen66/lattice
    def testCalibratedLatticeEnsembleFix2dConstraintViolations(
            self, feature_names, lattices, num_lattices, lattice_rank,
            expected_lattices):
        self._ResetAllBackends()
        feature_columns = [
            feature_column for feature_column in self.boston_feature_columns
            if feature_column.name in feature_names
        ]
        feature_configs = [
            feature_config for feature_config in self.boston_feature_configs
            if feature_config.name in feature_names
        ]

        model_config = configs.CalibratedLatticeEnsembleConfig(
            feature_configs=feature_configs,
            lattices=lattices,
            num_lattices=num_lattices,
            lattice_rank=lattice_rank,
        )
        estimator = estimators.CannedRegressor(
            feature_columns=feature_columns,
            model_config=model_config,
            feature_analysis_input_fn=self._GetBostonTrainInputFn(
                num_epochs=1),
            prefitting_input_fn=self._GetBostonTrainInputFn(num_epochs=50),
            optimizer=tf.keras.optimizers.Adam(0.05),
            prefitting_optimizer=tf.keras.optimizers.Adam(0.05))
        estimator.train(input_fn=self._GetBostonTrainInputFn(num_epochs=200))

        # Serving input fn is used to create saved models.
        serving_input_fn = (
            tf.estimator.export.build_parsing_serving_input_receiver_fn(
                feature_spec=fc.make_parse_example_spec(feature_columns)))
        saved_model_path = estimator.export_saved_model(
            estimator.model_dir, serving_input_fn)
        logging.info('Model exported to %s', saved_model_path)
        model = estimators.get_model_graph(saved_model_path)
        lattices = []
        for node in model.nodes:
            if isinstance(node, model_info.LatticeNode):
                lattices.append([
                    input_node.input_node.name
                    for input_node in node.input_nodes
                ])

        self.assertLen(lattices, len(expected_lattices))
        for lattice, expected_lattice in zip(lattices, expected_lattices):
            self.assertCountEqual(lattice, expected_lattice)
Code example #5
def create_feature_columns(dataset, embed_size=32, hash_size=10000):
    n_users = dataset.user.nunique()
    n_items = dataset.item.nunique()
    genre_list = dataset.genre1.unique()
    users = fc.categorical_column_with_vocabulary_list("user",
                                                       np.arange(n_users),
                                                       default_value=-1,
                                                       dtype=tf.int64)
    items = fc.categorical_column_with_vocabulary_list("item",
                                                       np.arange(n_items),
                                                       default_value=-1,
                                                       dtype=tf.int64)
    gender = fc.categorical_column_with_vocabulary_list("gender", ["M", "F"])
    age = fc.categorical_column_with_vocabulary_list(
        "age", [1, 18, 25, 35, 45, 50, 56], dtype=tf.int64)
    occupation = fc.categorical_column_with_vocabulary_list("occupation",
                                                            np.arange(21),
                                                            dtype=tf.int64)
    genre1 = fc.categorical_column_with_vocabulary_list("genre1", genre_list)
    genre2 = fc.categorical_column_with_vocabulary_list("genre2", genre_list)
    genre3 = fc.categorical_column_with_vocabulary_list("genre3", genre_list)

    wide_cols = [
        users, items, gender, age, occupation, genre1, genre2, genre3,
        fc.crossed_column([gender, age, occupation],
                          hash_bucket_size=hash_size),
        fc.crossed_column([age, genre1], hash_bucket_size=hash_size)
    ]

    embed_cols = [users, items, age, occupation]
    deep_cols = list()
    for col in embed_cols:
        deep_cols.append(fc.embedding_column(col, embed_size))

    shared_embed_cols = [genre1, genre2, genre3]
    deep_cols.extend(fc.shared_embedding_columns(shared_embed_cols,
                                                 embed_size))
    deep_cols.append(fc.indicator_column(gender))

    label = fc.numeric_column("label", default_value=0.0, dtype=tf.float32)
    feat_columns = [label]
    feat_columns += wide_cols
    feat_columns += deep_cols
    feat_spec = fc.make_parse_example_spec(feat_columns)
    return wide_cols, deep_cols, feat_spec
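A hedged sketch of how the three return values might be consumed; the estimator and serving setup below are illustrative, not taken from the source project:

wide_cols, deep_cols, feat_spec = create_feature_columns(dataset)
model = tf.estimator.DNNLinearCombinedClassifier(
    linear_feature_columns=wide_cols,
    dnn_feature_columns=deep_cols,
    dnn_hidden_units=[128, 64])
# feat_spec also covers the label column, so it can drive tf.Example parsing:
serving_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feat_spec)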
Code example #6
File: estimators_test.py Project: w-hat/lattice
    def testCalibratedLatticeEnsembleModelInfo(self, num_lattices,
                                               lattice_rank,
                                               separate_calibrators,
                                               output_calibration):
        self._ResetAllBackends()
        model_config = configs.CalibratedLatticeEnsembleConfig(
            feature_configs=self.heart_feature_configs,
            num_lattices=num_lattices,
            lattice_rank=lattice_rank,
            separate_calibrators=separate_calibrators,
            output_calibration=output_calibration,
        )
        estimator = estimators.CannedClassifier(
            feature_columns=self.heart_feature_columns,
            model_config=model_config,
            feature_analysis_input_fn=self._GetHeartTrainInputFn(num_epochs=1),
            prefitting_input_fn=self._GetHeartTrainInputFn(num_epochs=5),
            optimizer=tf.keras.optimizers.Adam(0.01),
            prefitting_optimizer=tf.keras.optimizers.Adam(0.01))
        estimator.train(input_fn=self._GetHeartTrainInputFn(num_epochs=20))

        # Serving input fn is used to create saved models.
        serving_input_fn = (
            tf.estimator.export.build_parsing_serving_input_receiver_fn(
                feature_spec=fc.make_parse_example_spec(
                    self.heart_feature_columns)))
        saved_model_path = estimator.export_saved_model(
            estimator.model_dir, serving_input_fn)
        logging.info('Model exported to %s', saved_model_path)
        model = estimators.get_model_graph(saved_model_path)

        expected_num_nodes = (
            len(self.heart_feature_columns) +  # Input features
            num_lattices +  # One lattice per submodel
            1 +  # Averaging submodels
            int(output_calibration))  # Output calibration
        if separate_calibrators:
            expected_num_nodes += num_lattices * lattice_rank
        else:
            expected_num_nodes += len(self.heart_feature_columns)

        self.assertLen(model.nodes, expected_num_nodes)
Code example #7
File: estimators_test.py Project: w-hat/lattice
    def testCalibratedModelInfo(self, model_type, output_calibration):
        self._ResetAllBackends()
        if model_type == 'linear':
            model_config = configs.CalibratedLinearConfig(
                feature_configs=self.heart_feature_configs,
                output_calibration=output_calibration,
            )
        else:
            model_config = configs.CalibratedLatticeConfig(
                feature_configs=self.heart_feature_configs,
                output_calibration=output_calibration,
            )
        estimator = estimators.CannedClassifier(
            feature_columns=self.heart_feature_columns,
            model_config=model_config,
            feature_analysis_input_fn=self._GetHeartTrainInputFn(num_epochs=1),
            prefitting_input_fn=self._GetHeartTrainInputFn(num_epochs=5),
            optimizer=tf.keras.optimizers.Adam(0.01),
            prefitting_optimizer=tf.keras.optimizers.Adam(0.01))
        estimator.train(input_fn=self._GetHeartTrainInputFn(num_epochs=20))

        # Serving input fn is used to create saved models.
        serving_input_fn = (
            tf.estimator.export.build_parsing_serving_input_receiver_fn(
                feature_spec=fc.make_parse_example_spec(
                    self.heart_feature_columns)))
        saved_model_path = estimator.export_saved_model(
            estimator.model_dir, serving_input_fn)
        logging.info('Model exported to %s', saved_model_path)
        model = estimators.get_model_graph(saved_model_path)

        expected_num_nodes = (
            2 * len(self.heart_feature_columns)
            +  # Input features and calibration
            1 +  # Linear or lattice layer
            int(output_calibration))  # Output calibration

        self.assertLen(model.nodes, expected_num_nodes)
Code example #8
#feature_label = fc.numeric_column('label', shape=(1,), dtype=tf.float32)
#env_columns = tf.FixedLenFeature([1, 4], tf.int64)
#exec_time = tf.FixedLenFeature([], tf.float32)
cpu_column = fc.numeric_column('cpu', (1, 1))
env_columns = fc.numeric_column('env', (1, 3))
total_ops = fc.numeric_column('total_ops')
#exec_time = fc.numeric_column('label')
cat_table_size = fc.categorical_column_with_hash_bucket(key='table_size',
                                                        hash_bucket_size=20)
weighted_column_table = fc.weighted_categorical_column(
    categorical_column=cat_table_size, weight_feature_key='table_size_weight')
feature_columns = [
    cpu_column, env_columns, total_ops, weighted_column_table
]

fmap = fc.make_parse_example_spec(feature_columns)
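# fmap maps each feature key to its parsing config, e.g. 'cpu' ->
# FixedLenFeature([1, 1], tf.float32), 'table_size' -> VarLenFeature(tf.string),
# and 'table_size_weight' -> VarLenFeature(tf.float32).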

#fmap['env'] = env_columns


#fmap['label'] = exec_time
#print(fmap)
#https://jhui.github.io/2017/11/21/TensorFlow-Importing-data/
def parser(serialized_example):
    """Parses a single tf.Example into image and label tensors."""

    features = tf.parse_single_example(
        serialized_example,
        # features={
        #     'env': tf.FixedLenFeature([1, 4], tf.int64),
        #     # 'env_segment_number': tf.FixedLenFeature([], tf.int64),
Code example #9
def main(unused_argv):
    set_tfconfig_environ()
    dataset = pd.read_csv(FLAGS.dataset, header=None, usecols=[0, 1, 3, 4, 5],
                          names=["user", "item", "gender", "age", "occupation"])
    item_unique = np.unique(dataset.item.values)
    print("num items: ", len(item_unique))
    item_id_map = dict(zip(item_unique, np.arange(len(item_unique))))
    dataset["item"] = dataset["item"].map(item_id_map)

    train_data, test_data = train_test_split(dataset)
    feature_columns = create_feature_columns(train_data)

    strategy = tf.distribute.experimental.ParameterServerStrategy()
    classifier = tf.estimator.Estimator(
        model_fn=model_fn,
        params={"feature_columns": feature_columns,
                "hidden_units": map(int, FLAGS.hidden_units.split(",")),
                "last_hidden_units": FLAGS.last_hidden_units,
                "lr": FLAGS.learning_rate,
                "use_bn": FLAGS.use_bn,
                "n_classes": FLAGS.n_classes,
                "num_sampled": FLAGS.num_sampled,
                "top_k": FLAGS.top_k,
                "eval_top_n": map(int, FLAGS.eval_top_n.split(","))},
        config=tf.estimator.RunConfig(model_dir="youtube_dir",
                                      save_checkpoints_steps=100000,
                                      train_distribute=strategy))

    print("train steps: ", FLAGS.train_steps, "batch size: ", FLAGS.batch_size)
    train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(train_data, FLAGS.batch_size, mode="train"),
                                        max_steps=FLAGS.train_steps)
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(test_data, FLAGS.batch_size, mode="eval"), steps=None)
    print("before train and evaluate")
    t0 = time.time()
    tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)
    print("after train and evaluate, training time: %.4f" % (time.time() - t0))

    t1 = time.time()
    results = classifier.evaluate(input_fn=lambda: input_fn(test_data, FLAGS.batch_size, mode="eval"))
    for key in sorted(results):
        print("%s: %s" % (key, results[key]))
    print("after evaluate, evaluate time: %.4f" % (time.time() - t1))
    print("predict boolean: ", FLAGS.predict)

    if FLAGS.predict:
        pred = list(classifier.predict(input_fn=lambda: input_fn(test_data, FLAGS.batch_size, mode="eval")))
        import random
        random.shuffle(pred)
        print("pred result example: ")
        for i in range(2):
            print(pred[i])

    elif FLAGS.job_name == "worker" and FLAGS.task_index == 0:
        print("exporting model...")
        feature_spec = fc.make_parse_example_spec(feature_columns)
        print(feature_spec)
        serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
        classifier.export_saved_model(FLAGS.saved_model_dir, serving_input_receiver_fn)

        print("save item vector...")
        nce_weights = classifier.get_variable_value("nce_weights")
        nce_biases = classifier.get_variable_value("nce_biases")
        [rows, cols] = nce_weights.shape
        with tf.gfile.FastGFile(FLAGS.output_item_vector, "w") as f:
            for i in range(rows):
                f.write(str(i) + "\t")
                for j in range(cols):
                    f.write(str(nce_weights[i, j]))
                    f.write(u",")
                f.write(str(nce_biases[i]))
                f.write(u"\n")
        print("quit main")
Code example #10
def main(_):
  # Parse configs updates from command line flags.
  config_updates = []
  for update in FLAGS.config_updates:
    config_updates.extend(re.findall(r'(\S*)\s*=\s*(\S*)', update))
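  # e.g. a flag value of 'output_calibration=true' (illustrative) yields the
  # pair ('output_calibration', 'true'); see tfl.configs.apply_updates for the
  # accepted update paths.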

  # UCI Statlog (Heart) dataset.
  csv_file = tf.keras.utils.get_file(
      'heart.csv', 'http://storage.googleapis.com/applied-dl/heart.csv')
  df = pd.read_csv(csv_file)
  target = df.pop('target')
  train_size = int(len(df) * 0.8)
  train_x = df[:train_size]
  train_y = target[:train_size]
  test_x = df[train_size:]
  test_y = target[train_size:]

  # feature_analysis_input_fn is used to collect statistics about the input
  # features, and therefore requires only one pass over the dataset.
  #
  # feature_analysis_input_fn is required if you have at least one FeatureConfig
  # with "pwl_calibration_input_keypoints='quantiles'". Note that 'quantiles' is
  # the default keypoints configuration, so you will most likely need it.
  feature_analysis_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
      x=train_x,
      y=train_y,
      shuffle=False,
      batch_size=FLAGS.batch_size,
      num_epochs=1,
      num_threads=1)

  # prefitting_input_fn is used to prefit an initial ensemble that is used to
  # estimate feature interactions. This prefitting step does not need to fully
  # converge, and thus requires fewer epochs than the main training.
  #
  # prefitting_input_fn is only required if your model_config is a
  # CalibratedLatticeEnsembleConfig with "lattices='crystals'".
  prefitting_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
      x=train_x,
      y=train_y,
      shuffle=True,
      batch_size=FLAGS.batch_size,
      num_epochs=FLAGS.prefitting_num_epochs,
      num_threads=1)

  train_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
      x=train_x,
      y=train_y,
      shuffle=True,
      batch_size=FLAGS.batch_size,
      num_epochs=FLAGS.num_epochs,
      num_threads=1)

  test_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(
      x=test_x,
      y=test_y,
      shuffle=False,
      batch_size=FLAGS.batch_size,
      num_epochs=FLAGS.num_epochs,
      num_threads=1)

  # Feature columns.
  # - age
  # - sex
  # - cp        chest pain type (4 values)
  # - trestbps  resting blood pressure
  # - chol      serum cholesterol in mg/dl
  # - fbs       fasting blood sugar > 120 mg/dl
  # - restecg   resting electrocardiographic results (values 0,1,2)
  # - thalach   maximum heart rate achieved
  # - exang     exercise induced angina
  # - oldpeak   ST depression induced by exercise relative to rest
  # - slope     the slope of the peak exercise ST segment
  # - ca        number of major vessels (0-3) colored by fluoroscopy
  # - thal      3 = normal; 6 = fixed defect; 7 = reversible defect
  feature_columns = [
      fc.numeric_column('age', default_value=-1),
      fc.categorical_column_with_vocabulary_list('sex', [0, 1]),
      fc.numeric_column('cp'),
      fc.numeric_column('trestbps', default_value=-1),
      fc.numeric_column('chol'),
      fc.categorical_column_with_vocabulary_list('fbs', [0, 1]),
      fc.categorical_column_with_vocabulary_list('restecg', [0, 1, 2]),
      fc.numeric_column('thalach'),
      fc.categorical_column_with_vocabulary_list('exang', [0, 1]),
      fc.numeric_column('oldpeak'),
      fc.categorical_column_with_vocabulary_list('slope', [0, 1, 2]),
      fc.numeric_column('ca'),
      fc.categorical_column_with_vocabulary_list(
          'thal', ['normal', 'fixed', 'reversible']),
  ]

  # Feature configs are used to specify how each feature is calibrated and used.
  feature_configs = [
      configs.FeatureConfig(
          name='age',
          lattice_size=3,
          # By default, input keypoints of pwl are quantiles of the feature.
          pwl_calibration_num_keypoints=5,
          monotonicity='increasing',
          pwl_calibration_clip_max=100,
      ),
      configs.FeatureConfig(
          name='cp',
          pwl_calibration_num_keypoints=4,
          # Keypoints can be uniformly spaced.
          pwl_calibration_input_keypoints='uniform',
          monotonicity='increasing',
      ),
      configs.FeatureConfig(
          name='chol',
          # Explicit input keypoint initialization.
          pwl_calibration_input_keypoints=[126.0, 210.0, 247.0, 286.0, 564.0],
          monotonicity='increasing',
          pwl_calibration_clip_min=130,
          # Calibration can be forced to span the full output range by clamping.
          pwl_calibration_clamp_min=True,
          pwl_calibration_clamp_max=True,
          # Per feature regularization.
          regularizer_configs=[
              configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
          ],
      ),
      configs.FeatureConfig(
          name='fbs',
          # Monotonicity: output for 1 should be larger than output for 0.
          monotonicity=[(0, 1)],
      ),
      configs.FeatureConfig(
          name='trestbps',
          pwl_calibration_num_keypoints=5,
          monotonicity='decreasing',
      ),
      configs.FeatureConfig(
          name='thalach',
          pwl_calibration_num_keypoints=5,
          monotonicity='decreasing',
      ),
      configs.FeatureConfig(
          name='restecg',
          # Categorical monotonicity can be partial order.
          monotonicity=[(0, 1), (0, 2)],
      ),
      configs.FeatureConfig(
          name='exang',
          monotonicity=[(0, 1)],
      ),
      configs.FeatureConfig(
          name='oldpeak',
          pwl_calibration_num_keypoints=5,
          monotonicity='increasing',
      ),
      configs.FeatureConfig(
          name='slope',
          monotonicity=[(0, 1), (1, 2)],
      ),
      configs.FeatureConfig(
          name='ca',
          pwl_calibration_num_keypoints=4,
          monotonicity='increasing',
      ),
      configs.FeatureConfig(
          name='thal',
          monotonicity=[('normal', 'fixed'), ('normal', 'reversible')],
      ),
  ]

  # Serving input fn is used to create saved models.
  serving_input_fn = (
      tf.estimator.export.build_parsing_serving_input_receiver_fn(
          feature_spec=fc.make_parse_example_spec(feature_columns)))

  # Model config defines the model structure for the estimator.
  # This is a calibrated linear model with output calibration: inputs are
  # calibrated, linearly combined, and the output of the linear layer is
  # calibrated again using a PWL function.
  model_config = configs.CalibratedLinearConfig(
      feature_configs=feature_configs,
      use_bias=True,
      output_calibration=True,
      regularizer_configs=[
          # Regularizer for the output calibrator.
          configs.RegularizerConfig(name='output_calib_hessian', l2=1e-4),
      ])
  # Update model configuration.
  # See tfl.configs.apply_updates for details.
  configs.apply_updates(model_config, config_updates)
  estimator = estimators.CannedClassifier(
      feature_columns=feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=feature_analysis_input_fn,
      optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
  estimator.train(input_fn=train_input_fn)
  results = estimator.evaluate(input_fn=test_input_fn)
  print('Calibrated linear results: {}'.format(results))
  print('Calibrated linear model exported to {}'.format(
      estimator.export_saved_model(estimator.model_dir, serving_input_fn)))

  # This is a calibrated lattice model: inputs are calibrated, then combined
  # non-linearly using a lattice layer.
  model_config = configs.CalibratedLatticeConfig(
      feature_configs=feature_configs,
      regularizer_configs=[
          # Torsion regularizer applied to the lattice to make it more linear.
          configs.RegularizerConfig(name='torsion', l2=1e-4),
          # Globally defined calibration regularizer is applied to all features.
          configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
      ])
  estimator = estimators.CannedClassifier(
      feature_columns=feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=feature_analysis_input_fn,
      optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
  estimator.train(input_fn=train_input_fn)
  results = estimator.evaluate(input_fn=test_input_fn)
  print('Calibrated lattice results: {}'.format(results))
  print('Calibrated lattice model exported to {}'.format(
      estimator.export_saved_model(estimator.model_dir, serving_input_fn)))

  # This is a random lattice ensemble model with separate calibration: the
  # model output is the average output of separately calibrated lattices.
  model_config = configs.CalibratedLatticeEnsembleConfig(
      feature_configs=feature_configs,
      num_lattices=6,
      lattice_rank=5,
      separate_calibrators=True,
      regularizer_configs=[
          # Torsion regularizer applied to the lattice to make it more linear.
          configs.RegularizerConfig(name='torsion', l2=1e-4),
          # Globally defined calibration regularizer is applied to all features.
          configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
      ])
  configs.apply_updates(model_config, config_updates)
  estimator = estimators.CannedClassifier(
      feature_columns=feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=feature_analysis_input_fn,
      optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
  estimator.train(input_fn=train_input_fn)
  results = estimator.evaluate(input_fn=test_input_fn)
  print('Random ensemble results: {}'.format(results))
  print('Random ensemble model exported to {}'.format(
      estimator.export_saved_model(estimator.model_dir, serving_input_fn)))

  # This is a Crystals ensemble model with separate calibration: the model
  # output is the average output of separately calibrated lattices.
  # The Crystals algorithm first trains a prefitting model and uses the
  # interactions between features to form the final lattice ensemble.
  model_config = configs.CalibratedLatticeEnsembleConfig(
      feature_configs=feature_configs,
      # Using Crystals algorithm.
      lattices='crystals',
      num_lattices=6,
      lattice_rank=5,
      separate_calibrators=True,
      regularizer_configs=[
          # Torsion regularizer applied to the lattice to make it more linear.
          configs.RegularizerConfig(name='torsion', l2=1e-4),
          # Globally defined calibration regularizer is applied to all features.
          configs.RegularizerConfig(name='calib_hessian', l2=1e-4),
      ])
  configs.apply_updates(model_config, config_updates)
  estimator = estimators.CannedClassifier(
      feature_columns=feature_columns,
      model_config=model_config,
      feature_analysis_input_fn=feature_analysis_input_fn,
      # prefitting_input_fn is required to train the prefitting model.
      prefitting_input_fn=prefitting_input_fn,
      optimizer=tf.keras.optimizers.Adam(FLAGS.learning_rate))
  estimator.train(input_fn=train_input_fn)
  results = estimator.evaluate(input_fn=test_input_fn)
  print('Crystals ensemble results: {}'.format(results))
  print('Crystals ensemble model exported to {}'.format(
      estimator.export_saved_model(estimator.model_dir, serving_input_fn)))
Code example #11
    def get_feature_spec(self):
        return fc.make_parse_example_spec([self.get_feature_column()])
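Usage sketch, where obj is a hypothetical instance of the enclosing class and serialized_batch a 1-D tensor of serialized tf.Example strings:

features = tf.io.parse_example(serialized_batch, obj.get_feature_spec())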
Code example #12
from .eval_metrics import AverageNClass, HitAtOne

N_CLASS = 3862
BATCH_SIZE = 1024
VOCAB_FILE = "data/vocabulary.csv"
# Exclude the audio feature since we didn't implement audio feature extraction.
# Even if the model could be trained on audio features,
# they wouldn't be available for inference on new videos.
FEAT_COL_VIDEO = [
    fc.numeric_column(key="mean_rgb", shape=(1024, ), dtype=tf.float32),
    #fc.numeric_column(key="mean_audio", shape=(128,), dtype=tf.float32),
    fc.indicator_column(
        fc.categorical_column_with_identity(key="labels", num_buckets=N_CLASS))
]
FEAT_X = ["mean_rgb"]
FEAT_SPEC_VIDEO = fc.make_parse_example_spec(FEAT_COL_VIDEO)
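# DenseFeatures applied to the indicator column turns the sparse 'labels' ids
# into a multi-hot vector of length N_CLASS.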
MULTI_HOT_ENCODER = tf.keras.layers.DenseFeatures(FEAT_COL_VIDEO[-1])
# To use a custom serving input function, we need to use the estimator API;
# there is no documentation on how a Keras model can use a custom serving
# input function.
KERAS_TO_ESTIMATOR = True


def calc_class_weight(infile, scale=1):
    """Calculate class weight to re-balance label distribution.
    The class weight for class i (w_i) is determined by:
    w_i = total no. samples / (n_class * count(class i))
    """
    if infile.startswith("gs://"):
        with file_io.FileIO(infile, "r") as f:
            vocab = pd.read_csv(f)
    else: