def test_trainer_shouldnt_crash(self):
    """Smoke-test that a canned linear classifier can train and evaluate.

    Builds one numeric feature column per entry of the feature spec parsed
    from ``self.schema_path``, treats the ``"label"`` entry as the target,
    then trains on ``self.train_data`` and evaluates on ``self.eval_data``.
    The test passes as long as nothing raises; no metric is asserted.

    The estimator's model directory is a fresh temporary directory which is
    removed once the test finishes, whether it passed or failed.
    """
    # Local import: the file-level import block is not visible from here.
    import shutil

    label_key = "label"
    feature_spec, _ = Datasets.parse_schema(self.schema_path)

    # Every schema entry (label included) is needed to build the parse spec.
    all_features = {
        name: tf.feature_column.numeric_column(name, default_value=.0)
        for name in feature_spec.keys()
    }
    # The model itself must not see the label as an input feature.
    feature_columns = all_features.copy()
    feature_columns.pop(label_key)

    def split_features_label_fn(parsed_features):
        """Split a parsed example dict into ``(features, label)``."""
        label = parsed_features.pop(label_key)
        return parsed_features, label

    def get_in_fn(data):
        """Return a zero-argument input function reading examples from ``data``."""
        raw_feature_spec = tf.feature_column.make_parse_example_spec(
            all_features.values())

        def in_fn():
            dataset = Datasets.examples_via_feature_spec(
                data, raw_feature_spec)
            return dataset.map(split_features_label_fn)

        return in_fn

    model_dir = tempfile.mkdtemp()
    try:
        config = tf.estimator.RunConfig(model_dir)
        estimator = tf.estimator.LinearClassifier(
            feature_columns=feature_columns.values(),
            config=config)
        estimator.train(get_in_fn(self.train_data)).evaluate(
            get_in_fn(self.eval_data))
    finally:
        # mkdtemp() never cleans up after itself; do not leak a directory
        # full of checkpoints and event files on every test run.
        shutil.rmtree(model_dir, ignore_errors=True)
def train(_):
    """Train and evaluate a linear classifier on the "train"/"eval" data.

    The single positional argument is ignored.

    The feature spec is parsed from ``_inferred_schema.pb`` inside the
    directory returned by ``get_data_dir("train")``. Every schema entry
    becomes a numeric feature column; entries whose name starts with
    ``"class_name"`` are treated as one-hot label columns and are collapsed
    into a single integer class index. Training reads the ``part-*`` files of
    the train directory and evaluation reads the ``part-*`` files of
    ``get_data_dir("eval")``.

    The model directory is a fresh temporary directory that is left in place
    after the function returns.
    """
    import tempfile

    config = tf.estimator.RunConfig(tempfile.mkdtemp())

    train_data_dir = get_data_dir("train")
    schema_path = os.path.join(train_data_dir, "_inferred_schema.pb")

    feature_spec, _ = Datasets.parse_schema(schema_path)
    # we use OrderedDict and sorted keys for features for determinism
    all_features = OrderedDict(
        (name, tf.feature_column.numeric_column(name, default_value=.0))
        for name in sorted(feature_spec.keys())
    )

    # Sorted so that the position of a label column (and therefore the class
    # index produced by argmax below) is stable between runs.
    label_keys = sorted(
        name for name in all_features if name.startswith("class_name"))

    # The model's input features are everything except the label columns.
    feature_columns = all_features.copy()
    for key in label_keys:
        feature_columns.pop(key)

    # There is one one-hot label column per class, so derive the class count
    # from the schema instead of hard-coding it. Fall back to the estimator's
    # default of 2 when fewer label columns are found.
    n_classes = max(len(label_keys), 2)

    def split_features_label_fn(spec):
        """Split a parsed example dict into ``(features, label)``."""
        # Canned TF's LinearClassifier requires label to be a single integer,
        # Featran gives us one hot encoding for class, thus we need to convert
        # one hot encoding to single integer
        labels = tf.concat([[spec.pop(key)] for key in label_keys], axis=0)
        label = tf.argmax(labels, axis=0)
        # Get the rest of the features out of the spec
        return spec, label

    def get_in_fn(data):
        """Return a zero-argument input function reading examples from ``data``."""
        raw_feature_spec = tf.feature_column.make_parse_example_spec(
            all_features.values())

        def in_fn():
            dataset = Datasets.examples_via_feature_spec(
                data, raw_feature_spec)
            return dataset.map(split_features_label_fn)

        return in_fn

    classifier = tf.estimator.LinearClassifier(
        feature_columns=feature_columns.values(),
        n_classes=n_classes,
        config=config)

    train_data = os.path.join(train_data_dir, "part-*")
    eval_data = os.path.join(get_data_dir("eval"), "part-*")
    classifier.train(get_in_fn(train_data)).evaluate(get_in_fn(eval_data))