Example #1
def get_wide_deep():
  # define column types
  races = ['White', 'Black', 'American Indian', 'Chinese', 
           'Japanese', 'Hawaiian', 'Filipino', 'Unknown',
           'Asian Indian', 'Korean', 'Samaon', 'Vietnamese']
  is_male, mother_age, mother_race, plurality, gestation_weeks, \
    mother_married, cigarette_use, alcohol_use = [
      tflayers.sparse_column_with_keys('is_male', keys=['True', 'False']),
      tflayers.real_valued_column('mother_age'),
      tflayers.sparse_column_with_keys('mother_race', keys=races),
      tflayers.real_valued_column('plurality'),
      tflayers.real_valued_column('gestation_weeks'),
      tflayers.sparse_column_with_keys('mother_married', keys=['True', 'False']),
      tflayers.sparse_column_with_keys('cigarette_use', keys=['True', 'False', 'None']),
      tflayers.sparse_column_with_keys('alcohol_use', keys=['True', 'False', 'None'])
  ]

  # which columns are wide (sparse, roughly linear relationship to the output)
  # and which are deep (more complex relationship to the output)?
  wide = [is_male, mother_race, plurality, mother_married, cigarette_use, alcohol_use]
  deep = [
      mother_age,
      gestation_weeks,
      tflayers.embedding_column(mother_race, 3)
  ]
  return wide, deep
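The wide/deep split returned here is meant for a combined linear-plus-DNN estimator. The following consumption sketch is not part of the original example: it assumes learn is tensorflow.contrib.learn, that the target is continuous (e.g. birth weight) so a DNNLinearCombinedRegressor fits, and the output_dir and hidden-unit sizes are purely illustrative.

def build_estimator(output_dir):
  # linear side gets the sparse columns, DNN side the real-valued + embedding columns
  wide, deep = get_wide_deep()
  return learn.DNNLinearCombinedRegressor(
      model_dir=output_dir,
      linear_feature_columns=wide,
      dnn_feature_columns=deep,
      dnn_hidden_units=[64, 32])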
Example #3
def train_and_eval(train_steps, log_dir, training_set, validation_set, testing_set, ):
    sparse_columns = [
        layers.sparse_column_with_keys(attribute, training_set[attribute].unique()) for attribute in FEATURE_ATTRIBUTES
    ]
    embedding_columns = [
        layers.embedding_column(column, dimension=8) for column in sparse_columns
    ]
    m = learn.DNNClassifier(
        hidden_units=[10, 50, ],
        feature_columns=embedding_columns,
        model_dir=log_dir,
        config=learn.RunConfig(save_checkpoints_secs=1, ),
    )
    validation_metrics = {
        "accuracy": learn.MetricSpec(metric_fn=metrics.streaming_accuracy, prediction_key="classes"),
        "precision": learn.MetricSpec(metric_fn=metrics.streaming_precision, prediction_key="classes"),
        "recall": learn.MetricSpec(metric_fn=metrics.streaming_recall, prediction_key="classes"),
    }
    monitors = [
        learn.monitors.ValidationMonitor(
            input_fn=lambda: input_fn(validation_set),
            every_n_steps=1000,
            metrics=validation_metrics,
            early_stopping_rounds=1,
        ),
    ]
    m.fit(
        input_fn=lambda: input_fn(training_set),
        steps=train_steps,
        monitors=monitors,
    )
    results = m.evaluate(input_fn=lambda: input_fn(testing_set), steps=1)
    for key in sorted(results):
        print("%s: %s" % (key, results[key]))
Example #4
def get_features_raw():
    real = {
      colname : tflayers.real_valued_column(colname) \
          for colname in \
            ('dep_delay,taxiout,distance,avg_dep_delay,avg_arr_delay' + 
             ',dep_lat,dep_lon,arr_lat,arr_lon').split(',')
    }
    sparse = {
      'carrier': tflayers.sparse_column_with_keys('carrier',
                  keys='AS,VX,F9,UA,US,WN,HA,EV,MQ,DL,OO,B6,NK,AA'.split(',')),
      'origin' : tflayers.sparse_column_with_hash_bucket('origin', hash_bucket_size=1000), # FIXME
      'dest'   : tflayers.sparse_column_with_hash_bucket('dest', hash_bucket_size=1000) #FIXME
    }
    return real, sparse
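One possible way to flatten these dictionaries into a feature_columns list for an estimator is sketched below. This is not the original project's code: it assumes tflearn is tensorflow.contrib.learn, and the embedding dimension, hidden units and model_dir are illustrative.

real, sparse = get_features_raw()
feature_columns = list(real.values()) + \
    [tflayers.embedding_column(col, 10) for col in sparse.values()]
model = tflearn.DNNRegressor(feature_columns=feature_columns,
                             hidden_units=[64, 32],
                             model_dir='/tmp/flights_model')  # path is illustrative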
Example #5
def build_estimator(model_dir, model_type):
    """build an estimator"""

    # base sparse feature process
    gender = layers.sparse_column_with_keys(column_name='gender', keys=['female', 'male'])
    education = layers.sparse_column_with_hash_bucket(column_name='education', hash_bucket_size=1000)
    relationship = layers.sparse_column_with_hash_bucket(column_name='relationship', hash_bucket_size=100)
    workclass = layers.sparse_column_with_hash_bucket(column_name='workclass', hash_bucket_size=100)
    occupation = layers.sparse_column_with_hash_bucket(column_name='occupation', hash_bucket_size=1000)
    native_country = layers.sparse_column_with_hash_bucket(column_name='native_country', hash_bucket_size=1000)

    # base continuous feature
    age = layers.real_valued_column(column_name='age')
    education_num = layers.real_valued_column(column_name='education_num')
    capital_gain = layers.real_valued_column(column_name='capital_gain')
    capital_loss = layers.real_valued_column(column_name='capital_loss')
    hours_per_week = layers.real_valued_column(column_name='hours_per_week')

    # transformation: bucketization turns the continuous variable into categorical buckets, which can improve accuracy
    age_bucket = layers.bucketized_column(source_column=age,
                                          boundaries=[18, 25, 30, 35, 40, 45,50, 55, 60, 65])

    # wide columns and deep columns
    # features used by the deep model vs. features used by the wide model
    # the wide model only uses categorical (sparse, bucketized and crossed) columns
    wide_columns = [gender, native_country, education, relationship, workclass, occupation, age_bucket,
                    layers.crossed_column(columns=[education, occupation], hash_bucket_size=int(1e4)),
                    layers.crossed_column(columns=[age_bucket, education, occupation], hash_bucket_size=int(1e6)),
                    layers.crossed_column(columns=[native_country, occupation], hash_bucket_size=int(1e4))]

    deep_columns = [layers.embedding_column(workclass, dimension=8),
                    layers.embedding_column(education, dimension=8),
                    layers.embedding_column(gender, dimension=8),
                    layers.embedding_column(relationship, dimension=8),
                    layers.embedding_column(native_country, dimension=8),
                    layers.embedding_column(occupation, dimension=8),
                    age, education_num, capital_gain, capital_loss, hours_per_week]

    if model_type == "wide":
        m=learn.LinearClassifier(feature_columns=wide_columns, model_dir=model_dir)
    elif model_type == "deep":
        m=learn.DNNClassifier(feature_columns=deep_columns, model_dir=model_dir, hidden_units=[100, 50])
    else:
        m=learn.DNNLinearCombinedClassifier(model_dir=model_dir,
                                            linear_feature_columns=wide_columns,
                                            dnn_feature_columns=deep_columns,
                                            dnn_hidden_units=[256, 128, 64],
                                            dnn_activation_fn=tf.nn.relu)
    return m
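A brief usage sketch for build_estimator: the model_dir, the model_type value, the step counts, and the train_input_fn/eval_input_fn helpers are assumptions, with the input functions expected to follow the same pattern as the other examples on this page.

m = build_estimator(model_dir='/tmp/census_model', model_type='wide_n_deep')
m.fit(input_fn=train_input_fn, steps=2000)
results = m.evaluate(input_fn=eval_input_fn, steps=1)
for key in sorted(results):
    print('%s: %s' % (key, results[key]))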
Example #6
def part4():
    global boston, x_data, y_data
    import pandas as pd
    import numpy as np
    N = 10000

    weight = np.random.randn(N) * 5 + 70
    spec_id = np.random.randint(0, 3, N)
    bias = [0.9, 1, 1.1]
    height = np.array(
        [weight[i] / 100 + bias[b] for i, b in enumerate(spec_id)])
    spec_name = ['Goblin', 'Human', 'ManBear']
    spec = [spec_name[s] for s in spec_id]

    df = pd.DataFrame({'Species': spec, 'Weight': weight, 'Height': height})

    from tensorflow.contrib import layers
    Weight = layers.real_valued_column("Weight")
    Species = layers.sparse_column_with_keys(column_name="Species",
                                             keys=spec_name)
    reg = learn.LinearRegressor(feature_columns=[Weight, Species])

    def input_fn(df):
        feature_cols = {}
        feature_cols['Weight'] = tf.constant(df['Weight'].values)

        feature_cols['Species'] = tf.SparseTensor(
            indices=[[i, 0] for i in range(df['Species'].size)],
            values=df['Species'].values,
            dense_shape=[df['Species'].size, 1])

        labels = tf.constant(df['Height'].values)

        return feature_cols, labels

    reg.fit(input_fn=lambda: input_fn(df), steps=50000)

    w_w = reg.get_variable_value('linear/Weight/weight')
    print(f"Estimation for Weight: {w_w}")

    v = reg.get_variable_names()
    print(f"Classes: {v}")

    s_w = reg.get_variable_value('linear/Species/weights')
    b = reg.get_variable_value('linear/bias_weight')
    print(f"Estimation for Species: {s_w + b}")
Example #7
def train_and_eval(model_dir, training_set, testing_set, ):
    sparse_columns = [
        layers.sparse_column_with_keys(
            attribute['name'], pandas.read_csv(attribute['path'], sep='\t')['id'].apply(str),
        ) for attribute in FEATURE_ATTRIBUTES
    ]
    embedding_columns = [layers.embedding_column(column, dimension=3) for column in sparse_columns]
    model = learn.DNNRegressor(
        hidden_units=[3, ],
        feature_columns=embedding_columns,
        model_dir=model_dir,
        config=learn.RunConfig(save_checkpoints_secs=100, ),
    )
    model.fit(input_fn=lambda: input_fn(training_set), steps=20000, )
    results = model.evaluate(input_fn=lambda: input_fn(testing_set), steps=1)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))
Example #8
def build_feature_cols():
    # Sparse base columns.
    gender = layers.sparse_column_with_keys(column_name="gender",
                                            keys=["female", "male"])
    race = layers.sparse_column_with_keys(column_name="race",
                                          keys=[
                                              "Amer-Indian-Eskimo",
                                              "Asian-Pac-Islander", "Black",
                                              "Other", "White"
                                          ])

    education = layers.sparse_column_with_hash_bucket("education",
                                                      hash_bucket_size=1000)
    marital_status = layers.sparse_column_with_hash_bucket(
        "marital_status", hash_bucket_size=100)
    relationship = layers.sparse_column_with_hash_bucket("relationship",
                                                         hash_bucket_size=100)
    workclass = layers.sparse_column_with_hash_bucket("workclass",
                                                      hash_bucket_size=100)
    occupation = layers.sparse_column_with_hash_bucket("occupation",
                                                       hash_bucket_size=1000)
    native_country = layers.sparse_column_with_hash_bucket(
        "native_country", hash_bucket_size=1000)

    # Continuous base columns.
    age = layers.real_valued_column("age")
    education_num = layers.real_valued_column("education_num")
    capital_gain = layers.real_valued_column("capital_gain")
    capital_loss = layers.real_valued_column("capital_loss")
    hours_per_week = layers.real_valued_column("hours_per_week")

    # Transformations.
    age_buckets = layers.bucketized_column(
        age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
    education_occupation = layers.crossed_column([education, occupation],
                                                 hash_bucket_size=int(1e4))
    age_race_occupation = layers.crossed_column(
        [age_buckets, race, occupation], hash_bucket_size=int(1e6))
    country_occupation = layers.crossed_column([native_country, occupation],
                                               hash_bucket_size=int(1e4))

    # Wide columns and deep columns.
    wide_columns = [
        gender, native_country, education, occupation, workclass, race,
        marital_status, relationship, age_buckets, education_occupation,
        age_race_occupation, country_occupation
    ]

    deep_columns = [
        layers.embedding_column(gender, dimension=8),
        layers.embedding_column(native_country, dimension=8),
        layers.embedding_column(education, dimension=8),
        layers.embedding_column(occupation, dimension=8),
        layers.embedding_column(workclass, dimension=8),
        layers.embedding_column(race, dimension=8),
        layers.embedding_column(marital_status, dimension=8),
        layers.embedding_column(relationship, dimension=8),
        # layers.embedding_column(age_buckets, dimension=8),
        layers.embedding_column(education_occupation, dimension=8),
        layers.embedding_column(age_race_occupation, dimension=8),
        layers.embedding_column(country_occupation, dimension=8),
        age,
        education_num,
        capital_gain,
        capital_loss,
        hours_per_week,
    ]

    return wide_columns, deep_columns
Example #9
    values=table_species_weight_height['Species'].values
    dense_shape=[table_species_weight_height['Species'].size, 1]
            
    feature_cols['Species'] =  tf.SparseTensor(indices, values, dense_shape)
                    
    measured_heights = tf.constant(table_species_weight_height['Height'].values)

    return feature_cols, measured_heights


# In[ ]:


Weight = layers.real_valued_column("Weight")

Species = layers.sparse_column_with_keys(
    column_name="Species", keys=['Goblin','Human','ManBears'])


# In[ ]:


reg = learn.LinearRegressor(feature_columns=[Weight,Species])


# In[ ]:


reg.fit(input_fn=lambda: input_fn(table_species_weight_height), steps=25000)  # originally steps=50000


# In[ ]:
Example #10
def input_fn(df):
    feature_cols = {}
    feature_cols['Weight'] = tf.constant(df['Weight'].values)

    feature_cols['Species'] = tf.SparseTensor(
        indices=[[i, 0] for i in range(df['Species'].size)],
        values=df['Species'].values,
        dense_shape=[df['Species'].size, 1]
    )
    labels = tf.constant(df['Height'].values)
    return feature_cols, labels


from tensorflow.contrib import layers
from tensorflow.contrib import learn

Weight = layers.real_valued_column('Weight')
Species = layers.sparse_column_with_keys(column_name='Species', keys=['Goblin', 'Human', 'MinBears'])
reg = learn.LinearRegressor(feature_columns=[Weight, Species])
reg.fit(input_fn=lambda: input_fn(df), steps=50000)

w_w = reg.get_variable_value('linear/Weight/weight')
print('Estimation for Weight: {}'.format(w_w))

s_w = reg.get_variable_value('linear/Species/weights')
b = reg.get_variable_value('linear/bias_weight')
print('Estimation for Species: {}'.format(s_w + b))

## last line..
Example #11
from tensorflow.contrib.learn.python.learn import DNNLinearCombinedClassifier, LinearClassifier
from tensorflow.contrib.layers import bucketized_column, crossed_column, embedding_column, sparse_column_with_keys, sparse_column_with_hash_bucket, real_valued_column
from tempfile import mkdtemp

PATH_TO_DIRECTORY_OF_THIS_FILE = dirname(realpath(__file__))
PATH_TO_DIRECTORY_OF_INPUT_DATA = PATH_TO_DIRECTORY_OF_THIS_FILE + "/data/input"
MODEL_DIR = PATH_TO_DIRECTORY_OF_THIS_FILE + "/classifier"

CATEGORICAL_COLUMNS = ["admin_level", "country_code", "edit_distance", "has_mpoly", "has_pcode", "is_country", "is_highest_population", "is_lowest_admin_level", "matches_topic"]
CONTINUOUS_COLUMNS = ["cluster_frequency", "country_rank", "median_distance", "population", "popularity"]
LABEL_COLUMN = "correct"
COLUMNS = sorted(CATEGORICAL_COLUMNS + CONTINUOUS_COLUMNS) + [LABEL_COLUMN]
print "COLUMNS:", COLUMNS


admin_level = sparse_column_with_keys(column_name="admin_level", keys=["None","0","1","2","3","4","5","6"]) # I've never seen admin 6, but you never know!
cluster_frequency = real_valued_column("cluster_frequency")
cluster_frequency_buckets = bucketized_column(cluster_frequency, boundaries=[0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1])
country_code = sparse_column_with_hash_bucket("country_code", hash_bucket_size=500)
country_rank = real_valued_column("country_rank")
edit_distance = sparse_column_with_keys(column_name="edit_distance", keys=["0", "1", "2"])
has_pcode = sparse_column_with_keys(column_name="has_pcode", keys=["True", "False"])
has_mpoly = sparse_column_with_keys(column_name="has_mpoly", keys=["True", "False"])
is_country = sparse_column_with_keys(column_name="is_country", keys=["True", "False"])
is_lowest_admin_level = sparse_column_with_keys(column_name="is_lowest_admin_level", keys=["True", "False"])
is_highest_population = sparse_column_with_keys(column_name="is_highest_population", keys=["True", "False"])
matches_topic = sparse_column_with_keys(column_name="matches_topic", keys=["True", "False"])
median_distance = real_valued_column("median_distance")
median_distance_buckets = bucketized_column(median_distance, boundaries=[10,50,100,200,300])
population = real_valued_column("population")
population_buckets = bucketized_column(population, boundaries=[0, 1, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000])
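The snippet ends before the columns are assembled into an estimator. Below is a hedged sketch of one way to finish it using only names defined above; the choice to feed everything to a plain LinearClassifier and the train_input_fn helper are assumptions, not the original code.

wide_columns = [admin_level, country_code, edit_distance, has_pcode, has_mpoly,
                is_country, is_lowest_admin_level, is_highest_population, matches_topic,
                cluster_frequency_buckets, median_distance_buckets, population_buckets]
classifier = LinearClassifier(feature_columns=wide_columns, model_dir=MODEL_DIR)
# classifier.fit(input_fn=train_input_fn, steps=200)  # train_input_fn is hypothetical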
Example #12
	feature_placeholders = dict(list(cont_feature_placeholders.items()) + list(cat_feature_placeholders.items()))
	features = {column : tensor for column, tensor in feature_placeholders.items()}
	label = None
	return InputFnOps(features, label, feature_placeholders)

#
# Binary classification
#

audit_df = load_csv("Audit.csv")
audit_df["Adjusted"] = audit_df["Adjusted"].astype(int)

audit_cont_columns = ["Age", "Income", "Deductions", "Hours"]
audit_cat_columns = ["Employment", "Education", "Marital", "Occupation", "Gender"]

audit_feature_columns = [real_valued_column(column, dtype = tf.float64) for column in audit_cont_columns] + [sparse_column_with_keys(column, dtype = tf.string, keys = sorted(audit_df[column].unique())) for column in audit_cat_columns]

def audit_input_fn():
	return _input_fn(audit_df, audit_cont_columns, audit_cat_columns, "Adjusted")

def audit_serving_input_fn():
	return _serving_input_fn(audit_cont_columns, audit_cat_columns)

def build_audit(classifier, max_steps, name, with_proba = True):
	classifier.fit(input_fn = audit_input_fn, max_steps = max_steps)

	adjusted = DataFrame(classifier.predict(input_fn = audit_input_fn, as_iterable = False), columns = ["_target"])
	if(with_proba):
		adjusted_proba = DataFrame(classifier.predict_proba(input_fn = audit_input_fn, as_iterable = False), columns = ["probability(0)", "probability(1)"])
		adjusted = pandas.concat((adjusted, adjusted_proba), axis = 1)
	store_csv(adjusted, name + ".csv")
Example #13
UNUSED_COLUMNS = ['datetime']

INPUT_COLUMN_NAMES = {
    'dayofweek': tf.string,
    'hourofday': tf.int32,
    'pickuplon': tf.float32,
    'pickuplat': tf.float32,
    'dropofflon': tf.float32,
    'dropofflat': tf.float32,
    'passengers': tf.int32
}

# These are the raw input columns, and will be provided for prediction also
INPUT_COLUMNS = [
    # define features
    layers.sparse_column_with_keys(
        'dayofweek', keys=['Sun', 'Mon', 'Tues', 'Wed', 'Thu', 'Fri', 'Sat']),
    layers.sparse_column_with_integerized_feature('hourofday', bucket_size=24),

    # engineered features that are created in the input_fn
    layers.real_valued_column('latdiff'),
    layers.real_valued_column('londiff'),
    layers.real_valued_column('euclidean'),

    # real_valued_column
    layers.real_valued_column('pickuplon'),
    layers.real_valued_column('pickuplat'),
    layers.real_valued_column('dropofflat'),
    layers.real_valued_column('dropofflon'),
    layers.real_valued_column('passengers'),
]
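The latdiff, londiff and euclidean columns above are created inside the input_fn, whose code is not shown here. A plausible feature-engineering step, inferred from the column names and applied to the tensor dict the input_fn builds, could look like this (the exact formulas are assumptions):

def add_engineered(features):
    # differences between pickup and dropoff coordinates
    latdiff = features['pickuplat'] - features['dropofflat']
    londiff = features['pickuplon'] - features['dropofflon']
    features['latdiff'] = latdiff
    features['londiff'] = londiff
    # straight-line distance in coordinate space
    features['euclidean'] = tf.sqrt(latdiff * latdiff + londiff * londiff)
    return features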
Example #14
import tensorflow.contrib.learn as tflearn
from tensorflow.contrib import metrics
import numpy as np

tf.logging.set_verbosity(tf.logging.INFO)

CSV_COLUMNS = 'fare_amount,dayofweek,hourofday,pickuplon,pickuplat,dropofflon,dropofflat,passengers,key'.split(',')
SCALE_COLUMNS = ['pickuplon','pickuplat','dropofflon','dropofflat','passengers']
LABEL_COLUMN = 'fare_amount'
KEY_FEATURE_COLUMN = 'key'
DEFAULTS = [[0.0], ['Sun'], [0], [-74.0], [40.0], [-74.0], [40.7], [1.0], ['nokey']]

# These are the raw input columns, and will be provided for prediction also
INPUT_COLUMNS = [
    # define features
    layers.sparse_column_with_keys('dayofweek', keys=['Sun', 'Mon', 'Tues', 'Wed', 'Thu', 'Fri', 'Sat']),
    layers.sparse_column_with_integerized_feature('hourofday', bucket_size=24),

    # engineered features that are created in the input_fn
    layers.real_valued_column('latdiff'),
    layers.real_valued_column('londiff'),
    layers.real_valued_column('euclidean'),

    # real_valued_column
    layers.real_valued_column('pickuplon'),
    layers.real_valued_column('pickuplat'),
    layers.real_valued_column('dropofflat'),
    layers.real_valued_column('dropofflon'),
    layers.real_valued_column('passengers'),
]
Example #15
               'marital_status', 'occupation', 'relationship', 'race', 'gender',
               'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
               'income_bracket']
CSV_COLUMN_DEFAULTS = [[0], [''], [0], [''], [0], [''], [''], [''], [''], [''],
                       [0], [0], [0], [''], ['']]
LABEL_COLUMN = 'income_bracket'
LABELS = [' <=50K', ' >50K']

# Define the initial ingestion of each feature used by your model.
# Additionally, provide metadata about the feature.
INPUT_COLUMNS = [
    # Categorical base columns

    # For categorical columns with known values we can provide lists
    # of values ahead of time.
    layers.sparse_column_with_keys(column_name='gender', keys=['female', 'male']),

    layers.sparse_column_with_keys(
        column_name='race',
        keys=[
            'Amer-Indian-Eskimo',
            'Asian-Pac-Islander',
            'Black',
            'Other',
            'White'
        ]
    ),

    # Otherwise we can use a hashing function to bucket the categories
    layers.sparse_column_with_hash_bucket('education', hash_bucket_size=1000),
    layers.sparse_column_with_hash_bucket('marital_status', hash_bucket_size=100),
Example #16
training_size = int(len(data) * 0.8)
verification_size = len(data) - training_size

randomized_data = data.sample(frac=1)
training_examples = randomized_data.head(training_size)[FEATURES]
training_targets = randomized_data.head(training_size)[[TARGET]]
validation_examples = randomized_data.tail(verification_size)[FEATURES]
validation_targets = randomized_data.tail(verification_size)[[TARGET]]

STEPS = 5000
BATCH_SIZE = 5
periods = 1

feature_columns = [
    layers.sparse_column_with_keys(column_name="sex", keys=["M", "F", "I"])
] + ([layers.real_valued_column(name) for name in REAL_VALUED_FEATURES])

linear_regressor = learn.LinearRegressor(
    optimizer=tensorflow.train.GradientDescentOptimizer(0.05),
    feature_columns=feature_columns)


def input_fn(features, target=None):
    """Input builder function."""
    # Creates a dictionary mapping from each continuous feature column name (k) to
    # the values of that column stored in a constant Tensor.
    continuous_cols = {
        k: tensorflow.constant(features[k].values)
        for k in REAL_VALUED_FEATURES
    }
Example #17
# Binary classification
#

audit_df = load_csv("Audit.csv")
audit_df["Adjusted"] = audit_df["Adjusted"].astype(int)

audit_cont_columns = ["Age", "Income", "Deductions", "Hours"]
audit_cat_columns = [
    "Employment", "Education", "Marital", "Occupation", "Gender"
]

audit_feature_columns = [
    real_valued_column(column, dtype=tf.float64)
    for column in audit_cont_columns
] + [
    sparse_column_with_keys(
        column, dtype=tf.string, keys=sorted(audit_df[column].unique()))
    for column in audit_cat_columns
]


def audit_input_fn():
    return _input_fn(audit_df, audit_cont_columns, audit_cat_columns,
                     "Adjusted")


def audit_serving_input_fn():
    return _serving_input_fn(audit_cont_columns, audit_cat_columns)


def build_audit(classifier, name, with_proba=True):
    classifier.fit(input_fn=audit_input_fn, steps=2000)
Example #18
        indices=[[i,0] for i in range(df[k].size)],
        values=df[k].values,
        shape=[df[k].size,1]) for k in features}
    label=tensorflow.constant(df["tmp"].values)
    return col,label

def train_input_fn():
	return input_fn(data)	
	
def testing_fn():
	return input_fn(data_test)		

data["tmp"]= data["class"].apply(help)
data_test["tmp"]= data_test["class"].apply(help)

buying = layers.sparse_column_with_keys(column_name="buying",keys=["low","med","high","vhigh"])
maint = layers.sparse_column_with_keys(column_name="maint",keys=["low","med","high","vhigh"])
doors = layers.sparse_column_with_keys(column_name="doors",keys=["2","3","4","5more"])
persons = layers.sparse_column_with_keys(column_name="persons",keys=["2","4","more"])
lug_boot = layers.sparse_column_with_keys(column_name="lug_boot",keys=["small","med","big"])
safety = layers.sparse_column_with_keys(column_name="safety",keys=["low","med","high"])

buying_emb = layers.embedding_column(buying,dimension=4)
maint_emb = layers.embedding_column(maint,dimension=4)
doors_emb = layers.embedding_column(doors,dimension=4)
persons_emb = layers.embedding_column(persons,dimension=3)
lug_boot_emb = layers.embedding_column(lug_boot,dimension=3)
safety_emb = layers.embedding_column(safety,dimension=3)

dnn_classifier = learn.DNNClassifier(feature_columns=[buying_emb, maint_emb,doors_emb,persons_emb,lug_boot_emb,safety_emb], hidden_units=[10], n_classes=4, )
#dnn_classifier.fit(X_train, y_train, steps = 1000)
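The fit call is commented out above; a short sketch of training and evaluating with the input functions this snippet already defines (the step counts are illustrative):

dnn_classifier.fit(input_fn=train_input_fn, steps=1000)
results = dnn_classifier.evaluate(input_fn=testing_fn, steps=1)
for key in sorted(results):
    print("%s: %s" % (key, results[key]))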