コード例 #1
0
def make_columns():
  """Assemble the feature-column groups, without any normalization.

  Returns:
    A 4-tuple ``(embedding_columns, order_columns, spacetime_columns,
    user_columns)``:
      * embedding_columns: embeddings for user_id, partner car type,
        dayofweek, timeSlice and sHexID (in that order).
      * order_columns: the single numeric column 'dist'.
      * spacetime_columns: numeric columns for every name in
        ``int64_columns + float32_columns``.
      * user_columns: numeric columns for every name in
        ``user_null_columns + user_float32_columns``.
  """
  # --- categorical features backed by vocabulary files / lists -------------
  user_id_categorical = fc.categorical_column_with_vocabulary_file(
      'user_id', vocabulary_file='user_id', dtype=tf.string,
      num_oov_buckets=4)
  user_id = fc.embedding_column(user_id_categorical, dimension=6)

  partner_ids = fc.categorical_column_with_vocabulary_file(
      'reserve_partner_car_type_id', vocabulary_file='partner_car_type_id',
      dtype=tf.string, num_oov_buckets=1)
  partner_ids_embedding = fc.embedding_column(partner_ids, dimension=3)

  # Vocabulary is the strings '0' .. '7'.
  weekday_vocabulary = [str(d) for d in range(0, 8)]
  dayofweek_categorical = fc.categorical_column_with_vocabulary_list(
      'dayofweek', weekday_vocabulary, dtype=tf.string, num_oov_buckets=1)
  dayofweek = fc.embedding_column(dayofweek_categorical, dimension=3)

  time_slice_categorical = fc.categorical_column_with_vocabulary_file(
      'timeSlice', vocabulary_file='timeSlice', dtype=tf.string,
      num_oov_buckets=1)
  timeSlice = fc.embedding_column(time_slice_categorical, dimension=3)

  start_hex_categorical = fc.categorical_column_with_vocabulary_file(
      'sHexID', vocabulary_file='sHexID', num_oov_buckets=1)
  sHexID = fc.embedding_column(start_hex_categorical, dimension=6)

  # eHexID is still constructed here, exactly as before, but it is not part
  # of the returned embedding list.
  end_hex_categorical = fc.categorical_column_with_vocabulary_file(
      'eHexID', vocabulary_file='eHexID', num_oov_buckets=1)
  eHexID = fc.embedding_column(end_hex_categorical, dimension=6)

  # --- plain numeric features ------------------------------------------------
  order_columns = [fc.numeric_column('dist')]
  user_columns = list(
      map(fc.numeric_column, user_null_columns + user_float32_columns))
  spacetime_columns = list(
      map(fc.numeric_column, int64_columns + float32_columns))

  embedding_columns = [
      user_id,
      partner_ids_embedding,
      dayofweek,
      timeSlice,
      sHexID,
  ]
  return embedding_columns, order_columns, spacetime_columns, user_columns
コード例 #2
0
  def _base():
    """Return the base feature columns as a flat list.

    The list holds, in order: four numeric columns (education_num,
    capital_gain, capital_loss, hours_per_week), three indicator columns
    built from vocabulary files (relationship, education, race) and one
    indicator column over a 20-bucket hash of 'occupation'.
    """
    numeric_part = [
        fc.numeric_column(key)
        for key in ('education_num', 'capital_gain', 'capital_loss',
                    'hours_per_week')
    ]

    # (feature key, vocabulary file) pairs, wrapped as indicator columns.
    vocabulary_sources = (
        ('relationship', 'data/relationship'),
        ('education', 'data/education'),
        ('race', 'data/race'),
    )
    vocabulary_part = [
        fc.indicator_column(
            fc.categorical_column_with_vocabulary_file(
                key, vocabulary_file=path))
        for key, path in vocabulary_sources
    ]

    # 'occupation' has no vocabulary file; it is hashed into 20 buckets.
    hashed_occupation = fc.categorical_column_with_hash_bucket('occupation', 20)
    occupation = fc.indicator_column(hashed_occupation)

    return numeric_part + vocabulary_part + [occupation]
コード例 #3
0
ファイル: feature_column.py プロジェクト: ifkid/xiaohongshu
def categorical_column(key, vocabulary_size=None,
                       vocabulary_list=None,
                       vocabulary_file=None,
                       num_oov_buckets=0):
    """Build a categorical feature column from whichever source is supplied.

    The sources are checked in priority order; the first truthy one wins:

    1. ``vocabulary_size``  -> ``categorical_column_with_identity``
    2. ``vocabulary_list``  -> ``categorical_column_with_vocabulary_list``
    3. ``vocabulary_file``  -> ``categorical_column_with_vocabulary_file``

    Args:
        key: Feature name the column reads from.
        vocabulary_size: Number of buckets for an identity column.
        vocabulary_list: In-memory vocabulary; its first element must be a
            string.
        vocabulary_file: Path to a vocabulary file.
        num_oov_buckets: Number of out-of-vocabulary buckets. Only used for
            the list- and file-backed variants.

    Returns:
        The categorical column, or ``None`` when none of the three sources
        is given (or all are falsy).

    Raises:
        AssertionError: If ``vocabulary_list`` is given and its first element
            is not a string.
    """
    if vocabulary_size:
        return feature_column.categorical_column_with_identity(
            key, vocabulary_size)

    if vocabulary_list:
        assert isinstance(vocabulary_list[0], six.string_types), "Vocabulary must be sequence of string"
        # num_oov_buckets must be passed by keyword: the third positional
        # parameter of categorical_column_with_vocabulary_list is `dtype`.
        return feature_column.categorical_column_with_vocabulary_list(
            key, vocabulary_list, num_oov_buckets=num_oov_buckets)

    if vocabulary_file:
        # Same here: the third positional parameter of
        # categorical_column_with_vocabulary_file is `vocabulary_size`, so a
        # positional 0 would be read as an (invalid) vocabulary size.
        return feature_column.categorical_column_with_vocabulary_file(
            key, vocabulary_file, num_oov_buckets=num_oov_buckets)

    # No usable vocabulary source: keep the historical behaviour of
    # returning None rather than raising.
    return None
コード例 #4
0
def make_columns_with_normalizer_with_file():
  """Assemble the feature-column groups, min/max-normalizing float features.

  Per-feature statistics are loaded from ``summary.json`` (transposed so that
  ``summary[feature]['min']`` / ``summary[feature]['max']`` can be looked
  up). 'dist' and every column in ``float32_columns`` get a ``normalizer_fn``
  that calls the module-level ``normalizer_fn(v, min, max)`` with that
  feature's own bounds.

  Returns:
    A 4-tuple ``(embedding_columns, order_columns, spacetime_columns,
    user_columns)``; ``embedding_columns`` holds user_id, partner car type,
    dayofweek, timeSlice and sHexID embeddings, in that order.
  """
  import pandas as pd

  with open('summary.json') as fp:
    summary = pd.DataFrame(json.load(fp)).T

  def _min_max_normalizer(name):
    """Return a normalizer bound to the min/max recorded for ``name``."""
    # Resolve the bounds now, once per feature. A lambda written inline in a
    # comprehension would capture the loop variable by reference and every
    # column would end up using the bounds of the last feature.
    lower = float(summary[name]['min'])
    upper = float(summary[name]['max'])
    return lambda v: normalizer_fn(v, lower, upper)

  # categorical_column_with_vocabulary_list
  user_id = fc.embedding_column(fc.categorical_column_with_vocabulary_file(
    'user_id', vocabulary_file='user_id', dtype=tf.string, num_oov_buckets=4), dimension=6)

  partner_ids = fc.categorical_column_with_vocabulary_file(
    'reserve_partner_car_type_id', vocabulary_file='partner_car_type_id', dtype=tf.string, num_oov_buckets=1)
  partner_ids_embedding = fc.embedding_column(partner_ids, dimension=3)

  dayofweek = fc.embedding_column(fc.categorical_column_with_vocabulary_list(
    'dayofweek', [str(d) for d in range(0, 8)], dtype=tf.string, num_oov_buckets=1), dimension=3)
  timeSlice = fc.embedding_column(fc.categorical_column_with_vocabulary_file(
    'timeSlice', vocabulary_file='timeSlice', dtype=tf.string, num_oov_buckets=1), dimension=3)

  sHexID = fc.embedding_column(fc.categorical_column_with_vocabulary_file(
    'sHexID', vocabulary_file='sHexID', num_oov_buckets=1), dimension=6)
  # eHexID is constructed but is not part of the returned embedding list.
  eHexID = fc.embedding_column(fc.categorical_column_with_vocabulary_file(
    'eHexID', vocabulary_file='eHexID', num_oov_buckets=1), dimension=6)

  order_columns = [
    fc.numeric_column('dist', normalizer_fn=_min_max_normalizer('dist'))
    ]

  user_columns = [fc.numeric_column(c) for c in user_null_columns + user_float32_columns]
  # TODO: summary info

  # Integer space-time features are used as-is; float ones are normalized
  # with their own per-column bounds.
  spacetime_columns = [fc.numeric_column(c) for c in int64_columns]
  spacetime_columns += [
    fc.numeric_column(c, normalizer_fn=_min_max_normalizer(c))
    for c in float32_columns
  ]

  embedding_columns = [user_id, partner_ids_embedding, dayofweek, timeSlice, sHexID]
  return embedding_columns, order_columns, spacetime_columns, user_columns
コード例 #5
0
def create_tf_categorical_feature_cols(categorical_col_list,
                                       vocab_dir='./diabetes_vocab/'):
    """Create one indicator feature column per categorical field.

    Args:
        categorical_col_list: list of categorical field names to convert.
        vocab_dir: directory holding the vocabulary text files; the file for
            field ``c`` is expected at ``<vocab_dir>/<c>_vocab.txt``.

    Returns:
        A list of TF feature columns, one per entry of
        ``categorical_col_list`` and in the same order.
    """
    def _indicator_for(field_name):
        # Vocabulary-file-backed categorical column, wrapped as an indicator.
        vocab_path = os.path.join(vocab_dir, field_name + "_vocab.txt")
        categorical = feature_column.categorical_column_with_vocabulary_file(
            field_name, vocab_path)
        return feature_column.indicator_column(categorical)

    return [_indicator_for(field_name) for field_name in categorical_col_list]
コード例 #6
0
ファイル: Main.py プロジェクト: stechr8/HonoursProject
# Register the reservation-status column (defined before this fragment).
feature_columns.append(resStatus_one_hot)

# Arrival month: categorical column over the twelve English month names,
# wrapped in an indicator column and added to the feature list.
arrivalMonth = feature_column.categorical_column_with_vocabulary_list(
    'ArrivalDateMonth', [
        'January', 'February', 'March', 'April', 'May', 'June', 'July',
        'August', 'September', 'October', 'November', 'December'
    ])

arrivalMonth_one_hot = feature_column.indicator_column(arrivalMonth)

feature_columns.append(arrivalMonth_one_hot)

# Country: categorical column whose vocabulary is read from the file path
# given as the third command-line argument (sys.argv[3]). No out-of-vocabulary
# buckets are requested and vocabulary_size is left unset.
countryNames = feature_column.categorical_column_with_vocabulary_file(
    'Country',
    sys.argv[3],
    vocabulary_size=None,
    dtype=tf.dtypes.string,
    default_value=None,
    num_oov_buckets=0)

countryNames_one_hot = feature_column.indicator_column(countryNames)

feature_columns.append(countryNames_one_hot)

for header in [
        'LeadTime', 'ArrivalDateWeekNumber', 'StaysInWeekendNights',
        'StaysInWeekNights', 'Adults', 'Children', 'Babies',
        'PreviousCancellations', 'PreviousBookingsNotCanceled',
        'BookingChanges', 'DaysInWaitingList', 'RequiredCarParkingSpaces',
        'TotalOfSpecialRequests', 'ADR', 'ArrivalDateDayOfMonth',
        'ArrivalDateYear', 'Agent', 'Company'
コード例 #7
0
#-*- coding:utf-8 -*-

# Define the feature_columns
import tensorflow as tf
from tensorflow import feature_column as fc

#-----------------------User feature columns-----------------------------
# Every column below is a categorical column (vocabulary file or inline
# vocabulary list) wrapped in fc.indicator_column.

# Vocabulary read from the file 'resource/province'.
province = fc.indicator_column(
    fc.categorical_column_with_vocabulary_file('province',
                                               'resource/province'))
# Seven region names given inline.
region = fc.indicator_column(
    fc.categorical_column_with_vocabulary_list(
        'region', ['东北', '华中', '华东', '华北', '西北', '华南', '西南']))
# Vocabulary read from the file 'resource/city'.
city = fc.indicator_column(
    fc.categorical_column_with_vocabulary_file('city', 'resource/city'))
# Six city-tier labels given inline.
city_level = fc.indicator_column(
    fc.categorical_column_with_vocabulary_list(
        'city_level', ['一线城市', '新一线城市', '二线城市', '三线城市', '四线城市', '五线城市']))
# Integer vocabulary [0, 1].
browser = fc.indicator_column(
    fc.categorical_column_with_vocabulary_list('browser', [0, 1]))
# NOTE(review): this module-level name is `os`; it would shadow the standard
# `os` module if that were ever imported in this file. The vocabulary also
# lists 'Android' and 'android' as two distinct entries - confirm intended.
os = fc.indicator_column(
    fc.categorical_column_with_vocabulary_list(
        'os', ['Android', 'android', 'devtools', 'unknown', 'iPhone', 'ios']))
# The four ipv_*_type columns share the same integer vocabulary [1, 2, 3, 4].
ipv_7d_type = fc.indicator_column(
    fc.categorical_column_with_vocabulary_list('ipv_7d_type', [1, 2, 3, 4]))
ipv_15d_type = fc.indicator_column(
    fc.categorical_column_with_vocabulary_list('ipv_15d_type', [1, 2, 3, 4]))
ipv_30d_type = fc.indicator_column(
    fc.categorical_column_with_vocabulary_list('ipv_30d_type', [1, 2, 3, 4]))
ipv_60d_type = fc.indicator_column(
    fc.categorical_column_with_vocabulary_list('ipv_60d_type', [1, 2, 3, 4]))
コード例 #8
0
ファイル: Main.py プロジェクト: stechr8/HonoursProject
# Register the reservation-status column (defined before this fragment).
feature_columns.append(resStatus_one_hot)

# Arrival month: categorical column over the twelve English month names,
# wrapped in an indicator column and added to the feature list.
arrivalMonth = feature_column.categorical_column_with_vocabulary_list(
    'ArrivalDateMonth', [
        'January', 'February', 'March', 'April', 'May', 'June', 'July',
        'August', 'September', 'October', 'November', 'December'
    ])

arrivalMonth_one_hot = feature_column.indicator_column(arrivalMonth)

feature_columns.append(arrivalMonth_one_hot)

# Country: categorical column whose vocabulary is read from a fixed file.
# NOTE(review): the vocabulary path is a hard-coded absolute path on drive E:,
# so this only runs on a machine with that exact layout - consider taking the
# path from configuration or the command line.
countryNames = feature_column.categorical_column_with_vocabulary_file(
    'Country',
    'E:/stech/Documents/Uni/4thYear/Honours/CountryNames.csv',
    vocabulary_size=None,
    dtype=tf.dtypes.string,
    default_value=None,
    num_oov_buckets=0)

countryNames_one_hot = feature_column.indicator_column(countryNames)

feature_columns.append(countryNames_one_hot)

for header in [
        'LeadTime', 'ArrivalDateWeekNumber', 'StaysInWeekendNights',
        'StaysInWeekNights', 'Adults', 'Children', 'Babies',
        'PreviousCancellations', 'PreviousBookingsNotCanceled',
        'BookingChanges', 'DaysInWaitingList', 'RequiredCarParkingSpaces',
        'TotalOfSpecialRequests', 'ADR', 'ArrivalDateDayOfMonth',
        'ArrivalDateYear', 'Agent', 'Company'