def test_easy(self):
    f2vm = Feat2VecModel([
        # 'disc1' is a single discrete feature with a vocabulary of 10 words
        {"name": "disc1", "type": "discrete", "vocab": 10},
        # 'cont1' is a real number
        {"name": "cont1", "type": "real"},
        # 'disc100' is a sequence of 100 words, with a vocabulary of 10 words
        {"name": "disc100", "type": "discrete", "len": 100, "vocab": 10},
        # 'cont100' is a sequence of 100 real numbers
        {"name": "cont100", "type": "real", "len": 100},
    ])
    keras_model = f2vm.build_model(embedding_dimensions=5)
    keras_model.compile(loss='binary_crossentropy',
                        optimizer=tf.train.AdamOptimizer())
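
# A hypothetical smoke check, not part of the original suite: it assumes the
# network built by build_model() takes one input array per declared feature,
# in declaration order, and emits one probability per row. The dummy shapes
# simply mirror the feature spec above: disc1 -> (n, 1), cont1 -> (n, 1),
# disc100 -> (n, 100), cont100 -> (n, 100).
def test_easy_smoke(self):
    import numpy as np
    f2vm = Feat2VecModel([
        {"name": "disc1", "type": "discrete", "vocab": 10},
        {"name": "cont1", "type": "real"},
        {"name": "disc100", "type": "discrete", "len": 100, "vocab": 10},
        {"name": "cont100", "type": "real", "len": 100},
    ])
    keras_model = f2vm.build_model(embedding_dimensions=5)
    keras_model.compile(loss='binary_crossentropy',
                        optimizer=tf.train.AdamOptimizer())
    n = 8
    dummy = [
        np.random.randint(0, 10, size=(n, 1)),    # disc1 ids
        np.random.rand(n, 1),                     # cont1 value
        np.random.randint(0, 10, size=(n, 100)),  # disc100 id sequence
        np.random.rand(n, 100),                   # cont100 value sequence
    ]
    preds = keras_model.predict(dummy)
    assert preds.shape[0] == n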

def test_easy_in(self):
    import keras.layers

    # define a custom layer: a trivial one-word embedding, keeping it simple
    input_layer = keras.layers.Input(shape=(1, ))
    custom_layer = keras.layers.Embedding(input_dim=1,
                                          output_dim=5)(input_layer)

    # define the feat2vec model
    f2vm = Feat2VecModel([
        # 'disc1' is a (single) discrete feature with a vocabulary of 10 words
        {"name": "disc1", "type": "discrete", "vocab": 10},
        # 'cont1' is a sequence of 20 real numbers
        {"name": "cont1", "type": "real", "len": 20},
        # 'custom' is a custom-type feature, defined by an input/output pair
        {"name": "custom",
         "type": {"input": input_layer, "output": custom_layer}},
    ])
    keras_model = f2vm.build_model(
        embedding_dimensions=5)  # this returns a Keras network
    keras_model.compile(loss='binary_crossentropy',
                        optimizer=tf.train.AdamOptimizer())
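
# A hypothetical variant (not in the original suite) showing that any
# (input tensor, output tensor) pair can back a custom-type feature: here a
# 50-step id sequence is mean-pooled into a single 5-dim vector. The names
# 'seq_in' / 'seq_pooled' and the vocabulary size are illustrative only.
def test_easy_in_pooled(self):
    import keras.layers
    import keras.backend as K

    seq_in = keras.layers.Input(shape=(50, ), name="seq_in")
    seq_embedded = keras.layers.Embedding(input_dim=1000,
                                          output_dim=5)(seq_in)
    # average the 50 per-step embeddings into one vector of length 5
    seq_pooled = keras.layers.Lambda(lambda x: K.mean(x, axis=1),
                                     name="seq_pooled")(seq_embedded)
    f2vm = Feat2VecModel([
        {"name": "disc1", "type": "discrete", "vocab": 10},
        {"name": "seq", "type": {"input": seq_in, "output": seq_pooled}},
    ])
    keras_model = f2vm.build_model(embedding_dimensions=5)
    keras_model.compile(loss='binary_crossentropy',
                        optimizer=tf.train.AdamOptimizer())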

def test_rai(self):
    dimensions = 10
    dup_dimension = 20
    EMBEDDING_DIM = 5
    SEC_EMBEDDING = 5
    feature_names = ["principal", "f1", "f2", "f3", "f4", "f5"]

    # DEFINE CUSTOM LAYERS
    # shared linear projection, applied to both merged embeddings below
    skream_rotator = Dense(units=dimensions, activation="linear",
                           use_bias=False, name="rotator")

    # Popularity
    f1_input = Input(batch_shape=(None, 1), name="f1")
    popularity_embed_inter = Embedding(input_dim=EMBEDDING_DIM,
                                       output_dim=dup_dimension,
                                       name="embedding_dup",
                                       mask_zero=True)(f1_input)
    popularity_unmasker = Lambda(
        lambda x: x, name='unmasker_dup')(popularity_embed_inter)
    popularity_embed1 = Dense(units=dimensions, activation="linear",
                              use_bias=False,
                              name="resized_embedding")(popularity_unmasker)
    f1_embed = Reshape((dimensions, ))(popularity_embed1)

    # Principal id
    principal_input = Input(batch_shape=(None, 1), name="principal")
    principal_embedding = Embedding(input_dim=EMBEDDING_DIM,
                                    output_dim=SEC_EMBEDDING,
                                    name="embedding_principal",
                                    mask_zero=True)(principal_input)
    principal_masker = Lambda(
        lambda x: x, name='unmasker_principal')(principal_embedding)
    principal_reshape = Reshape((SEC_EMBEDDING, ))(principal_masker)
    merged_principal = concatenate([principal_reshape, f1_embed])
    rotated_principal = Reshape(
        (dimensions, ),
        name="rotated_principal")(skream_rotator(merged_principal))

    # F2
    f2_input = Input(batch_shape=(None, 20), name="f2")
    f2_temp = Embedding(input_dim=5, input_length=20,
                        output_dim=SEC_EMBEDDING, mask_zero=True,
                        name="embedding_f2")(f2_input)
    # pool the sequence embeddings into one vector (a sum, despite the name)
    avg_f2 = Reshape((SEC_EMBEDDING, ))(
        Lambda(lambda x: K.sum(x, axis=1, keepdims=True),
               name="avg_f2_embedding")(f2_temp))
    merged_f2 = concatenate([avg_f2, f1_embed])
    f2_embed = Reshape((dimensions, ),
                       name="rotated_f2")(skream_rotator(merged_f2))

    # DEFINE FM MACHINE:
    feature_specification = []
    for feat in feature_names:
        if feat == "principal":
            feature_specification.append({
                "name": "principal",
                "type": {"input": principal_input,
                         "output": rotated_principal}
            })
        elif feat == "f1":
            feature_specification.append({
                "name": "f1",
                "type": {"input": f1_input, "output": f1_embed}
            })
        elif feat == "f2":
            feature_specification.append({
                "name": feat,
                "type": {"input": f2_input, "output": f2_embed}
            })
        elif feat == "f3":
            feature_specification.append({"name": feat, "type": "real"})
        else:
            feature_specification.append({"name": feat, "type": "discrete",
                                          "len": 10, "vocab": 100})
    fm = Feat2VecModel(features=feature_specification, mask_zero=True,
                       obj='ns')
    groups = []
    # materialize the zips so the groups print and can be reused (Python 3)
    groups.append(list(zip(["principal"] * (len(feature_names) - 1),
                           feature_names[1:])))
    groups.append(list(zip(["f1"] * (len(feature_names) - 2),
                           feature_names[2:])))
    groups.append([("f1", "principal")])
    print(groups)
    keras_model = fm.build_model(dimensions,
                                 deep_out=True,
                                 deep_out_bias=False,
                                 deep_weight_groups=groups,
                                 dropout_layer=0.5,
                                 dropout_input=0.1)
    #f5 = keras_model.get_layer("dropout_embedding_f5")
    #assert f5.rate > 0.
    #f5.rate = 0.
    try:
        from keras.utils import plot_model
        plot_model(keras_model, to_file="rai.png")
    except Exception:  # pydot/graphviz may be unavailable
        pass
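
# A minimal illustration (not in the original suite) of the weight-sharing
# pattern used above: calling one Dense instance on two tensors reuses the
# same kernel, so 'rotator' learns a single projection applied to both
# merged embeddings. All names here are illustrative.
def test_shared_rotator(self):
    shared = Dense(units=3, activation="linear", use_bias=False,
                   name="shared_rotator")
    a = Input(shape=(6, ))
    b = Input(shape=(6, ))
    out_a = shared(a)
    out_b = shared(b)
    # both outputs are produced by the very same weight matrix:
    # with use_bias=False the layer holds exactly one kernel
    assert len(shared.get_weights()) == 1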

def test_rai_faster(self):
    dimensions = 10
    EMBEDDING_DIM = 5
    feature_names = ["principal", "f1", "f2", "f3", "f4", "f5"]

    # Principal id
    principal_input = Input(batch_shape=(None, 1), name="principal")
    principal_embedding = Embedding(
        input_dim=EMBEDDING_DIM, output_dim=dimensions,
        name="embedding_principal")(principal_input)
    # principal_embedding = Reshape((dimensions, 1))(principal_embedding)

    # DEFINE FM MACHINE:
    feature_specification = []
    for feat in feature_names:
        if feat == "principal":
            feature_specification.append({
                "name": "principal",
                "type": {"input": principal_input,
                         "output": principal_embedding}
            })
        elif feat == "f1":
            feature_specification.append({"name": feat, "type": "real"})
        else:
            feature_specification.append({"name": feat, "type": "discrete",
                                          "len": 10, "vocab": 100})
    fm = Feat2VecModel(features=feature_specification, mask_zero=True,
                       obj='ns')
    groups = []
    groups.append([("principal", feature_names)])
    groups.append([("f1", feature_names)])
    print(groups)
    keras_model_collapsed = fm.build_model(dimensions,
                                           collapsed_type=2,
                                           deep_out=True,
                                           deep_out_bias=False,
                                           deep_weight_groups=groups,
                                           dropout_layer=0.5,
                                           dropout_input=0.1)
    keras_model_notcollapsed = fm.build_model(dimensions,
                                              collapsed_type=None,
                                              deep_out=True,
                                              deep_out_bias=False,
                                              deep_weight_groups=groups,
                                              dropout_layer=0.5,
                                              dropout_input=0.1)
    #f5 = keras_model_notcollapsed.get_layer("dropout_grp_000")
    #assert f5.rate > 0.
    #f5.rate = 0.
    try:
        from keras.utils import plot_model
        plot_model(keras_model_collapsed,
                   to_file="rai_faster_collapsed.png")
        plot_model(keras_model_notcollapsed,
                   to_file="rai_faster_notcollapsed.png")
    except Exception:  # pydot/graphviz may be unavailable
        pass
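
# A hypothetical extra check (not in the original suite), assuming
# build_model's remaining arguments default sensibly when only
# collapsed_type is passed: toggling collapsed_type on the same spec, as
# test_rai_faster does above, should still yield two trainable networks.
# count_params() is standard Keras Model API; the two-feature spec here is
# illustrative only.
def test_collapsed_builds(self):
    fm = Feat2VecModel(features=[
        {"name": "a", "type": "discrete", "vocab": 100},
        {"name": "b", "type": "discrete", "len": 10, "vocab": 100},
    ], mask_zero=True, obj='ns')
    m_collapsed = fm.build_model(10, collapsed_type=2)
    m_full = fm.build_model(10, collapsed_type=None)
    assert m_collapsed.count_params() > 0
    assert m_full.count_params() > 0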