Code Example #1
 def test_easy(self):
     f2vm = Feat2VecModel([
         {
             "name": "disc1",
             "type": "discrete",
             "vocab": 10
         },  # disc1 has a vocabulary of 10 words
         {
             "name": "cont1",
             "type": "real"
         },  # cont1 is a real number
         {
             "name": "disc100",
             "type": "discrete",
             "len": 100,
             "vocab": 10
         },  # disc100 is a sequence of 100 words, with a vocabulary of 10 words
         {
             "name": "cont100",
             "len": 100,
             "type": "real"
         },  # cont100 is a sequence of 100 numbers
     ])
     keras_model = f2vm.build_model(embedding_dimensions=5)
     keras_model.compile(loss='binary_crossentropy',
                         optimizer=tf.train.AdamOptimizer())
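Code Example #1 ends with a compiled model, so training it works like any other Keras network. Below is a minimal fitting sketch on random data; the input order (one array per declared feature, in declaration order), the binary target shape, and the hyperparameters are assumptions for illustration, not part of the original test.

import numpy as np

batch = 32
disc1 = np.random.randint(0, 10, size=(batch, 1))      # one token per example, vocabulary of 10
cont1 = np.random.rand(batch, 1)                        # one real value per example
disc100 = np.random.randint(0, 10, size=(batch, 100))   # sequence of 100 tokens
cont100 = np.random.rand(batch, 100)                    # sequence of 100 reals
labels = np.random.randint(0, 2, size=(batch, 1))       # binary target for binary_crossentropy

keras_model.fit([disc1, cont1, disc100, cont100], labels,  # assumed input order
                epochs=1, batch_size=8)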
Code Example #2
 def test_easy_in(self):
     import keras.layers
     # define custom layer:
     input_layer = keras.layers.Input(shape=(1, ))
     custom_layer = keras.layers.Embedding(input_dim=1, output_dim=5)(
         input_layer)  # keeping it simple here
     # define feat2vec model
     f2vm = Feat2VecModel([
         {
             "name":
             "disc1",  # 'disc1 is a (single) discrete feature with a vocabulary of 10 words
             "type": "discrete",
             "vocab": 10
         },
         {
             "name": "cont1",  # 'cont1' is a sequence of 20 real numbers
             "type": "real",
             "len": 20
         },
         {
             "name": "custom",  # 'custom' is a custom-type feature
             "type": {
                 "input": input_layer,
                 "output": custom_layer
             }
         }
     ])
     keras_model = f2vm.build_model(
         embedding_dimensions=5)  # this returns a Keras network
     keras_model.compile(loss='binary_crossentropy',
                         optimizer=tf.train.AdamOptimizer())
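The "custom" entry in Code Example #2 is the escape hatch for features the built-in "discrete" and "real" types do not cover: its type is a dict holding the input and output tensors of an arbitrary Keras sub-graph, which the model then uses as that feature's embedding (Code Examples #3 and #4 below rely on the same mechanism). A sketch of another such feature, with a made-up name and a plain linear projection kept at 5 units to match embedding_dimensions:

proj_input = keras.layers.Input(shape=(20, ))
proj_output = keras.layers.Dense(units=5, activation="linear")(proj_input)  # project 20 reals to 5 dims
projected_feature = {
    "name": "projected",  # hypothetical feature name, for illustration only
    "type": {
        "input": proj_input,
        "output": proj_output
    }
}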
Code Example #3
    def test_rai(self):
        dimensions = 10
        dup_dimension = 20
        EMBEDDING_DIM = 5
        SEC_EMBEDDING = 5

        feature_names = ["principal", "f1", "f2", "f3", "f4", "f5"]

        # DEFINE CUSTOM LAYERS
        # shared linear projection, reused below for both the principal and f2 merges
        skream_rotator = Dense(units=dimensions,
                               activation="linear",
                               use_bias=False,
                               name="rotator")

        # Popularity
        f1_input = Input(batch_shape=(None, 1), name="f1")
        popularity_embed_inter = Embedding(input_dim=EMBEDDING_DIM,
                                           output_dim=dup_dimension,
                                           name="embedding_dup",
                                           mask_zero=True)(f1_input)
        popularity_unmasker = Lambda(
            lambda x: x, name='unmasker_dup')(popularity_embed_inter)
        popularity_embed1 = Dense(
            units=dimensions,
            activation="linear",
            use_bias=False,
            name="resized_embedding")(popularity_unmasker)
        f1_embed = Reshape((dimensions, ))(popularity_embed1)

        # Principal id

        principal_input = Input(batch_shape=(None, 1), name="principal")
        principal_embedding = Embedding(input_dim=EMBEDDING_DIM,
                                        output_dim=SEC_EMBEDDING,
                                        name="embedding_principal",
                                        mask_zero=True)(principal_input)
        principal_unmasker = Lambda(
            lambda x: x, name='unmasker_principal')(principal_embedding)
        principal_reshape = Reshape((SEC_EMBEDDING, ))(principal_unmasker)

        merged_principal = concatenate([principal_reshape, f1_embed])
        rotated_principal = Reshape(
            (dimensions, ),
            name="rotated_principal")(skream_rotator(merged_principal))

        # F2
        f2_input = Input(batch_shape=(None, 20), name="f2")
        f2_temp = Embedding(input_dim=5,
                            input_length=20,
                            output_dim=SEC_EMBEDDING,
                            mask_zero=True,
                            name="embedding_f2")(f2_input)
        # note: despite the layer name, this sums (rather than averages) over the sequence axis
        avg_f2 = Reshape((SEC_EMBEDDING, ))(
            Lambda(lambda x: K.sum(x, axis=1, keepdims=True),
                   name="avg_f2_embedding")(f2_temp))

        merged_f2 = concatenate([avg_f2, f1_embed])
        f2_embed = Reshape((dimensions, ),
                           name="rotated_f2")(skream_rotator(merged_f2))

        # DEFINE FM MACHINE:
        feature_specification = []
        for feat in feature_names:
            if feat == "principal":
                feature_specification.append({
                    "name": "principal",
                    "type": {
                        "input": principal_input,
                        "output": rotated_principal
                    }
                })
            elif feat == "f1":
                feature_specification.append({
                    "name": "f1",
                    "type": {
                        "input": f1_input,
                        "output": f1_embed
                    }
                })
            elif feat == "f2":
                feature_specification.append({
                    "name": feat,
                    "type": {
                        "input": f2_input,
                        "output": f2_embed
                    }
                })
            elif feat == "f3":
                feature_specification.append({"name": feat, "type": "real"})
            else:
                feature_specification.append({
                    "name": feat,
                    "type": "discrete",
                    "len": 10,
                    "vocab": 100
                })
        fm = Feat2VecModel(features=feature_specification,
                           mask_zero=True,
                           obj='ns')

        # materialize the zip pairs as lists (Python 3) so they print and can be re-iterated
        groups = []
        groups.append(
            list(zip(["principal"] * (len(feature_names) - 1),
                     feature_names[1:])))
        groups.append(
            list(zip(["f1"] * (len(feature_names) - 2), feature_names[2:])))
        groups.append([("f1", "principal")])
        print(groups)
        keras_model = fm.build_model(dimensions,
                                     deep_out=True,
                                     deep_out_bias=False,
                                     deep_weight_groups=groups,
                                     dropout_layer=0.5,
                                     dropout_input=0.1)

        #f5 = keras_model.get_layer("dropout_embedding_f5")
        #assert f5.rate > 0.
        #f5.rate = 0.
        try:
            from keras.utils import plot_model
            plot_model(keras_model, to_file="rai.png")
        except Exception:
            pass  # plotting is optional; skip when pydot/graphviz is unavailable
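In Code Example #3, deep_weight_groups appears to control which feature pairs the deep output layer weights together; in any case, each group is a list of (feature, feature) name pairs, and the zip calls above are just a compact way of writing them. Expanded, the same groups read:

group_principal = [("principal", f) for f in feature_names[1:]]  # principal paired with every other feature
group_f1 = [("f1", f) for f in feature_names[2:]]                # f1 paired with f2..f5
groups = [group_principal, group_f1, [("f1", "principal")]]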
Code Example #4
    def test_rai_faster(self):
        dimensions = 10
        EMBEDDING_DIM = 5

        feature_names = ["principal", "f1", "f2", "f3", "f4", "f5"]

        # Principal id

        principal_input = Input(batch_shape=(None, 1), name="principal")
        principal_embed = Embedding(input_dim=EMBEDDING_DIM,
                                    output_dim=dimensions,
                                    name="embedding_principal")(principal_input)
        principal_embedding = principal_embed  # Reshape((dimensions, 1))(principal_embed)

        # DEFINE FM MACHINE:
        feature_specification = []
        for feat in feature_names:
            if feat == "principal":
                feature_specification.append({
                    "name": "principal",
                    "type": {
                        "input": principal_input,
                        "output": principal_embedding
                    }
                })

            elif feat == "f1":
                feature_specification.append({"name": feat, "type": "real"})
            else:
                feature_specification.append({
                    "name": feat,
                    "type": "discrete",
                    "len": 10,
                    "vocab": 100
                })
        fm = Feat2VecModel(features=feature_specification,
                           mask_zero=True,
                           obj='ns')

        groups = []
        groups.append([("principal", feature_names)])
        groups.append([("f1", feature_names)])
        print(groups)

        keras_model_collapsed = fm.build_model(dimensions,
                                               collapsed_type=2,
                                               deep_out=True,
                                               deep_out_bias=False,
                                               deep_weight_groups=groups,
                                               dropout_layer=0.5,
                                               dropout_input=0.1)

        keras_model_notcollapsed = fm.build_model(dimensions,
                                                  collapsed_type=None,
                                                  deep_out=True,
                                                  deep_out_bias=False,
                                                  deep_weight_groups=groups,
                                                  dropout_layer=0.5,
                                                  dropout_input=0.1)

        #f5 = keras_model_notcollapsed.get_layer("dropout_grp_000")
        #assert f5.rate > 0.
        #f5.rate = 0.
        try:
            from keras.utils import plot_model
            plot_model(keras_model_collapsed,
                       to_file="rai_faster_collapsed.png")
            plot_model(keras_model_notcollapsed,
                       to_file="rai_faster_notcollapsed.png")
        except Exception:
            pass  # plotting is optional; skip when pydot/graphviz is unavailable
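Code Example #4 builds the same feature specification twice, once with collapsed_type=2 and once without. A quick way to see what the collapsed variant changes, using only standard Keras introspection (nothing feat2vec-specific is assumed here), is to compare the two graphs directly:

for label, model in [("collapsed", keras_model_collapsed),
                     ("not collapsed", keras_model_notcollapsed)]:
    print(label, "- layers:", len(model.layers), "- params:", model.count_params())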