Example #1
def define_last_training(oldmodel, new_training_set):
    # num_classes is assumed to be defined in the enclosing scope
    model = Sequential([
        Dense(num_classes,
              activation='softmax',
              input_shape=(new_training_set.shape[1], ))
    ])
    model.compile(loss='categorical_crossentropy',
                  optimizer="adam",
                  metrics=['accuracy'])
    # copy the old model's output-layer weights into the new head
    # (shapes must match: same feature width and same class count)
    all_weights = oldmodel.get_layer(index=-1).get_weights()
    model.get_layer(index=-1).set_weights(all_weights)
    return model
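
A minimal usage sketch; old_model, X_new, and y_new_onehot are assumed names, and num_classes must match the width of old_model's last layer:

# hypothetical usage: transplant the old softmax head onto a new training set
new_model = define_last_training(old_model, X_new)
new_model.fit(X_new, y_new_onehot, epochs=5)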
Example #2
def calculateKerasEmbeddingMatrix(emb_size, embedding_names, df, batch_size=2):
    model = Sequential()
    model.add(
        Embedding(input_dim=7,
                  output_dim=emb_size,
                  input_length=1,
                  name="embedding"))
    model.add(Flatten())
    model.add(Dense(units=40, activation='relu'))
    model.add(Dense(units=10, activation='relu'))
    model.add(Dense(units=1))

    model.compile(loss='mse', optimizer='sgd', metrics=['accuracy'])

    hh = model.fit(x=df[['weekday']],
                   y=df[['scaled_users']],
                   epochs=50,
                   batch_size=batch_size)

    mm = model.get_layer('embedding')
    emb_matrix = mm.get_weights()[0]

    emp_df = pd.DataFrame(emb_matrix, columns=embedding_names)
    emp_df['weekday'] = np.arange(0, 7)

    return emp_df
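
A usage sketch with a toy seven-row frame; the column names are the ones the function reads, everything else is an assumption:

df = pd.DataFrame({'weekday': np.arange(7),
                   'scaled_users': np.random.rand(7)})
emb_names = ['emb_{}'.format(i) for i in range(3)]  # hypothetical names
emb_df = calculateKerasEmbeddingMatrix(3, emb_names, df)
print(emb_df.head())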
Example #3
def make_model(inp_shape, classes, features=False):
    # handmade model
    model = Sequential()
    model.add(Conv2D(16, (3, 3), input_shape=inp_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(64, name="features"))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))

    model.add(Dense(len(classes)))
    model.add(Activation('softmax'))
    model.summary()
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    if features:
        intermediate_layer_model = Model(
            inputs=model.input, outputs=model.get_layer("features").output)
        return model, intermediate_layer_model

    return model
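
A usage sketch; the input shape, class list, and x_batch are assumptions:

model, feat_model = make_model((64, 64, 3), classes=[0, 1], features=True)
feats = feat_model.predict(x_batch)  # x_batch: assumed array of shape (n, 64, 64, 3)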
Example #4
# the loop header was missing from this excerpt; assumed reconstruction:
conv_layers = []
for i in model.layers:
    conv_layers.append(i.name)
#    print(model.get_layer(i.name).get_weights()[1].shape)
#    print(model.get_layer(i.name).get_config())

conv_layers = ["activation_1"]

model2 = model

#model2.add(Conv2DTranspose(3, (7,7), kernel_initializer=wi, bias_initializer=bi, input_shape=(55,55,64)))

ct = 0
for layer_name in conv_layers:
    
    ct += 1
    func = K.function([input_img], [model2.get_layer(layer_name).output])
    #func = K.function([input_img], [model2.model.get_layer(layer_name).output])

    img = image.load_img(input_img_name, target_size=(224,224))
    input_img_data = np.array([img_to_array(img)]).astype('float32')/255

    layer_outputs = func([input_img_data])[0]

    func = K.function([input_img], [model2.get_layer("max_pooling2d_1").output])
    #func = K.function([input_img], [model2.model.get_layer(layer_name).output])

    img = image.load_img(input_img_name, target_size=(224,224))
    input_img_data = np.array([img_to_array(img)]).astype('float32')/255
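
A minimal follow-up sketch, assuming matplotlib is available and the layer has at least eight channels, to visualize the feature maps captured in layer_outputs above:

import matplotlib.pyplot as plt
for i in range(8):
    plt.subplot(2, 4, i + 1)
    plt.imshow(layer_outputs[0, :, :, i], cmap='viridis')
    plt.axis('off')
plt.show()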
Example #5
ffnn.compile(optimizer=SGD(lr=0.01),
             loss='categorical_crossentropy',
             metrics=['accuracy'])

#quick run of the model: 100 epochs with batch size 100. Accuracy can improve
#with more epochs and smaller batches, but training slows down.
ffnn.fit(x=X_train,
         y=y_train,
         epochs=100,
         batch_size=100,
         validation_data=(X_val, y_val))

#evaluate loss and accuracy on the test set
print(ffnn.evaluate(X_test, y_test))

#retrieve feature weights
feature_weights = ffnn.get_layer('input').get_weights()[0]

#keep the weights whose magnitude exceeds a threshold
#(a better threshold may be needed; the study used 1/1000 of the max in the vector)
selected = abs(feature_weights) > 0.0001

#print weight and selection status
for i in range(20):
    print(feature_weights[i], end="--")
    print(selected[i])

#report total number selected
print("Total features selected: " + str(sum(selected)))
Example #6
class ConvertToVector:
    def __init__(self, path, file, layer_name="vector_layer", dtype='float16'):
        self._model = None
        self._path = path
        self._file = file
        self.layer_name = layer_name
        self._data_gen = None
        self._train_gen = None
        self.image_set = self._get_image_names()
        self.fib_dict = {}
        self.special_num = []
        self.inverted_index = {}
        self.master_inverted_index = {}
        self.fp_index = {}
        backend.set_floatx(dtype)

    def _train_model(self, batch_size=32, epochs=30, model_type="vgg16"):

        if model_type == "vgg16":
            self._construct_model_vgg16()
            data_gen = ImageDataGenerator(samplewise_center=False,
                                          samplewise_std_normalization=True,
                                          rotation_range=0,
                                          width_shift_range=0,
                                          height_shift_range=0,
                                          horizontal_flip=False,
                                          zca_whitening=False)
            train_gen = data_gen.flow_from_directory(self._path,
                                                     target_size=(224, 224),
                                                     batch_size=batch_size,
                                                     class_mode='input',
                                                     shuffle=True,
                                                     seed=100)

        elif model_type == "convDeconv":
            self._construct_model_conv_deconv()

            data_gen = ImageDataGenerator(samplewise_center=False,
                                          samplewise_std_normalization=True,
                                          rotation_range=0,
                                          width_shift_range=0,
                                          height_shift_range=0,
                                          horizontal_flip=False,
                                          zca_whitening=False)
            train_gen = data_gen.flow_from_directory(self._path,
                                                     target_size=(640, 480),
                                                     batch_size=batch_size,
                                                     class_mode='input',
                                                     shuffle=True,
                                                     seed=100)

        self._model.fit_generator(train_gen,
                                  steps_per_epoch=len(self.image_set) //
                                  batch_size,
                                  epochs=epochs)
        self._model.save(
            "C:\\Users\\Jason\\Desktop\\Spring 2019\\Information retreival\\Project\\model.h5"
        )


#     def _train_model(self, batch_size=1000, cut_off = 100000):
#         image_set = self._get_image_names()
#         len_image_set = min(len(image_set),cut_off)
#         if len_image_set < batch_size:
#             batch_size = len_image_set
#         self._construct_model()
#         inner_index = 0
#         outer_index = inner_index + batch_size
#
#         while outer_index < len_image_set:
#             print("inner: ", inner_index)
#             print("outer: ",outer_index)
#             img = cv2.imread(image_set[0])
#
#            # x = []
#             y = []
#             while inner_index < outer_index:
#                 img = cv2.imread(image_set[inner_index])
#                 norm_image = cv2.normalize(img, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
#                 img = cv2.resize(norm_image,(224,224))
#               #  x.append(norm_image)
#                 y.append(img)
#                 inner_index+=1
#
#          #   x_train = np.array(x)
#             y_train = np.array(y)
#
#
#             print("Fitting model: ")
#
#             self._model.fit(y_train,y_train,batch_size = 64)
#             if outer_index + batch_size > len_image_set:
#                 outer_index = len_image_set
#             else:
#                 outer_index+=batch_size
#
#             del y_train , y
#
#         return image_set
# =============================================================================

    def _get_image_names(self):
        path = self._path + "\\" + self._file + "\\*.jpg"
        file_list = glob.glob(path)
        return file_list

    def _construct_model_vgg16(self):

        vgg16 = app.vgg16.VGG16()
        chop_num = 4

        for num in range(chop_num):
            vgg16.layers.pop()

        for layer in vgg16.layers:
            layer.trainable = False

        last_layer = vgg16.get_layer("block5_pool").output

        layer_1 = Conv2DTranspose(filters=512,
                                  kernel_size=4,
                                  strides=(1, 1),
                                  padding="valid",
                                  activation="relu",
                                  name="vector_layer",
                                  kernel_regularizer=regularizers.l2(0.01),
                                  kernel_initializer=initializers.RandomNormal(
                                      stddev=0.1))(last_layer)
        layer_2 = Conv2DTranspose(
            filters=256,
            kernel_size=4,
            strides=(2, 2),
            padding="valid",
            activation="relu",
            kernel_regularizer=regularizers.l2(0.01),
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_1)
        layer_3 = Conv2DTranspose(
            filters=128,
            kernel_size=5,
            strides=(1, 1),
            padding="valid",
            activation="relu",
            kernel_regularizer=regularizers.l2(0.01),
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_2)
        layer_4 = Conv2DTranspose(
            filters=64,
            kernel_size=4,
            strides=(2, 2),
            padding="valid",
            activation="relu",
            kernel_regularizer=regularizers.l2(0.01),
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_3)
        layer_5 = Conv2DTranspose(
            filters=32,
            kernel_size=4,
            strides=(2, 2),
            padding="valid",
            activation="relu",
            kernel_regularizer=regularizers.l2(0.01),
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_4)
        layer_6 = Conv2DTranspose(
            filters=16,
            kernel_size=3,
            strides=(1, 1),
            padding="valid",
            activation="relu",
            kernel_regularizer=regularizers.l2(0.01),
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_5)
        layer_7 = Conv2DTranspose(
            filters=3,
            kernel_size=2,
            strides=(2, 2),
            padding="valid",
            activation="tanh",
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_6)

        self._model = Model(inputs=vgg16.input, outputs=layer_7)
        self._model.summary()
        sgd = optimizers.SGD(lr=0.01)
        self._model.compile(optimizer=sgd,
                            loss='mean_squared_error',
                            metrics=['accuracy'])

        return True

    def _construct_model_conv_deconv(self):
        self._model = Sequential()
        self._model.add(
            Conv2D(filters=32,
                   input_shape=(640, 480, 3),
                   kernel_size=(5, 3),
                   strides=(2, 2),
                   activation="relu",
                   kernel_initializer=initializers.RandomNormal(stddev=0.1)))
        self._model.add(
            Conv2D(filters=64,
                   kernel_size=(3, 2),
                   strides=(2, 2),
                   activation="relu",
                   kernel_initializer=initializers.RandomNormal(stddev=0.1)))
        self._model.add(
            Conv2D(filters=128,
                   kernel_size=(3, 3),
                   strides=(2, 2),
                   activation="relu",
                   kernel_initializer=initializers.RandomNormal(stddev=0.1)))
        self._model.add(
            Conv2D(filters=1,
                   kernel_size=(2, 2),
                   strides=(2, 2),
                   activation="relu",
                   name=self.layer_name,
                   kernel_initializer=initializers.RandomNormal(stddev=0.001)))
        self._model.add(
            Conv2DTranspose(
                filters=128,
                kernel_size=(2, 2),
                strides=(2, 2),
                activation="relu",
                kernel_initializer=initializers.RandomNormal(stddev=0.001)))
        self._model.add(
            Conv2DTranspose(
                filters=64,
                kernel_size=(2, 2),
                strides=(2, 2),
                activation="relu",
                kernel_initializer=initializers.RandomNormal(stddev=0.1)))
        self._model.add(
            Conv2DTranspose(
                filters=32,
                kernel_size=(9, 10),
                strides=(2, 2),
                activation="relu",
                kernel_initializer=initializers.RandomNormal(stddev=0.1)))
        self._model.add(
            Conv2DTranspose(
                filters=3,
                kernel_size=(4, 2),
                strides=(2, 2),
                activation="relu",
                kernel_initializer=initializers.RandomNormal(stddev=0.1)))

        # this SGD instance is defined but unused; the model compiles with 'adagrad'
        sgd = optimizers.SGD(lr=0.001,
                             decay=0.0001,
                             momentum=0.8,
                             nesterov=False)
        self._model.compile(optimizer="adagrad",
                            loss='mean_squared_error',
                            metrics=['accuracy'])
        self._model.summary()

        return True

    def _vectorize(self, img):
        # sub-model that stops at the embedding layer
        intermediate_layer_model = Model(inputs=self._model.input,
                                         outputs=self._model.get_layer(
                                             self.layer_name).output)
        # assumes the image on disk is already 640x480 so the reshape only
        # adds the batch axis; otherwise the axes would be scrambled
        intermediate_output = intermediate_layer_model.predict(
            cv2.imread(img).reshape(1, 640, 480, 3))

        return intermediate_output.flatten()

    def fib(self, n):
        # memoized Fibonacci with fib(0) = fib(1) = 1
        if n in self.fib_dict:
            return self.fib_dict[n]
        if n == 0 or n == 1:
            self.fib_dict[n] = 1
            return 1
        else:
            r = self.fib(n - 1) + self.fib(n - 2)
            self.fib_dict[n] = r
            return r

    def _special_number_generator(self, n=1131):
        for i in range(n):
            x = (self.fib(i) + i)
            if x % (i + 1) == 0:
                self.special_num.append(x % (i + 2))
            else:
                self.special_num.append(x % (i + 1))

        self.special_num = np.array(self.special_num).reshape(n, 1)
        return True

    def build_index(self):
        self._special_number_generator()

        for i, img in enumerate(self.image_set):
            vec = self._vectorize(img)
            fp = int(vec.reshape(1, 1131).dot(self.special_num))
            name = img.split('\\')[-1]
            if fp in self.inverted_index:
                self.inverted_index[fp].append((name, vec.tolist()))
            else:
                self.inverted_index[fp] = [(name, vec.tolist())]
            if i % 100 == 0:
                with open(self._path + "//inverted_index.json", 'w') as f:
                    json.dump(self.inverted_index, f)

            print("Completed: ", i)

        with open(self._path + "//inverted_index.json", 'w') as f:
            json.dump(self.inverted_index, f)

    def load_json(self, file_name):
        # a single json.load is enough; the index was written with one json.dump
        with open(file_name) as infile:
            return json.load(infile)

    def load_and_append_mulitple_dicts(self):
        path = self._path + "\\*.p"
        file_list = glob.glob(path)
        for f in file_list:
            inverted_index = self.load_json(f)
            self.master_inverted_index = {
                **self.master_inverted_index,
                **inverted_index
            }

    def main(self):

        if len(glob.glob(self._path + "\\*.h5")) > 0:
            self._model = load_model(self._path + "\\model.h5")
            print("Loaded model")
        else:
            #self._train_model(32)
            self._train_model(batch_size=64, model_type="convDeconv")

        self.build_index()
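
A hypothetical driver for the class; the root path and image folder are placeholders:

vec = ConvertToVector(path="C:\\images_root", file="corpus")
vec.main()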
Example #7
filters = config.getint('NETWORK', 'filters')
n_grams = config.getint('NETWORK', 'n_gram')
vector_dim = input_shape[1]
dropout_1 = config.getfloat('NETWORK', 'dropout_1')
dense_neurons = config.getint('NETWORK', 'dense_neurons')
dropout_2 = config.getfloat('NETWORK', 'dropout_2')

model = Sequential()
model.add(
    Conv2D(filters,
           kernel_size=(n_grams, vector_dim),
           activation='relu',
           input_shape=input_shape,
           name='conv2d'))
model.add(
    MaxPooling2D(pool_size=(model.get_layer('conv2d').output_shape[1], 1)))
model.add(Dropout(dropout_1))
model.add(Flatten())
model.add(Dense(dense_neurons, activation='relu'))
model.add(Dropout(dropout_2))
model.add(Dense(1))  # intentionally no activation: linear output

model.load_weights(model_path / 'trained_model.hdf5')

analyser_name = config.get('ANALYSER', 'analyser_name')
analyser = innvestigate.create_analyzer(analyser_name, model)

with open(pickle_path / 'x_train.p', 'rb') as f:
    x_train = pickle.load(f)
with open(pickle_path / 'x_test.p', 'rb') as f:
    x_test = pickle.load(f)

test_pred = model.predict(x_test)
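
A minimal sketch of applying the analyser built above; innvestigate analyzers return one relevance map per input, shaped like the input:

analysis = analyser.analyze(x_test)
print("relevance maps:", analysis.shape, "predictions:", test_pred.shape)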
Example #8
class AutoEncoder:
    def __init__(self,
                 date_range,
                 symbol="AAPL",
                 data_file="calibration_data"):
        self.data = None
        for day in date_range:
            path = "fundamental_{}_{}.bz2".format(symbol,
                                                  day.strftime("%Y%m%d"))
            path = os.path.join(data_file, path)
            if os.path.exists(path):
                prices = pd.read_pickle(path, compression="bz2")
                if self.data is None:
                    self.data = prices.values.T
                else:
                    self.data = np.vstack([self.data, prices.values.T])
        scaler = MinMaxScaler()
        self.data_scaled = np.array(
            [scaler.fit_transform(d.reshape(-1, 1)) for d in self.data])
        self.data_scaled = self.data_scaled[:, :, 0]
        print("The data shape is", self.data_scaled.shape)

    def build_model(self, encode_length=16, activation="relu"):
        n_in = self.data_scaled.shape[1]
        self.encode_length = encode_length

        self.model = Sequential()
        self.model.add(Dense(128, activation=activation, name="encoder_l1"))
        self.model.add(Dense(64, activation=activation, name="encoder_l2"))
        self.model.add(
            Dense(encode_length, name="encoder_output", activation=None))
        self.model.add(Dense(64, activation=activation))
        self.model.add(Dense(128, activation=activation))
        self.model.add(Dense(n_in, activation=None))

        self.model.compile(optimizer='adam', loss='mse')
        # the first Dense has no input_shape, so pass it explicitly to build()
        self.model.build(input_shape=(None, n_in))

        return self.model

    def _reshape_data(self, data):
        if len(data.shape) == 3:
            return data
        if len(data.shape) == 2:
            return data[:, :, np.newaxis]
        if len(data.shape) == 1:
            return data[np.newaxis, :, np.newaxis]

    def train_model(self,
                    test_size=0.1,
                    val_size=0.1,
                    batch_size=16,
                    epochs=200,
                    stop_patience=10,
                    plot_test=True,
                    plot_history=True):
        x = self.data_scaled
        if test_size != 0.:
            x_train, x_test, y_train, y_test = train_test_split(
                x, x, test_size=test_size, random_state=42)
            print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
        else:
            x_train, y_train = x, x

        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=stop_patience,
                                       mode="min",
                                       verbose=2,
                                       restore_best_weights=True)
        result = self.model.fit(x_train,
                                y_train,
                                batch_size=batch_size,
                                epochs=epochs,
                                validation_split=val_size / (1 - test_size),
                                callbacks=[early_stopping])
        if plot_test:
            y_test_predict = self.model.predict(x_test)
            print(
                "test loss:",
                np.sum((y_test_predict - y_test)**2) /
                (y_test.shape[0] * y_test.shape[1]))
            plt.plot(y_test[0])
            plt.plot(y_test_predict[0])
            plt.ylabel("Scaled Price")
            plt.xlabel("Minutes")
            plt.title("Encode length {}".format(self.encode_length))
            plt.legend(["Real", "Predict"])
            plot_name = "sample"
            plt.savefig('{}_{}.png'.format(plot_name, self.encode_length))
            plt.show()
        if plot_history:
            self.loss_plot(result.history)

        return result

    def loss_plot(self, history, plot_name='Loss'):
        loss = np.asarray(history['loss'])
        val_loss = np.asarray(history['val_loss'])
        plt.style.use('seaborn')
        plt.figure(figsize=(12, 9), dpi=100)
        plt.grid(True)
        plt.plot(loss)
        plt.plot(val_loss)
        plt.legend(['loss', 'val_loss'])
        plt.title("Encode length {}".format(self.encode_length))
        plt.xlabel("Epochs")
        plt.ylabel("MSE")
        plt.savefig('{}_{}.png'.format(plot_name, self.encode_length))
        plt.show()

    def save_feature(self, plot_feature=False):
        feature_name = "AutoEncoderFeature_{}.npy".format(self.encode_length)
        encoder = Model(inputs=self.model.input,
                        outputs=self.model.get_layer('encoder_output').output)
        feature = encoder.predict(self.data_scaled)
        np.save("feature/" + feature_name, feature)

        if plot_feature:
            if self.encode_length == 8:
                fig, ax = plt.subplots(ncols=4, nrows=2, figsize=(12, 9))
                axes = ax.flatten()
                for i in range(feature.shape[1]):
                    sns.distplot(feature[:, i], ax=axes[i])
                plt.show()
                return

            for i in range(feature.shape[1]):
                sns.distplot(feature[:, i])
                plt.show()
            return

    def save_model(self):
        self.model.save("model/AutoEncoder_{}.h5".format(self.encode_length))

    def save_encoder_ws(self):
        w1, b1 = self.model.get_layer('encoder_l1').get_weights()
        w2, b2 = self.model.get_layer('encoder_l2').get_weights()
        w3, b3 = self.model.get_layer('encoder_output').get_weights()
        with open("model/AutoEncoder_w_{}.h5".format(self.encode_length),
                  "wb") as f:
            pickle.dump([w1, b1, w2, b2, w3, b3], f)

    def encode(self, x):
        encoder = Model(inputs=self.model.input,
                        outputs=self.model.get_layer('encoder_output').output)
        return encoder.predict(x)
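
A hypothetical end-to-end run; the date range is an assumption, and the "feature/" and "model/" output directories must already exist:

import pandas as pd
ae = AutoEncoder(pd.date_range("2019-06-03", "2019-06-07"), symbol="AAPL")
ae.build_model(encode_length=16)
ae.train_model(epochs=50, plot_test=False, plot_history=False)
ae.save_feature()
ae.save_model()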