def define_last_training(oldmodel, new_training_set):
    """Build a fresh single-layer softmax classifier for `new_training_set`,
    initialised with the output-layer weights of `oldmodel`.

    Parameters
    ----------
    oldmodel : keras Model whose last layer's weights are copied.
    new_training_set : 2-D array-like; only `.shape[1]` (feature count) is used.

    Returns
    -------
    A compiled single-Dense-layer Sequential model.
    """
    # Grab the old output layer's parameters first: [kernel, bias].
    all_weights = oldmodel.get_layer(index=-1).get_weights()
    # Derive the class count from the copied bias vector instead of relying on
    # a module-level `num_classes` global — set_weights would fail on any
    # mismatch anyway, so this is strictly safer.
    num_units = all_weights[1].shape[0]
    model = Sequential([
        Dense(num_units,
              activation='softmax',
              input_shape=(new_training_set.shape[1], )),
    ])
    model.compile(loss='categorical_crossentropy',
                  optimizer="adam",
                  metrics=['accuracy'])
    # Transplant the old softmax layer's weights into the new model.
    model.get_layer(index=-1).set_weights(all_weights)
    return model
def calculateKerasEmbeddingMatrix(emb_size, embedding_names, df, batch_size=2,
                                  num_categories=7):
    """Learn an entity embedding for the `weekday` column and return it.

    Trains a small regression net (embedding -> dense stack -> scalar) that
    predicts `scaled_users` from `weekday`, then extracts the learned
    embedding matrix.

    Parameters
    ----------
    emb_size : dimensionality of the embedding vectors.
    embedding_names : column names for the returned embedding DataFrame.
    df : DataFrame with integer 'weekday' and numeric 'scaled_users' columns.
    batch_size : mini-batch size for training.
    num_categories : number of distinct category codes (default 7 weekdays,
        kept for backward compatibility; codes assumed to be 0..num_categories-1).

    Returns
    -------
    DataFrame of shape (num_categories, emb_size + 1): one row per category,
    the embedding columns plus a 'weekday' code column.
    """
    model = Sequential()
    model.add(
        Embedding(input_dim=num_categories,
                  output_dim=emb_size,
                  input_length=1,
                  name="embedding"))
    model.add(Flatten())
    model.add(Dense(units=40, activation='relu'))
    model.add(Dense(units=10, activation='relu'))
    model.add(Dense(units=1))
    model.compile(loss='mse', optimizer='sgd', metrics=['accuracy'])
    # Fit purely for its side effect of training the embedding weights; the
    # History object (previously bound to an unused `hh`) is discarded.
    model.fit(x=df[['weekday']],
              y=df[['scaled_users']],
              epochs=50,
              batch_size=batch_size)
    # get_weights()[0] is the (num_categories, emb_size) embedding matrix.
    emb_matrix = model.get_layer('embedding').get_weights()[0]
    emb_df = pd.DataFrame(emb_matrix, columns=embedding_names)
    emb_df['weekday'] = np.arange(0, num_categories)
    return emb_df
def make_model(inp_shape, classes, features=False):
    """Build a small handmade CNN classifier.

    Architecture: two Conv/ReLU/MaxPool/Dropout stages, then a 64-unit
    "features" Dense layer and a softmax head sized by `len(classes)`.

    Parameters
    ----------
    inp_shape : input image shape, e.g. (H, W, C).
    classes : sized collection of class labels; only its length is used.
    features : when True, also return a Model exposing the "features"
        layer's activations.

    Returns
    -------
    model, or (model, intermediate_layer_model) when features=True.
    """
    model = Sequential()
    model.add(Conv2D(16, (3, 3), input_shape=inp_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(64, name="features"))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(Dense(len(classes)))
    model.add(Activation('softmax'))
    # summary() prints the table itself and returns None; the old
    # print(model.summary()) emitted a stray trailing "None".
    model.summary()
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    if features:
        # Only build the feature-extractor when the caller asked for it;
        # previously it was constructed unconditionally and often discarded.
        intermediate_layer_model = Model(
            inputs=model.input,
            outputs=model.get_layer("features").output)
        return model, intermediate_layer_model
    return model
conv_layers.append(i.name) # print(model.get_layer(i.name).get_weights()[1].shape) # print(model.get_layer(i.name).get_config()) conv_layers = ["activation_1"] model2 = model #model2.add(Conv2DTranspose(3, (7,7), kernel_initializer=wi, bias_initializer=bi, input_shape=(55,55,64))) ct = 0 for layer_name in conv_layers: ct += 1 func = K.function([input_img], [model2.get_layer(layer_name).output]) #func = K.function([input_img], [model2.model.get_layer(layer_name).output]) img = image.load_img(input_img_name, target_size=(224,224)) input_img_data = np.array([img_to_array(img)]).astype('float32')/255 layer_outputs = func([input_img_data])[0] func = K.function([input_img], [model2.get_layer("max_pooling2d_1").output]) #func = K.function([input_img], [model2.model.get_layer(layer_name).output]) img = image.load_img(input_img_name, target_size=(224,224)) input_img_data = np.array([img_to_array(img)]).astype('float32')/255
ffnn.compile(optimizer=SGD(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy']) #really fast run of the model, just 15 epochs and 100 batch size. We can improve with more epochs and smaller batch sizes #but the model slows down then. ffnn.fit(x=X_train, y=y_train, epochs=100, batch_size=100, validation_data=[X_val, y_val]) #get accuracy print(ffnn.evaluate(X_test, y_test)) #retrieve feature weights feature_weights = ffnn.get_layer('input').get_weights()[0] #get the ones that exceed a certain threshold in magnitude selected = abs( feature_weights ) > 0.0001 #may need to figure out better threshold, study was using 1/1000 of max in the vector #print weight and selection status for i in range(20): print(feature_weights[i], end="--") print(selected[i]) #report total number selected print("Total features selected: " + str(sum(selected)))
class ConvertToVector:
    """Trains a conv/deconv autoencoder over a directory of JPEGs, vectorises
    each image via an intermediate layer, and builds a fingerprint-keyed
    inverted index persisted as JSON."""

    def __init__(self, path, file, layer_name="vector_layer", dtype='float16'):
        # path/file locate the image directory; layer_name selects which model
        # layer produces the image vector; dtype sets the Keras backend float.
        self._model = None
        self._path = path
        self._file = file
        self.layer_name = layer_name
        self._data_gen = None
        self._train_gen = None
        # Absolute file names of every *.jpg under path\file.
        self.image_set = self._get_image_names()
        # Memo table for fib(); special_num becomes a (1131, 1) column vector.
        self.fib_dict = {}
        self.special_num = []
        # fingerprint -> [(image name, vector), ...]
        self.inverted_index = {}
        self.master_inverted_index = {}
        self.fp_index = {}
        backend.set_floatx(dtype)

    def _train_model(self, batch_size=32, epochs=30, model_type="vgg16"):
        """Construct and fit the chosen autoencoder on images streamed from
        self._path, then save it to disk.

        NOTE(review): there is no else branch — any other model_type leaves
        `train_gen` unbound and raises NameError at fit_generator.
        """
        if model_type == "vgg16":
            self._construct_model_vgg16()
            data_gen = ImageDataGenerator(samplewise_center=False,
                                          samplewise_std_normalization=True,
                                          rotation_range=0,
                                          width_shift_range=0,
                                          height_shift_range=0,
                                          horizontal_flip=False,
                                          zca_whitening=False)
            # class_mode='input': targets are the images themselves (autoencoder).
            train_gen = data_gen.flow_from_directory(self._path,
                                                     target_size=(224, 224),
                                                     batch_size=batch_size,
                                                     class_mode='input',
                                                     shuffle=True,
                                                     seed=100)
        elif model_type == "convDeconv":
            self._construct_model_conv_deconv()
            data_gen = ImageDataGenerator(samplewise_center=False,
                                          samplewise_std_normalization=True,
                                          rotation_range=0,
                                          width_shift_range=0,
                                          height_shift_range=0,
                                          horizontal_flip=False,
                                          zca_whitening=False)
            train_gen = data_gen.flow_from_directory(self._path,
                                                     target_size=(640, 480),
                                                     batch_size=batch_size,
                                                     class_mode='input',
                                                     shuffle=True,
                                                     seed=100)
        self._model.fit_generator(train_gen,
                                  steps_per_epoch=len(self.image_set) // batch_size,
                                  epochs=epochs)
        # NOTE(review): hard-coded absolute save path — main() later globs
        # self._path + "\\*.h5", so this save location looks inconsistent.
        self._model.save(
            "C:\\Users\\Jason\\Desktop\\Spring 2019\\Information retreival\\Project\\model.h5"
        )

    # NOTE: an earlier cv2-based batched _train_model draft (commented-out
    # dead code) was removed from this spot.

    def _get_image_names(self):
        """Return all *.jpg paths under self._path\\self._file (Windows-style
        path separators)."""
        path = self._path + "\\" + self._file + "\\*.jpg"
        file_list = glob.glob(path)
        return file_list

    def _construct_model_vgg16(self):
        """Build the VGG16-encoder / Conv2DTranspose-decoder autoencoder.

        Pops the top 4 VGG16 layers, freezes the rest, and stacks a
        transposed-convolution decoder back up to a 3-channel output.
        """
        vgg16 = app.vgg16.VGG16()
        chop_num = 4
        # Drop the classifier head (top chop_num layers).
        for num in range(chop_num):
            vgg16.layers.pop()
        # Freeze the pretrained encoder.
        for layer in vgg16.layers:
            layer.trainable = False
        last_layer = vgg16.get_layer("block5_pool").output
        # This first decoder layer is the vector layer used by _vectorize().
        layer_1 = Conv2DTranspose(filters=512,
                                  kernel_size=4,
                                  strides=(1, 1),
                                  padding="valid",
                                  activation="relu",
                                  name="vector_layer",
                                  kernel_regularizer=regularizers.l2(0.01),
                                  kernel_initializer=initializers.RandomNormal(
                                      stddev=0.1))(last_layer)
        layer_2 = Conv2DTranspose(
            filters=256,
            kernel_size=4,
            strides=(2, 2),
            padding="valid",
            activation="relu",
            kernel_regularizer=regularizers.l2(0.01),
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_1)
        layer_3 = Conv2DTranspose(
            filters=128,
            kernel_size=5,
            strides=(1, 1),
            padding="valid",
            activation="relu",
            kernel_regularizer=regularizers.l2(0.01),
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_2)
        layer_4 = Conv2DTranspose(
            filters=64,
            kernel_size=4,
            strides=(2, 2),
            padding="valid",
            activation="relu",
            kernel_regularizer=regularizers.l2(0.01),
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_3)
        layer_5 = Conv2DTranspose(
            filters=32,
            kernel_size=4,
            strides=(2, 2),
            padding="valid",
            activation="relu",
            kernel_regularizer=regularizers.l2(0.01),
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_4)
        layer_6 = Conv2DTranspose(
            filters=16,
            kernel_size=3,
            strides=(1, 1),
            padding="valid",
            activation="relu",
            kernel_regularizer=regularizers.l2(0.01),
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_5)
        # tanh output: reconstruction in [-1, 1].
        layer_7 = Conv2DTranspose(
            filters=3,
            kernel_size=2,
            strides=(2, 2),
            padding="valid",
            activation="tanh",
            kernel_initializer=initializers.RandomNormal(stddev=0.1))(layer_6)
        # NOTE(review): `input=`/`output=` are legacy Keras 1 kwargs; modern
        # Keras expects `inputs=`/`outputs=` — confirm against the pinned version.
        self._model = Model(input=vgg16.input, output=layer_7)
        self._model.summary()
        sgd = optimizers.SGD(lr=0.01)
        self._model.compile(optimizer=sgd,
                            loss='mean_squared_error',
                            metrics=['accuracy'])
        return True

    def _construct_model_conv_deconv(self):
        """Build a from-scratch conv encoder / Conv2DTranspose decoder for
        640x480 RGB inputs; the single-filter bottleneck is named
        self.layer_name so _vectorize() can tap it."""
        self._model = Sequential()
        self._model.add(
            Conv2D(filters=32,
                   input_shape=(640, 480, 3),
                   kernel_size=(5, 3),
                   strides=(2, 2),
                   activation="relu",
                   kernel_initializer=initializers.RandomNormal(stddev=0.1)))
        self._model.add(
            Conv2D(filters=64,
                   kernel_size=(3, 2),
                   strides=(2, 2),
                   activation="relu",
                   kernel_initializer=initializers.RandomNormal(stddev=0.1)))
        self._model.add(
            Conv2D(filters=128,
                   kernel_size=(3, 3),
                   strides=(2, 2),
                   activation="relu",
                   kernel_initializer=initializers.RandomNormal(stddev=0.1)))
        # Bottleneck: 1 filter; its flattened activations are the image vector.
        self._model.add(
            Conv2D(filters=1,
                   kernel_size=(2, 2),
                   strides=(2, 2),
                   activation="relu",
                   name=self.layer_name,
                   kernel_initializer=initializers.RandomNormal(stddev=0.001)))
        self._model.add(
            Conv2DTranspose(
                filters=128,
                kernel_size=(2, 2),
                strides=(2, 2),
                activation="relu",
                kernel_initializer=initializers.RandomNormal(stddev=0.001)))
        self._model.add(
            Conv2DTranspose(
                filters=64,
                kernel_size=(2, 2),
                strides=(2, 2),
                activation="relu",
                kernel_initializer=initializers.RandomNormal(stddev=0.1)))
        self._model.add(
            Conv2DTranspose(
                filters=32,
                kernel_size=(9, 10),
                strides=(2, 2),
                activation="relu",
                kernel_initializer=initializers.RandomNormal(stddev=0.1)))
        self._model.add(
            Conv2DTranspose(
                filters=3,
                kernel_size=(4, 2),
                strides=(2, 2),
                activation="relu",
                kernel_initializer=initializers.RandomNormal(stddev=0.1)))
        # NOTE(review): `sgd` is constructed but compile() uses "adagrad" —
        # one of the two is presumably leftover; confirm intended optimizer.
        sgd = optimizers.SGD(lr=0.001,
                             decay=0.0001,
                             momentum=0.8,
                             nesterov=False)
        self._model.compile(optimizer="adagrad",
                            loss='mean_squared_error',
                            metrics=['accuracy'])
        self._model.summary()
        return True

    def _vectorize(self, img):
        """Flattened activations of self.layer_name for the image at path `img`.

        NOTE(review): cv2.imread returns (H, W, 3); the reshape to
        (1, 640, 480, 3) only succeeds if the file is exactly 640x480 (or
        307200 px total) — no resize is performed here; confirm inputs.
        """
        intermediate_layer_model = Model(inputs=self._model.input,
                                         outputs=self._model.get_layer(
                                             self.layer_name).output)
        intermediate_output = intermediate_layer_model.predict(
            cv2.imread(img).reshape(1, 640, 480, 3))
        return intermediate_output.flatten()

    def fib(self, n):
        """Memoised Fibonacci with fib(0) == fib(1) == 1."""
        if n in self.fib_dict:
            return self.fib_dict[n]
        if n == 0 or n == 1:
            self.fib_dict[n] = 1
            return 1
        else:
            r = self.fib(n - 1) + self.fib(n - 2)
            self.fib_dict[n] = r
            return r

    def _special_number_generator(self, n=1131):
        """Fill self.special_num with n pseudo-random-ish moduli derived from
        Fibonacci numbers, reshaped to a (1131, 1) column vector.

        NOTE(review): the reshape hard-codes 1131, so n must equal 1131 —
        it matches the vector length assumed in build_index().
        """
        for i in range(n):
            x = (self.fib(i) + i)
            # Avoid a zero fingerprint weight when x divides evenly.
            if x % (i + 1) == 0:
                self.special_num.append(x % (i + 2))
            else:
                self.special_num.append(x % (i + 1))
        self.special_num = np.array(self.special_num).reshape(1131, 1)
        return True

    def build_index(self):
        """Vectorise every image and index it under an integer fingerprint
        (dot product of its 1131-dim vector with the special numbers),
        checkpointing the JSON index every 100 images."""
        self._special_number_generator()
        for i, img in enumerate(self.image_set):
            vec = self._vectorize(img)
            fp = int(vec.reshape(1, 1131).dot(self.special_num))
            # Index by bare file name (strip Windows directory prefix).
            name = img.split('\\')[-1]
            if fp in self.inverted_index:
                self.inverted_index[fp].append((name, vec.tolist()))
            else:
                self.inverted_index[fp] = [(name, vec.tolist())]
            # Periodic checkpoint so a crash loses at most 100 images of work.
            if i % 100 == 0:
                with open(self._path + "//inverted_index.json", 'w') as f:
                    json.dump(self.inverted_index, f)
                print("Completed: ", i)
        with open(self._path + "//inverted_index.json", 'w') as f:
            json.dump(self.inverted_index, f)

    def load_json(self, file_name):
        """Load a doubly JSON-encoded index file.

        NOTE(review): the second json.loads only works if the file contains a
        JSON string that itself encodes JSON; build_index() writes plain JSON,
        which would raise TypeError here — verify which producer is current.
        """
        with open(file_name) as infile:
            json_file = json.loads(infile.read())
        return json.loads(json_file)

    def load_and_append_mulitple_dicts(self):
        """Merge every partial index found under self._path into
        self.master_inverted_index (later files win on key clashes).

        NOTE(review): globs "*.p" while the index files are written as
        ".json" — confirm the expected extension.
        """
        path = self._path + "\\*.p"
        file_list = glob.glob(path)
        for f in file_list:
            inverted_index = self.load_json(f)
            self.master_inverted_index = {
                **self.master_inverted_index,
                **inverted_index
            }

    def main(self):
        """Load a saved model if one exists, otherwise train one; then build
        the inverted index."""
        if len(glob.glob(self._path + "\\*.h5")) > 0:
            # NOTE(review): `path` here looks like it should be `self._path`;
            # as written it resolves to a module-level name (or NameError).
            self._model = load_model(path + "\\model.h5")
            print("Loaded model")
        else:
            #vec._train_model(32)
            # NOTE(review): `vec` appears to be a module-level instance;
            # presumably this should be `self._train_model(...)` — confirm.
            vec._train_model(batch_size=64, model_type="convDeconv")
        self.build_index()
# Read CNN hyper-parameters from the [NETWORK] section of the config file.
filters = config.getint('NETWORK', 'filters')
n_grams = config.getint('NETWORK', 'n_gram')
vector_dim = input_shape[1]  # width of each token's embedding vector
dropout_1 = config.getfloat('NETWORK', 'dropout_1')
dense_neurons = config.getint('NETWORK', 'dense_neurons')
dropout_2 = config.getfloat('NETWORK', 'dropout_2')

# Rebuild the exact architecture the stored weights were trained with:
# one convolution over n-gram windows, then max-over-time pooling.
model = Sequential()
model.add(
    Conv2D(filters,
           kernel_size=(n_grams, vector_dim),
           activation='relu',
           input_shape=input_shape,
           name='conv2d'))
# Pool over the full temporal axis of the conv output (max-over-time).
model.add(
    MaxPooling2D(pool_size=(model.get_layer('conv2d').output_shape[1], 1)))
model.add(Dropout(dropout_1))
model.add(Flatten())
model.add(Dense(dense_neurons, activation='relu'))
model.add(Dropout(dropout_2))
model.add(Dense(1))  # on purpose no activation function
model.load_weights(model_path / 'trained_model.hdf5')

# Build an iNNvestigate analyser (e.g. LRP/gradient) over the restored model.
analyser_name = config.get('ANALYSER', 'analyser_name')
analyser = innvestigate.create_analyzer(analyser_name, model)

# NOTE(review): pickle.load(open(...)) never closes the file handles; a
# `with open(...)` block would be tidier — flagged only, not changed here.
x_train = pickle.load(open(pickle_path / 'x_train.p', 'rb'))
x_test = pickle.load(open(pickle_path / 'x_test.p', 'rb'))
test_pred = model.predict(x_test)
class AutoEncoder:
    """Dense autoencoder over per-day scaled price series.

    Loads pickled fundamental-price files for a symbol across a date range,
    min-max scales each day's series independently, trains a symmetric
    128-64-k-64-128 autoencoder, and can export the k-dim encoder features.
    """

    def __init__(self, date_range, symbol="AAPL", data_file="calibration_data"):
        # Stack one row per available trading day; missing files are skipped.
        self.data = None
        for day in date_range:
            path = "fundamental_{}_{}.bz2".format(symbol,
                                                  day.strftime("%Y%m%d"))
            path = os.path.join(data_file, path)
            if os.path.exists(path):
                prices = pd.read_pickle(path, compression="bz2")
                if self.data is None:
                    self.data = prices.values.T
                else:
                    self.data = np.vstack([self.data, prices.values.T])
        # Scale each day's series to [0, 1] independently of other days.
        scaler = MinMaxScaler()
        self.data_scaled = np.array(
            [scaler.fit_transform(d.reshape(-1, 1)) for d in self.data])
        # Drop the trailing singleton feature axis -> shape (days, minutes).
        self.data_scaled = self.data_scaled[:, :, 0]
        print("The data shape is", self.data_scaled.shape)

    def build_model(self, encode_length=16, activation="relu"):
        """Build and compile the symmetric autoencoder.

        The bottleneck layer is named "encoder_output" so encode()/
        save_feature() can tap it. Returns the compiled model.
        """
        n_in = self.data_scaled.shape[1]
        self.encode_length = encode_length
        self.model = Sequential()
        self.model.add(Dense(128, activation=activation, name="encoder_l1"))
        self.model.add(Dense(64, activation=activation, name="encoder_l2"))
        # Linear bottleneck (no activation) of width encode_length.
        self.model.add(
            Dense(encode_length, name="encoder_output", activation=None))
        self.model.add(Dense(64, activation=activation))
        self.model.add(Dense(128, activation=activation))
        # Linear reconstruction back to the input width.
        self.model.add(Dense(n_in, activation=None))
        self.model.compile(optimizer='adam', loss='mse')
        # NOTE(review): build() is called without an input shape and no layer
        # declares one — depending on the Keras version this raises or is a
        # no-op until first fit; confirm against the pinned version.
        self.model.build()
        return self.model

    def _reshape_data(self, data):
        """Normalise input to 3-D (samples, timesteps, 1).

        NOTE(review): implicitly returns None for inputs with >3 dims.
        """
        if len(data.shape) == 3:
            return data
        if len(data.shape) == 2:
            return data[:, :, np.newaxis]
        if len(data.shape) == 1:
            return data[np.newaxis, :, np.newaxis]

    def train_model(self,
                    test_size=0.1,
                    val_size=0.1,
                    batch_size=16,
                    epochs=200,
                    stop_patience=10,
                    plot_test=True,
                    plot_history=True):
        """Fit the autoencoder (targets == inputs) with early stopping.

        Optionally plots a sample test reconstruction and the loss history.
        Returns the Keras History object.
        """
        x = self.data_scaled
        if test_size != 0.:
            x_train, x_test, y_train, y_test = train_test_split(
                x, x, test_size=test_size, random_state=42)
            print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
        else:
            # NOTE(review): in this branch x_test/y_test are never bound, so
            # plot_test=True below would raise NameError — confirm callers
            # always pass plot_test=False with test_size=0.
            x_train, y_train = x, x
        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=stop_patience,
                                       mode="min",
                                       verbose=2,
                                       restore_best_weights=True)
        result = self.model.fit(x_train,
                                y_train,
                                batch_size=batch_size,
                                epochs=epochs,
                                # Keep the validation fraction relative to the
                                # FULL dataset, not just the train split.
                                validation_split=val_size / (1 - test_size),
                                callbacks=[early_stopping])
        if plot_test:
            y_test_predict = self.model.predict(x_test)
            # Mean squared reconstruction error over the test set.
            print(
                "test loss:",
                np.sum((y_test_predict - y_test)**2) /
                (y_test.shape[0] * y_test.shape[1]))
            plt.plot(y_test[0])
            plt.plot(y_test_predict[0])
            plt.ylabel("Scaled Price")
            plt.xlabel("Minutes")
            plt.title("Encode length {}".format(self.encode_length))
            plt.legend(["Real", "Predict"])
            plot_name = "sample"
            plt.savefig('{}_{}.png'.format(plot_name, self.encode_length))
            plt.show()
        if plot_history:
            self.loss_plot(result.history)
        return result

    def loss_plot(self, history, plot_name='Loss'):
        """Plot training vs validation loss curves and save the figure as
        '<plot_name>_<encode_length>.png'."""
        loss = np.asarray(history['loss'])
        val_loss = np.asarray(history['val_loss'])
        plt.style.use('seaborn')
        plt.figure(figsize=(12, 9), dpi=100)
        plt.grid(True)
        plt.plot(loss)
        plt.plot(val_loss)
        plt.legend(['loss', 'val_loss'])
        plt.title("Encode length {}".format(self.encode_length))
        plt.xlabel("Epochs")
        plt.ylabel("MSE")
        plt.savefig('{}_{}.png'.format(plot_name, self.encode_length))
        plt.show()

    def save_feature(self, plot_feature=False):
        """Encode the whole dataset through the bottleneck and save it to
        feature/AutoEncoderFeature_<k>.npy; optionally plot per-dimension
        distributions."""
        feature_name = "AutoEncoderFeature_{}.npy".format(self.encode_length)
        encoder = Model(inputs=self.model.input,
                        outputs=self.model.get_layer('encoder_output').output)
        feature = encoder.predict(self.data_scaled)
        np.save("feature/" + feature_name, feature)
        if plot_feature:
            if self.encode_length == 8:
                # Special-cased 2x4 grid layout for the 8-dim bottleneck.
                fig, ax = plt.subplots(ncols=4, nrows=2, figsize=(12, 9))
                axes = ax.flatten()
                for i in range(feature.shape[1]):
                    sns.distplot(feature[:, i], ax=axes[i])
                plt.show()
                return
            # Otherwise overlay all dimensions on one axes.
            for i in range(feature.shape[1]):
                sns.distplot(feature[:, i])
            plt.show()
        return

    def save_model(self):
        """Persist the full autoencoder as model/AutoEncoder_<k>.h5."""
        self.model.save("model/AutoEncoder_{}.h5".format(self.encode_length))

    def save_encoder_ws(self):
        """Pickle the three encoder layers' (kernel, bias) pairs.

        NOTE(review): the payload is a pickle, yet the file name ends in .h5 —
        misleading extension; flagged only, not changed here.
        """
        w1, b1 = self.model.get_layer('encoder_l1').get_weights()
        w2, b2 = self.model.get_layer('encoder_l2').get_weights()
        w3, b3 = self.model.get_layer('encoder_output').get_weights()
        with open("model/AutoEncoder_w_{}.h5".format(self.encode_length),
                  "wb") as f:
            pickle.dump([w1, b1, w2, b2, w3, b3], f)

    def encode(self, x):
        """Return the bottleneck ("encoder_output") representation of x."""
        encoder = Model(inputs=self.model.input,
                        outputs=self.model.get_layer('encoder_output').output)
        return encoder.predict(x)