class EmailFilter:
    """Flags privacy-sensitive content in e-mails and .docx attachments.

    E-mail text is screened with an NER model; images extracted from
    attachments are screened with a Keras image classifier.
    """

    def __init__(self):
        self.load_ner()
        self.load_image_model()

    def load_ner(self):
        """Load the pre-trained NER model from the demo's model directory."""
        self.ner = NER()
        self.ner.load(os.path.join(os.path.expanduser("~"),
                                   "ShellPrivacyFilterDemo", "backend",
                                   "models", "ner_model"))

    def load_image_model(self):
        """Instantiate the Keras image classifier and load its saved model."""
        self.keras_model = KerasModel()
        self.keras_model.load_model()

    def email_ner(self, text):
        """Run NER over raw e-mail text and return the raw NER results."""
        return self.ner.test(text)

    def attachment_ner(self, attachment_docx):
        """Return True if NER finds any entity in the attachment's text.

        NOTE: relies on ``self.de`` being set by ``attachment_scan()`` first.
        """
        self.de.extract_text(attachment_docx, "text")
        name = os.path.basename(os.path.normpath(attachment_docx))
        # The extractor writes "<stem>_text.txt" into the data directory.
        # NOTE(review): name.split(".")[0] truncates at the FIRST dot, so
        # "a.b.docx" maps to "a_text.txt" — confirm this matches
        # DocumentExtract's naming scheme.
        text_path = os.path.join(os.path.expanduser("~"),
                                 "ShellPrivacyFilterDemo", "data",
                                 name.split(".")[0] + "_text.txt")
        with open(text_path, "r") as file:
            text = file.read()
        # bool() replaces the non-idiomatic `if len(...) > 0: return True` ladder.
        return bool(self.ner.test(text))

    def attachment_image(self, attachment_docx):
        """Return True if any extracted image is classified "Confidential".

        NOTE: relies on ``self.de`` being set by ``attachment_scan()`` first.
        """
        self.de.extract_images(attachment_docx, "images")
        image_dir = os.path.join(os.path.expanduser("~"),
                                 "ShellPrivacyFilterDemo", "data", "images")
        predictions = [self.keras_model.test(os.path.join(image_dir, image))
                       for image in self.de.images]
        return "Confidential" in predictions

    def attachment_scan(self, attachment_docx):
        """Scan a .docx attachment; return "issue" or "no issue"."""
        self.de = DocumentExtract()
        unsafe_text = self.attachment_ner(attachment_docx)
        unsafe_image = self.attachment_image(attachment_docx)
        # Truthiness instead of the non-idiomatic `== True` comparisons.
        return "issue" if unsafe_text or unsafe_image else "no issue"
def run(model_id):
    """Run a single training experiment for the given config id.

    Builds the data generator, materializes the training arrays, constructs
    the Keras model described by ``configs[model_id]``, cross-validates it and
    saves each fold's trained model under MODELS_PATH.
    """
    config = configs[model_id]
    # Lazy %-style args instead of eager .format() in logging calls.
    logger.info('\n\n\ntrain model %s', model_id)

    # prepare data
    if config['preprocess_fn'] is not None:
        function = getattr(data_generator, config['preprocess_fn'])
        preprocess_fn = partial(function, **config['preprocess'])
    else:
        preprocess_fn = None
    generator = Generator(path=PATH_TRAIN,
                          IDs=meta_train.index.tolist(),
                          labels=meta_train[['target']],
                          preprocessing_fn=preprocess_fn,
                          shuffle=False,
                          batch_size=64,
                          **config['generator'])
    X, y = generate_train_data(generator, meta_train)
    logger.info('X shape: %s, y shape: %s', X.shape, y.shape)

    # define model (X.shape[1:] is already a tuple; the extra parens were noise)
    model_function = getattr(models, config['model_name'])
    nn_model = partial(model_function, input_shape=X.shape[1:],
                       **config['model_params'])
    nn_model().summary(print_fn=logger.info)
    model = KerasModel(nn_model, logger=logger, **config['train'])

    # train and save models
    # NOTE(review): Xtest=X[:100] looks like a smoke-test placeholder —
    # confirm before relying on pred_test.
    cross_val = CrossValidation(X=X, y=y, Xtest=X[:100], logger=logger,
                                **config['cv'])
    pred, pred_test, metrics, trained_models = cross_val.run_cv(model)
    # Renamed loop variable: the original `model` shadowed the KerasModel above.
    for i, fold_model in enumerate(trained_models):
        path = os.path.join(MODELS_PATH, 'model_{}_{}.h5'.format(model_id, i))
        fold_model.save(path)
def analyze(json_path, weight_path):
    """Print every dev-set image whose predicted label differs from its truth."""
    dev_images, dev_labels = KerasModel.load_images_and_labels(
        constants.FULL_SQUAT_DEV_FOLDER)
    image_names = Utils.get_image_names(constants.FULL_SQUAT_DEV_FOLDER)
    model = ModelAnalysis.load_model(json_path, weight_path)

    predictions = model.predict_on_batch(dev_images)
    # Collapse each per-class score vector to its argmax class index.
    prediction_labels = [np.argmax(scores) for scores in predictions]

    # Report only the misclassified images.
    for name, actual, predicted in zip(image_names, dev_labels, prediction_labels):
        if actual != predicted:
            print("{} label: {} predict: {}".format(name, actual, predicted))
def main(args):
    """CLI entry point: dispatch on the train / classify / optimize command."""
    if len(args) < 2:
        sys.stderr.write('Two required arguments: <train|classify|optimize> <data directory>\n')
        sys.exit(-1)

    command, working_dir = args[0], args[1]

    if command == 'train':
        # Train a CNN entity model on the instances found in working_dir.
        model = CnnEntityModel()
        train_x, train_y = model.read_training_instances(working_dir)
        trained_model, history = model.train_model_for_data(
            train_x, train_y, 200, model.get_default_config(),
            checkpoint_prefix='cnn_best_model', early_stopping=True)
        model.write_model(working_dir, trained_model)
    elif command == 'classify':
        # Stream lines from stdin through the saved model until EOF/blank line.
        model = KerasModel.read_model(working_dir)
        while True:
            try:
                line = sys.stdin.readline().rstrip()
                if not line:
                    break
                print(model.classify_line(line))
                sys.stdout.flush()
            except Exception as e:
                # Deliberate catch-all: keep the classify loop alive per line.
                print("Exception %s" % (e))
    elif command == 'optimize':
        # Hyper-parameter search over the training data.
        model = CnnEntityModel()
        train_x, train_y = model.read_training_instances(working_dir)
        optim = RandomSearch(model, train_x, train_y)
        best_config = optim.optimize()
        print("Best config: %s" % best_config)
    else:
        sys.stderr.write("Do not recognize args[0] command argument: %s\n" % (command))
        sys.exit(-1)
#tfds.as_dataframe(ds_train.take(4), ds_info) def scale(image, label): # TFDS provide the images as tf.uint8, while the model expect tf.float32, so normalize images return tf.cast(image, tf.float32) / 255., label ds_train = ds_train.map(scale, num_parallel_calls=tf.data.experimental.AUTOTUNE) ds_train = ds_train.cache() ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples) ds_train = ds_train.batch(128) ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE) model = KerasModel(input_shape=(28, 28, 1)) ds_test = ds_test.map(scale, num_parallel_calls=tf.data.experimental.AUTOTUNE) ds_test = ds_test.batch(128) ds_test = ds_test.cache() ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE) logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S") tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir) model.compile( tf.keras.optimizers.Adam(0.001), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]) model.fit(ds_train,
def load_image_model(self):
    """Instantiate the Keras image classifier and load its saved model/weights."""
    self.keras_model = KerasModel()
    self.keras_model.load_model()
def get_script_path():
    """Return the directory this script lives in.

    Falls back to the current working directory in an interactive
    interpreter (python -i), where ``__file__`` may be missing.
    """
    if hasattr(sys, 'ps1') or sys.flags.interactive:  # python -i
        return os.getcwd()
    return os.path.dirname(os.path.realpath(__file__))


def create_log_dir():
    """Create and return a timestamped log directory under <script>/logs."""
    path = get_script_path()
    # strftime() already returns a str; the redundant str() wrapper was removed.
    logdir = os.path.join(path, 'logs', datetime.now().strftime("%Y%m%d_%H%M%S"))
    createdir_safe(logdir)
    return logdir


def remove_previous_tests():
    """Delete the whole logs/ tree from previous runs, if present."""
    path = get_script_path()
    dirpath = os.path.join(path, 'logs')
    # isdir() already implies existence; the extra exists() check was redundant.
    if os.path.isdir(dirpath):
        shutil.rmtree(dirpath)


if __name__ == '__main__':
    dataset = Dataset()
    dataset.prepare_dataset()
    keras_model = KerasModel(dataset)
    # Clear stale logs before training writes new ones.
    remove_previous_tests()
    keras_model.train(create_log_dir())
    keras_model.predict_dataset()
print("train_x", train_x.shape, train_x.dtype) # Reshape data train_x = train_x.reshape( (len(train_x), img_width, img_height, img_num_channels)) test_x = test_x.reshape((len(test_x), img_width, img_height, img_num_channels)) print("train_x", train_x.shape, train_x.dtype) input_shape = (img_width, img_height, img_num_channels) logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S") tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir) model = KerasModel(input_shape) # tf.keras.utils.plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True) model.compile( loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer=keras.optimizers.Adam(0.001), metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]) # Fit data to model history = model.fit(train_x, train_y, callbacks=[tensorboard_callback], batch_size=50, epochs=6, verbose=True, validation_split=0.2) model.summary()
# Training and validation generators print('Training generator') train_generator, train_steps_per_epoch = get_data_generator_and_steps_per_epoch( train_samples, BATCHSIZE, multivariant=MULTIVARIANT) print('Validation generator') validation_generator, validation_steps_per_epoch = get_data_generator_and_steps_per_epoch( validation_samples, BATCHSIZE, validation=True, multivariant=MULTIVARIANT) print('Training steps per epoch {}'.format(train_steps_per_epoch)) print('Validation steps per epoch {}'.format(validation_steps_per_epoch)) model_file = 'model_combined_last_0.2_drop_batch_new_augmentation.h5' # Initializing a KerasMoel instance k_model = KerasModel(1, keras_model.NVIDIA_ARCHITECTURE, dropout=0.2, batch_norm=BATCH_NORM, model_file=model_file, multivariant=MULTIVARIANT, gray=GRAY, load=False) # Training the KerasModel model and getting the metrics model_history = k_model.train_model_with_generator(train_generator, train_steps_per_epoch, EPOCHS, validation_generator, validation_steps_per_epoch, save_model_filepath=model_file) # model_history = k_model.train_learned_model_with_generator(train_generator, # train_steps_per_epoch, # EPOCHS, # validation_generator, # validation_steps_per_epoch,
    # NOTE(review): this chunk begins inside a helper whose `def` is above this
    # view; the two lines below are its tail (assumed 4-space body indent).
    val_feats, val_labels, val_ids = get_feats_labels_ids(val)
    return train_feats, train_labels, val_feats, val_labels


def get_real_data():
    """Load the held-out val/test split from the CSV cache and return its
    features and labels (ids are discarded)."""
    # Cached feature-engineering steps — presumably run once to build the CSV.
    #df = util.load_data_to_dataframe('dataset/val_test_split.json')
    #unseen_test = create_features(df)
    #unseen_test.to_csv('cache/val_test_split.csv', index=False)
    unseen_test = filterout_mac_features(pd.read_csv('cache/val_test_split.csv'))
    train_feats, train_labels, _ = get_feats_labels_ids(unseen_test)
    return train_feats, train_labels


X, Y, X_test, Y_test = get_data()

# PART 2 FIT MODEL
model = KerasModel()
model.fit(X, Y)
print("predicting on kfold validation")

# PART 5 EVALUATE ON UNSEEN
X_real, Y_real = get_real_data()
real_predict = model.predict(X_real)
print(f"Average f1s on unseen: {f1_score(Y_real, real_predict, average='micro')}")

# PART 6 PREPARE SUBMISSION
def get_data_for_submitting():
    # NOTE(review): this function's body continues past this chunk.
    #df_test = util.load_data_to_dataframe('dataset/test.json')
    #prepared_df = create_features(df_test)
}  # closes the params_est dict started above this chunk
bst3 = GradientBoostingClassifier(**params_est)
bst3.fit(X_train, y_train)

# ------------------------------------------------------------------
from keras.callbacks import Callback as keras_clb

# Fix seeds for reproducible Keras training.
random.seed(666)
np.random.seed(666)


class LearningRateClb(keras_clb):
    # Drops the learning rate to 0.01 once epoch 300 is reached.
    # NOTE(review): optimizer.lr.set_value() is the old Theano-backend Keras
    # API — confirm it matches the installed Keras version.
    # NOTE(review): logs={} is a mutable default argument (Python pitfall),
    # though harmless here since it is never mutated.
    def on_epoch_end(self, epoch, logs={}):
        if epoch == 300:
            self.model.optimizer.lr.set_value(0.01)


bst4 = KerasModel(cols_k2, 600)
bst4.fit_process(X_train_nn, y_train)
bst4.fit(X_train_nn, y_train, callbacks=[LearningRateClb()])

# ------------------------------------------------------------------
bst5 = LogisticRegression()
bst5.fit(X_train_reg, y_train)

# ------------------------------------------------------------------
# presumably XGBoost parameters ('eta', 'colsample_bytree', …); the dict
# literal continues past this chunk.
params = {
    'silent': 1,
    'objective': 'binary:logistic',
    'max_depth': 3,
    'eta': 0.01,
    'subsample': 0.65,
    'colsample_bytree': 0.3,
    'min_child_weight': 5,
    'n': 1140,
# (Removed: a large block of commented-out hand-built Sequential model code —
# dead code superseded by the KerasModel wrapper below.)

# *3*2 multiplier: presumably 3 camera views per sample, each also flipped —
# TODO confirm against the generator's augmentation.
training_length = math.ceil((len(train_samples) * 3 * 2) / BATCHSIZE)
validation_length = math.ceil((len(validation_samples) * 3 * 2) / BATCHSIZE)

# Train a LeNet-architecture model through the shared KerasModel wrapper
# (typo fixed: training_lenght -> training_length).
k_model = KerasModel(1, keras_model.LENET_ARCHITECTURE)
model_history = k_model.train_model_with_generator(train_generator,
                                                   training_length,
                                                   EPOCHS,
                                                   validation_generator,
                                                   validation_length,
                                                   save_model_filepath='model_modular.h5')

plot_loss(model_history=model_history)
# Prefer the first CUDA device when available, otherwise run on CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

if model_to_run == "KERAS_CNN":
    # ################################################ KERAS MODEL ################################################### #
    # These file paths are specified so that model parameters can be saved after training
    # NOTE(review): joining '..' onto os.path.abspath(__file__) (a file, not a
    # directory) only works because of the '..' hop — os.path.dirname(__file__)
    # would be the clearer idiom.
    model_name_json_path = os.path.join(os.path.abspath(__file__), '..', 'data', 'Keras_best_model.json')
    model_name_h5_path = os.path.join(os.path.abspath(__file__), '..', 'data', 'Keras_best_model.h5')

    y_train = train_labels.numpy()
    X_train = data_loaders_original["train"].dataset.imgs
    X_test = data_loaders_original["test"].dataset.imgs

    keras_model = KerasModel(model_name_json_path=model_name_json_path,
                             model_name_h5_path=model_name_h5_path,
                             X=X_train)
    keras_model.fit(X=X_train, y=y_train)

    # Training error = fraction of train predictions that mismatch the labels.
    # y_train[:, None] broadcasts labels to column shape for the comparison.
    y_pred = keras_model.predict(X_train)
    tr_error = np.mean(y_pred != y_train[:, None])
    print(f"Keras Model Training Error is: {tr_error}")

    test_labels = keras_model.predict(X_test)
    save_results_in_csv(test_labels)

elif model_to_run == "TRANSFER_LEARNING":
    ### load Resnet152 pre-trained model
    model_conv = torchvision.models.resnet152(pretrained=True)
    model = TransferLearningModel(model_conv)
from keras_model import KerasModel

# Run the full experiment suite. The True argument's meaning is defined by
# KerasModel.run_all_experiments — presumably a save/verbose flag; confirm there.
KerasModel.run_all_experiments(True)
# KerasModel.run(25, 100, 'custom_model_4', -1, 0.00001)
# Concatenate title and description into one text field per example.
examples = dataset['title'].map(str) + ' ' + dataset['description']
train_examples, test_examples, train_truths, test_truths = train_test_split(
    examples, truths, test_size=0.33)

# Baseline: bag-of-words features + ExtraTrees classifier.
text_clf_extra_tree = Pipeline([('vect', CountVectorizer()),
                                ('clf-extra-tree', ExtraTreesClassifier(n_estimators=100,
                                                                        n_jobs=12,
                                                                        bootstrap=False,
                                                                        min_samples_split=2,
                                                                        random_state=0))])
text_clf_extra_tree.fit(train_examples, train_truths)
text_clf_prediction = text_clf_extra_tree.predict(test_examples)

# The Keras model needs integer-encoded labels.
le = LabelEncoder()
le.fit(train_truths)
train_truths = le.transform(train_truths)

clf = Pipeline([('vect', CountVectorizer(max_features=4000)),
                ('clf-keras', KerasModel())])
clf.fit(train_examples, train_truths)
# argmax over class scores -> predicted class index per example.
pred = clf.predict(test_examples).argmax(1)

# Fixed misleading output labels: accuracy_score is what is computed, and
# "perecision" was a typo.
print("Extra tree with count vectorizer accuracy: ",
      accuracy_score(test_truths, text_clf_prediction))
print("Keras with count vectorizer accuracy: ",
      accuracy_score(le.transform(test_truths), pred))
print(le.inverse_transform(clf.predict(["rossetto rosso"]).argmax(1)[0]))
#df = util.load_data_to_dataframe('dataset/val_test_split.json') #unseen_test = create_features(df) #unseen_test.to_csv('cache/val_test_split.csv', index=False) unseen_test = filterout_mac_features(pd.read_csv('cache/val_test_split.csv')) train_feats, train_labels, _ = get_feats_labels_ids(unseen_test) return train_feats, train_labels X, Y, X_test, Y_test = get_data() # PART 2 FIT MODEL k = 2 models = [None]*k #models[0] = tree.DecisionTreeClassifier() models[1] = RandomForestClassifier(verbose=True, n_jobs=2, random_state=42, n_estimators=300) models[0] = KerasModel() kf = KFold(n_splits = k, shuffle = True, random_state = 2) i = 0 for train_index, valid_index in kf.split(X): X_train, X_val = X.iloc[train_index], X.iloc[valid_index] y_train, y_val = Y.iloc[train_index], Y.iloc[valid_index] models[i].fit(X_train, y_train) print("predicting on kfold validation") val_predict = models[i].predict(X_val) print(f"f1s: {f1_score(y_val, val_predict, average='micro')}") i += 1 # PART 3 SAVE MODEL # PART 4 EVALUATE
def keras_model():
    """Factory: build and return a fresh KerasModel instance."""
    instance = KerasModel()
    return instance