Example #1
class EmailFilter:
    def __init__(self):
        self.load_ner()
        self.load_image_model()
         
    def load_ner(self):
        self.ner = NER()
        self.ner.load(os.path.join(os.path.expanduser("~"),"ShellPrivacyFilterDemo","backend", "models", "ner_model"))
        
    def load_image_model(self):
        self.keras_model = KerasModel()
        self.keras_model.load_model()
        
    def email_ner(self, text):
        return self.ner.test(text)
    
    def attachment_ner(self, attachment_docx):
        self.de.extract_text(attachment_docx, "text")
        name = os.path.basename(os.path.normpath(attachment_docx))
        with open(os.path.join(os.path.expanduser("~"), "ShellPrivacyFilterDemo", "data", name.split(".")[0] + "_text.txt"), "r") as file:
            text = file.read()
            ner_result = self.ner.test(text)
            
            if len(ner_result) > 0:
                return True
        
        return False
    
    def attachment_image(self, attachment_docx):
        self.de.extract_images(attachment_docx, "images")
        predictions = []
        for image in self.de.images:
            predictions.append(self.keras_model.test(os.path.join(os.path.expanduser("~"), "ShellPrivacyFilterDemo", "data", "images", image)))
        
        if "Confidential" in predictions:
            return True
        
        return False
    
    def attachment_scan(self, attachment_docx):
        self.de = DocumentExtract()
        unsafe_text = self.attachment_ner(attachment_docx)
        unsafe_image = self.attachment_image(attachment_docx)
        
        if unsafe_text or unsafe_image:
            return "issue"
        
        return "no issue"
Example #2
def run(model_id):
    """Run experiment."""

    config = configs[model_id]
    logger.info('\n\n\ntrain model {}'.format(model_id))

    # prepare data
    if config['preprocess_fn'] is not None:
        function = getattr(data_generator, config['preprocess_fn'])
        preprocess_fn = partial(function, **config['preprocess'])
    else:
        preprocess_fn = None
    generator = Generator(path=PATH_TRAIN,
                          IDs=meta_train.index.tolist(),
                          labels=meta_train[['target']],
                          preprocessing_fn=preprocess_fn,
                          shuffle=False, batch_size=64,
                          **config['generator'])
    X, y = generate_train_data(generator, meta_train)
    logger.info('X shape: {}, y shape: {}'.format(X.shape, y.shape))

    # define model
    model_function = getattr(models, config['model_name'])
    nn_model = partial(model_function,
                       input_shape=X.shape[1:],
                       **config['model_params'])
    nn_model().summary(print_fn=logger.info)
    model = KerasModel(nn_model, logger=logger, **config['train'])

    # train and save model
    cross_val = CrossValidation(X=X, y=y, Xtest=X[:100],
                                logger=logger, **config['cv'])
    pred, pred_test, metrics, trained_models = cross_val.run_cv(model)

    for i, model in enumerate(trained_models):
        path = os.path.join(MODELS_PATH, 'model_{}_{}.h5'.format(model_id, i))
        model.save(path)
Example #3
    def analyze(json_path, weight_path):
        dev_images, dev_labels = KerasModel.load_images_and_labels(
            constants.FULL_SQUAT_DEV_FOLDER)
        image_names = Utils.get_image_names(constants.FULL_SQUAT_DEV_FOLDER)

        model = ModelAnalysis.load_model(json_path, weight_path)
        predictions = model.predict_on_batch(dev_images)
        prediction_labels = [np.argmax(p) for p in predictions]

        for i in range(len(dev_labels)):
            if dev_labels[i] != prediction_labels[i]:
                print("{} label: {} predict: {}".format(
                    image_names[i], dev_labels[i], prediction_labels[i]))
Example #4

def main(args):
    if len(args) < 2:
        sys.stderr.write('Two required arguments: <train|classify|optimize> <data directory>\n')
        sys.exit(-1)

    if args[0] == 'train':
        working_dir = args[1]
        model = CnnEntityModel()
        train_x, train_y = model.read_training_instances(working_dir)
        trained_model, history = model.train_model_for_data(
            train_x, train_y, 200, model.get_default_config(),
            checkpoint_prefix='cnn_best_model', early_stopping=True)
        model.write_model(working_dir, trained_model)
        
    elif args[0] == 'classify':
        working_dir = args[1]
        model = KerasModel.read_model(working_dir)
     
        while True:
            try:
                line = sys.stdin.readline().rstrip()
                if not line:
                    break
                
                label = model.classify_line(line)
                print(label)
                sys.stdout.flush()
            except Exception as e:
                print("Exception %s" % (e) )
    elif args[0] == 'optimize':
        working_dir = args[1]
        model = CnnEntityModel()
        train_x, train_y = model.read_training_instances(working_dir)
        optim = RandomSearch(model, train_x, train_y)
        best_config = optim.optimize()
        print("Best config: %s" % best_config)
    else:
        sys.stderr.write("Do not recognize args[0] command argument: %s\n" % (args[0]))
        sys.exit(-1)
Example #5
#tfds.as_dataframe(ds_train.take(4), ds_info)


def scale(image, label):
    # TFDS provides the images as tf.uint8, while the model expects tf.float32, so normalize the images
    return tf.cast(image, tf.float32) / 255., label


ds_train = ds_train.map(scale,
                        num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
ds_train = ds_train.batch(128)
ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)

model = KerasModel(input_shape=(28, 28, 1))

ds_test = ds_test.map(scale, num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_test = ds_test.batch(128)
ds_test = ds_test.cache()
ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE)

logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

model.compile(
    tf.keras.optimizers.Adam(0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

# NOTE: the source snippet is truncated here; validation_data and callbacks are
# an assumed completion based on the objects defined above.
model.fit(ds_train,
          validation_data=ds_test,
          callbacks=[tensorboard_callback])
Example #6
    def load_image_model(self):
        self.keras_model = KerasModel()
        self.keras_model.load_model()
Example #7

def get_script_path():
    if hasattr(sys, 'ps1') or sys.flags.interactive:  #python -i
        return os.getcwd()
    else:
        return os.path.dirname(os.path.realpath(__file__))


def create_log_dir():
    path = get_script_path()
    logdir = os.path.join(path, 'logs',
                          datetime.now().strftime("%Y%m%d_%H%M%S"))
    createdir_safe(logdir)
    return logdir


def remove_previous_tests():
    path = get_script_path()
    dirpath = os.path.join(path, 'logs')
    if os.path.exists(dirpath) and os.path.isdir(dirpath):
        shutil.rmtree(dirpath)


if __name__ == '__main__':
    dataset = Dataset()
    dataset.prepare_dataset()
    keras_model = KerasModel(dataset)
    remove_previous_tests()
    keras_model.train(create_log_dir())
    keras_model.predict_dataset()
print("train_x", train_x.shape, train_x.dtype)

# Reshape data
train_x = train_x.reshape(
    (len(train_x), img_width, img_height, img_num_channels))
test_x = test_x.reshape((len(test_x), img_width, img_height, img_num_channels))

print("train_x", train_x.shape, train_x.dtype)

input_shape = (img_width, img_height, img_num_channels)

logdir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

model = KerasModel(input_shape)
# tf.keras.utils.plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(0.001),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

# Fit data to model
history = model.fit(train_x,
                    train_y,
                    callbacks=[tensorboard_callback],
                    batch_size=50,
                    epochs=6,
                    verbose=True,
                    validation_split=0.2)
model.summary()
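
After training, the held-out split reshaped above could be scored the same way; a minimal sketch, assuming test_y labels exist alongside test_x and that KerasModel exposes the standard Keras evaluate API:

# Evaluate on the held-out set; returns [loss, sparse_categorical_accuracy].
test_loss, test_acc = model.evaluate(test_x, test_y, batch_size=50)
print("test accuracy:", test_acc)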
Example #9
# Training and validation generators
print('Training generator')
train_generator, train_steps_per_epoch = get_data_generator_and_steps_per_epoch(
    train_samples, BATCHSIZE, multivariant=MULTIVARIANT)
print('Validation generator')
validation_generator, validation_steps_per_epoch = get_data_generator_and_steps_per_epoch(
    validation_samples, BATCHSIZE, validation=True, multivariant=MULTIVARIANT)
print('Training steps per epoch {}'.format(train_steps_per_epoch))
print('Validation steps per epoch {}'.format(validation_steps_per_epoch))

model_file = 'model_combined_last_0.2_drop_batch_new_augmentation.h5'
# Initializing a KerasModel instance
k_model = KerasModel(1,
                     keras_model.NVIDIA_ARCHITECTURE,
                     dropout=0.2,
                     batch_norm=BATCH_NORM,
                     model_file=model_file,
                     multivariant=MULTIVARIANT,
                     gray=GRAY,
                     load=False)
# Training the KerasModel model and getting the metrics
model_history = k_model.train_model_with_generator(train_generator,
                                                   train_steps_per_epoch,
                                                   EPOCHS,
                                                   validation_generator,
                                                   validation_steps_per_epoch,
                                                   save_model_filepath=model_file)
# model_history = k_model.train_learned_model_with_generator(train_generator,
#                                                            train_steps_per_epoch,
#                                                            EPOCHS,
#                                                            validation_generator,
#                                                            validation_steps_per_epoch,
Example #10
    val_feats, val_labels, val_ids = get_feats_labels_ids(val)
    return train_feats, train_labels, val_feats, val_labels

def get_real_data():
    #df = util.load_data_to_dataframe('dataset/val_test_split.json')
    #unseen_test = create_features(df)
    #unseen_test.to_csv('cache/val_test_split.csv', index=False)
    unseen_test = filterout_mac_features(pd.read_csv('cache/val_test_split.csv'))
    train_feats, train_labels, _ = get_feats_labels_ids(unseen_test)
    return train_feats, train_labels

X, Y, X_test, Y_test = get_data()

# PART 2 FIT MODEL

model = KerasModel()

model.fit(X, Y)
    
print("predicting on kfold validation")

# PART 5 EVALUATE ON UNSEEN
X_real, Y_real = get_real_data()

real_predict = model.predict(X_real)
print(f"Average f1s on unseen: {f1_score(Y_real, real_predict, average='micro')}")

# PART 6 PREPARE SUBMISSION
def get_data_for_submitting():
    #df_test = util.load_data_to_dataframe('dataset/test.json')
    #prepared_df = create_features(df_test)
Example #11
}
bst3 = GradientBoostingClassifier(**params_est)
bst3.fit(X_train, y_train)
# ------------------------------------------------------------------
from keras.callbacks import Callback as keras_clb
random.seed(666)
np.random.seed(666)


class LearningRateClb(keras_clb):
    def on_epoch_end(self, epoch, logs=None):
        if epoch == 300:
            self.model.optimizer.lr.set_value(0.01)


bst4 = KerasModel(cols_k2, 600)
bst4.fit_process(X_train_nn, y_train)
bst4.fit(X_train_nn, y_train, callbacks=[LearningRateClb()])
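
Note that optimizer.lr.set_value is the old Theano-backend Keras API; on current tf.keras the learning rate is a variable, so an equivalent callback would look like this (a sketch, assuming TensorFlow 2.x):

import tensorflow as tf

class LearningRateClbTF2(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if epoch == 300:
            # Assign directly to the optimizer's learning-rate variable.
            self.model.optimizer.learning_rate.assign(0.01)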
# ------------------------------------------------------------------
bst5 = LogisticRegression()
bst5.fit(X_train_reg, y_train)
# ------------------------------------------------------------------
params = {
    'silent': 1,
    'objective': 'binary:logistic',
    'max_depth': 3,
    'eta': 0.01,
    'subsample': 0.65,
    'colsample_bytree': 0.3,
    'min_child_weight': 5,
    'n': 1140,
Example #12
# model.add(MaxPooling2D())
# # Flattening the Images after the convolutional steps
# model.add(Flatten())
# # First dense layer
# model.add(Dense(120))
# # Second dense layer
# model.add(Dense(84))
# # Logits layer
# model.add(Dense(1))
# # Defining the loss function and optimizer
# model.compile(loss='mse', optimizer='adam')

training_length = math.ceil((len(train_samples)*3*2) / BATCHSIZE)
validation_length = math.ceil((len(validation_samples)*3*2) / BATCHSIZE)
# print(len(list(train_generator)))
k_model = KerasModel(1, keras_model.LENET_ARCHITECTURE)
model_history = k_model.train_model_with_generator(train_generator,
                                                   training_length,
                                                   EPOCHS,
                                                   validation_generator,
                                                   validation_length,
                                                   save_model_filepath='model_modular.h5')
# model_history = model.fit_generator(train_generator,
#                                     steps_per_epoch=training_length,
#                                     validation_data=validation_generator,
#                                     validation_steps=validation_length,
#                                     epochs=EPOCHS, verbose=1)
#
# model.save('model.h5')
# model.save('model_track2.h5')
plot_loss(model_history=model_history)
Example #13
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    if model_to_run == "KERAS_CNN":
        # ################################################ KERAS MODEL ################################################### #
        # These file paths are specified so that model parameters can be saved after training
        # os.path.abspath(__file__) is a file path, so resolve its directory first
        model_name_json_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'data',
            'Keras_best_model.json')
        model_name_h5_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'data',
            'Keras_best_model.h5')

        y_train = train_labels.numpy()
        X_train = data_loaders_original["train"].dataset.imgs
        X_test = data_loaders_original["test"].dataset.imgs

        keras_model = KerasModel(model_name_json_path=model_name_json_path,
                                 model_name_h5_path=model_name_h5_path,
                                 X=X_train)
        keras_model.fit(X=X_train, y=y_train)

        y_pred = keras_model.predict(X_train)
        tr_error = np.mean(y_pred != y_train[:, None])
        print(f"Keras Model Training Error is: {tr_error}")
        test_labels = keras_model.predict(X_test)
        save_results_in_csv(test_labels)

    elif model_to_run == "TRANSFER_LEARNING":
        ### load Resnet152 pre-trained model
        model_conv = torchvision.models.resnet152(pretrained=True)

        model = TransferLearningModel(model_conv)
Example #14
from keras_model import KerasModel
KerasModel.run_all_experiments(True)

# KerasModel.run(25, 100, 'custom_model_4', -1, 0.00001)
Example #15
examples = dataset['title'].map(str) + ' ' + dataset['description']

train_examples, test_examples, train_truths, test_truths = train_test_split(
    examples, truths, test_size=0.33)

text_clf_extra_tree = Pipeline([('vect', CountVectorizer()),
                                ('clf-extra-tree',
                                 ExtraTreesClassifier(n_estimators=100,
                                                      n_jobs=12,
                                                      bootstrap=False,
                                                      min_samples_split=2,
                                                      random_state=0))])
text_clf_extra_tree.fit(train_examples, train_truths)
text_clf_prediction = text_clf_extra_tree.predict(test_examples)

le = LabelEncoder()
le.fit(train_truths)
train_truths = le.transform(train_truths)

clf = Pipeline([('vect', CountVectorizer(max_features=4000)),
                ('clf-keras', KerasModel())])
clf.fit(train_examples, train_truths)
pred = clf.predict(test_examples).argmax(1)

print("Extra tree with count vectorizer perecision: ",
      accuracy_score(test_truths, text_clf_prediction))
print("Keras with count vectorizer perecision: ",
      accuracy_score(le.transform(test_truths), pred))

print(le.inverse_transform(clf.predict(["rossetto rosso"]).argmax(1))[0])
Example #16
    #df = util.load_data_to_dataframe('dataset/val_test_split.json')
    #unseen_test = create_features(df)
    #unseen_test.to_csv('cache/val_test_split.csv', index=False)
    unseen_test = filterout_mac_features(pd.read_csv('cache/val_test_split.csv'))
    train_feats, train_labels, _ = get_feats_labels_ids(unseen_test)
    return train_feats, train_labels

X, Y, X_test, Y_test = get_data()

# PART 2 FIT MODEL
k = 2

models = [None]*k
#models[0] = tree.DecisionTreeClassifier() 
models[1] = RandomForestClassifier(verbose=True, n_jobs=2, random_state=42, n_estimators=300)
models[0] = KerasModel()

kf = KFold(n_splits=k, shuffle=True, random_state=2)
i = 0
for train_index, valid_index in kf.split(X):
    X_train, X_val = X.iloc[train_index], X.iloc[valid_index]
    y_train, y_val = Y.iloc[train_index], Y.iloc[valid_index]
    models[i].fit(X_train, y_train)
    
    print("predicting on kfold validation")
    val_predict = models[i].predict(X_val)
    print(f"f1s: {f1_score(y_val, val_predict, average='micro')}")
    i += 1
# PART 3 SAVE MODEL

# PART 4 EVALUATE
Example #17
def keras_model():
    return KerasModel()