def get_max_connections_hostname():
    """Log the source host with the most connection records in the window.

    Aggregates documents of the module-level ``collection`` whose
    ``timestamp`` lies in ``[one_hour_before_ts, current_timestamp)``,
    counts them per ``src`` value and logs the ``_id`` (the ``src``) of the
    group with the highest count. Nothing is logged when the window holds
    no documents. Returns ``None``.
    """
    in_window = {
        '$match': {
            'timestamp': {
                '$gte': one_hour_before_ts,
                '$lt': current_timestamp
            }
        }
    }
    count_per_source = {'$group': {'_id': '$src', 'total': {'$sum': 1}}}
    busiest_first = {'$sort': {'total': -1}}
    keep_top = {'$limit': 1}

    pipeline = [in_window, count_per_source, busiest_first, keep_top]
    for top in collection.aggregate(pipeline):
        message = 'Host generating the most connections: {}'.format(top['_id'])
        utils.logger('INFO', message)
def get_hosts_receiving_from(src_host):
    """Log every destination that ``src_host`` connected to in the window.

    Args:
        src_host: value matched against the ``src`` field of each document.

    Queries the module-level ``collection`` for documents whose
    ``timestamp`` lies in ``[one_hour_before_ts, current_timestamp)`` and
    whose ``src`` equals ``src_host``, then logs the list of their ``dst``
    values (duplicates included, in cursor order). Returns ``None``.
    """
    query = {
        'timestamp': {
            '$gte': one_hour_before_ts,
            '$lt': current_timestamp
        },
        'src': src_host
    }
    hostnames = [record['dst'] for record in collection.find(query)]
    utils.logger(
        'INFO',
        'List of hosts connected from {}: {}'.format(src_host, hostnames))
def get_hosts_connected_to(dst_host):
    """Log every source that connected to ``dst_host`` in the window.

    Args:
        dst_host: value matched against the ``dst`` field of each document.

    Queries the module-level ``collection`` for documents whose
    ``timestamp`` lies in ``[one_hour_before_ts, current_timestamp)`` and
    whose ``dst`` equals ``dst_host``, then logs the list of their ``src``
    values (duplicates included, in cursor order). Returns ``None``.
    """
    query = {
        'timestamp': {
            '$gte': one_hour_before_ts,
            '$lt': current_timestamp
        },
        'dst': dst_host
    }
    hostnames = [record['src'] for record in collection.find(query)]
    # Report the host that was actually queried: the message previously
    # formatted an unrelated global instead of the ``dst_host`` argument.
    utils.logger(
        'INFO',
        'List of hosts connected to {}: {}'.format(dst_host, hostnames))
def performsFullAnalysis(url):
    """Run every analysis on ``url`` and collect the results in one dict.

    Args:
        url: forwarded unchanged to each ``perform*`` analysis helper.

    Returns:
        dict: keys ``classification``, ``sentiment``, ``entity``,
        ``concepts`` and ``summary`` hold the respective helper results, and
        ``text`` holds the ``text`` entry taken from the classification
        result. The ``text`` entry is removed from each individual result so
        it is only present once, at the top level. If any step raises, the
        error is logged and whatever was collected so far is returned (an
        empty dict when the very first step fails).
    """
    result = {}
    try:
        result['classification'] = performClassificationAnalysis(url)
        result['text'] = result['classification']['text']
        result['sentiment'] = performSentimentAnalysis(url)
        result['entity'] = performElsaAnalysis(url)
        result['concepts'] = performConceptAnayalisis(url)
        result['summary'] = performSummayAnalysis(url)

        for key, analysis in result.items():
            # Compare by value: ``is not`` on a str literal tests identity,
            # which only works by accident of string interning.
            if key != 'text':
                print(key)
                del analysis['text']
    except Exception:
        # Best-effort: report and fall through with the partial result, but
        # let KeyboardInterrupt / SystemExit propagate.
        utils.logger("error perfom the full anayalusis", "performsFullAnalysis")
    return result
def getUserByEmail(username, password):
    """Look up a person by e-mail or username together with a password.

    Args:
        username: matched against either the ``email`` field or the
            ``user_info.username`` field.
        password: matched for equality against ``user_info.password``.
            NOTE(review): the value is compared as given against the stored
            field -- confirm callers pass the stored (hashed) form.

    Returns:
        The first matching document from ``watchfulowl.persons``, or ``None``
        when nothing matches or the lookup fails (the failure is logged).
    """
    user = None
    try:
        db = GetDbClient().watchfulowl
        criteria = {
            "$or": [{
                "email": username
            }, {
                "user_info.username": username
            }],
            "user_info.password": password
        }
        user = db.persons.find_one(criteria)
    except Exception:
        # Database problems are reported and turned into "no user"; a bare
        # ``except`` would also have swallowed KeyboardInterrupt/SystemExit.
        utils.logger("error getting the data", "getUserByEmail")
    return user
# config = read_config('config.yaml') # set variable from config file # SMTP_SERVER = config['SMTP_SERVER'] # PORT = config['PORT'] # FROM = config['FROM'] # TO = config['TO'] # PASSWORD = config['PASSWORD'] # HEALTHCHECK_URL = config['HEALTHCHECK_URL'] # LOG_FILENAME_SERVER = config['LOG_FILENAME_SERVER'] # LOG_FILENAME_EMAIL = config['LOG_FILENAME_EMAIL'] # # initialise logger # log = logger(LOG_FILENAME_EMAIL) log = logger(__name__) def send_email(error_msd, filename, config): """ send email with error message from log file Args: error_msg:``list`` Error message, each sentence as list filename:``str`` filename of log file config:``dict`` dict of config file .. code-block:: json
import matplotlib.pyplot as plt
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn import tree
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn import metrics
from sklearn import model_selection

if __name__ == '__main__':
    # Load the training set
    utils.logger("DATA LOADING...")
    train_data = utils.load_data("../files/train.csv")

    # Show a box plot of the 'full_sq' column as an outlier example
    plt.boxplot(train_data['full_sq'])
    plt.show()

    # Remember the training column order so the concatenated dataset can be
    # put back into it
    right_order = list(train_data.columns)

    test_data = utils.load_data("../files/test.csv")

    # Train and test frames, collected for merging
    frames = [train_data, test_data]
'$lt': current_timestamp } } }, { '$group': { '_id': '$src', 'total': { '$sum': 1 } } }, { '$sort': { 'total': -1 } }, { '$limit': 1 }]) for result in results: utils.logger( 'INFO', 'Host generating the most connections: {}'.format(result['_id'])) utils.logger('INFO', '[OPERATION] [CONNECTED TO] [{}]'.format(destination_host)) get_hosts_connected_to(destination_host) utils.logger('INFO', '[OPERATION] [CONNECTED FROM] [{}]'.format(source_host)) get_hosts_receiving_from(source_host) utils.logger('INFO', '[OPERATION] [GENERATOR] []') get_max_connections_hostname()
def apricot5(model,
             model_weights_dir,
             dataset,
             adjustment_strategy,
             activation='binary'):
    """Run the batched weight-adjustment loop with an all ``-1`` matrix.

    This is the comparison variant of the fixing procedure: the sub-model
    correctness matrix is computed (or loaded) only for its shape and is then
    replaced by a matrix filled with ``-1`` before the adjustment loop runs.
    Weights and logs are written under ``model_weights_dir`` as
    ``compare_fixed_<strategy>_<activation>.h5`` and
    ``compare_log_<strategy>.txt``.

    Args:
        model: model to adjust; it is modified in place. It must provide
            ``evaluate``, ``get_weights``, ``set_weights``, ``save_weights``,
            ``load_weights`` and ``fit_generator`` (presumably a Keras model
            -- confirm).
        model_weights_dir: directory holding ``trained.h5``, the
            ``submodels`` directory and the cached correctness matrix; the
            outputs are written here as well.
        dataset: passed unchanged to ``load_dataset``, which yields
            ``x_train, x_test, y_train, y_test``.
        adjustment_strategy: forwarded to the adjustment helpers. Values
            ``<= 3`` trigger further training after every improving batch;
            values ``> 3`` trigger one final training run instead.
        activation: forwarded to ``batch_adjust_weights_func``.

    Returns:
        None. Results are reported through ``logger`` and the saved weights.
    """
    # package the dataset
    x_train, x_test, y_train, y_test = load_dataset(dataset)
    x_train_val, x_val, y_train_val, y_val = split_validation_dataset(
        x_train, y_train)

    fixed_model = model
    submodel_dir = os.path.join(model_weights_dir, 'submodels')
    trained_weights_path = os.path.join(model_weights_dir, 'trained.h5')
    fixed_weights_path = os.path.join(
        model_weights_dir,
        'compare_fixed_{}_{}.h5'.format(adjustment_strategy, activation))
    log_path = os.path.join(model_weights_dir,
                            'compare_log_{}.txt'.format(adjustment_strategy))

    # Seed the "best so far" checkpoint with the incoming weights.
    if not os.path.exists(fixed_weights_path):
        fixed_model.save_weights(fixed_weights_path)

    datagen = ImageDataGenerator(horizontal_flip=True,
                                 width_shift_range=0.125,
                                 height_shift_range=0.125,
                                 fill_mode='constant',
                                 cval=0.)
    datagen.fit(x_train)

    # Baseline accuracies of the unmodified model.
    logger('----------original model----------', log_path)
    _, base_train_acc = fixed_model.evaluate(x_train_val, y_train_val)
    logger('The train accuracy: {:.4f}'.format(base_train_acc), log_path)
    _, base_val_acc = fixed_model.evaluate(x_val, y_val)
    logger('The validation accuracy: {:.4f}'.format(base_val_acc), log_path)
    _, base_test_acc = fixed_model.evaluate(x_test, y_test)
    logger('The test accuracy: {:.4f}'.format(base_test_acc), log_path)

    best_acc = base_val_acc

    # Find the inputs the original model fails on, using the whole training
    # dataset (not only the train/validation split).
    fail_xs, fail_ys, fail_ys_label, fail_num = get_failing_cases(
        fixed_model, x_train, y_train)

    # The original if/else on NUM_SUBMODELS built the same path in both
    # branches, so a single assignment is equivalent.
    sub_correct_matrix_path = os.path.join(
        model_weights_dir,
        'corr_matrix_{}_{}.npy'.format(settings.RANDOM_SEED,
                                       settings.NUM_SUBMODELS))

    # 1: predicts correctly, -1: predicts incorrectly.
    print('obtaining sub correct matrix...')
    if not os.path.exists(sub_correct_matrix_path):
        # obtain submodel correctness matrix
        sub_correct_matrix = cal_sub_corr_matrix(fixed_model,
                                                 sub_correct_matrix_path,
                                                 submodel_dir,
                                                 fail_xs,
                                                 fail_ys,
                                                 fail_ys_label,
                                                 fail_num,
                                                 num_submodels=20)
    else:
        sub_correct_matrix = np.load(sub_correct_matrix_path)

    # Comparison baseline: keep only the shape of the real matrix and mark
    # every entry as "predicts incorrectly".
    sub_correct_matrix = np.ones(sub_correct_matrix.shape) * -1

    sub_weights_list = get_submodels_weights(fixed_model, submodel_dir)
    print('collected.')

    fixed_model.load_weights(trained_weights_path)

    logger('----------start fixing process----------', log_path)
    logger(
        'number of cases to be adjusted: {}'.format(
            sub_correct_matrix.shape[0]), log_path)

    for _ in range(settings.LOOP_COUNT):
        np.random.shuffle(sub_correct_matrix)

        # Process the rows in batches; a partial trailing batch counts too.
        iter_num, rest = divmod(sub_correct_matrix.shape[0],
                                settings.FIX_BATCH_SIZE)
        if rest != 0:
            iter_num += 1
        print('iter num: {}'.format(iter_num))

        for i in range(iter_num):
            curr_weights = fixed_model.get_weights()
            batch_start = settings.FIX_BATCH_SIZE * i
            batch_stop = settings.FIX_BATCH_SIZE * (i + 1)
            batch_corr_matrix = sub_correct_matrix[batch_start:batch_stop, :]

            corr_w, incorr_w = batch_get_adjustment_weights(
                batch_corr_matrix, sub_weights_list, adjustment_strategy,
                curr_weights)
            print('calculating batch adjust weights...')
            adjust_w = batch_adjust_weights_func(curr_weights,
                                                 corr_w,
                                                 incorr_w,
                                                 adjustment_strategy,
                                                 activation=activation)

            fixed_model.set_weights(adjust_w)
            _, curr_acc = fixed_model.evaluate(x_val, y_val)
            print('After adjustment, the validation accuracy: {:.4f}'.format(
                curr_acc))

            if curr_acc > best_acc:
                best_acc = curr_acc
                fixed_model.save_weights(fixed_weights_path)

                if adjustment_strategy <= 3:
                    # further training epochs; the checkpoint only overwrites
                    # the saved weights when validation accuracy beats the
                    # accuracy just reached.
                    checkpoint = ModelCheckpoint(fixed_weights_path,
                                                 monitor='val_accuracy',
                                                 verbose=1,
                                                 save_best_only=True,
                                                 mode='max')
                    checkpoint.best = best_acc
                    fixed_model.fit_generator(
                        datagen.flow(x_train_val,
                                     y_train_val,
                                     batch_size=settings.BATCH_SIZE),
                        # Same batch size as the generator above (this was a
                        # bare ``BATCH_SIZE`` before).
                        steps_per_epoch=(len(x_train_val) //
                                         settings.BATCH_SIZE + 1),
                        validation_data=(x_val, y_val),
                        epochs=settings.FURTHER_ADJUSTMENT_EPOCHS,
                        callbacks=[checkpoint])
                    fixed_model.load_weights(fixed_weights_path)

                    # eval the model
                    _, val_acc = fixed_model.evaluate(x_val, y_val, verbose=0)
                    best_acc = val_acc
                    logger(
                        'validation accuracy improved, after further training: {:.4f}'
                        .format(val_acc), log_path)
                else:
                    logger(
                        'validation accuracy improved: {:.4f}'.format(
                            best_acc), log_path)
            else:
                # No improvement: roll back to the best saved weights.
                fixed_model.load_weights(fixed_weights_path)

    fixed_model.load_weights(fixed_weights_path)

    if adjustment_strategy > 3:
        # final training process.
        _, val_acc = fixed_model.evaluate(x_val, y_val)
        checkpoint = ModelCheckpoint(fixed_weights_path,
                                     monitor='val_accuracy',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        checkpoint.best = val_acc
        fixed_model.fit_generator(
            datagen.flow(x_train_val,
                         y_train_val,
                         batch_size=settings.BATCH_SIZE),
            steps_per_epoch=len(x_train_val) // settings.BATCH_SIZE + 1,
            validation_data=(x_val, y_val),
            epochs=20,
            callbacks=[checkpoint])
        fixed_model.load_weights(fixed_weights_path)

    # final evaluation.
    _, test_acc = fixed_model.evaluate(x_test, y_test, verbose=0)
    logger('----------final evaluation----------', log_path)
    logger('test accuracy: {:.4f}'.format(test_acc), log_path)
def apricot2(model,
             model_weights_dir,
             dataset,
             adjustment_strategy,
             activation='binary'):
    """Adjust a trained model's weights one failing case at a time.

    Covers both the full procedure (``adjustment_strategy <= 3``) and the
    lite procedure (``adjustment_strategy > 3``). Weights and logs are
    written under ``model_weights_dir`` as
    ``fixed_<strategy>_<activation>.h5`` and ``log_<strategy>.txt``.

    Args:
        model: model to adjust; it is modified in place. It must provide
            ``evaluate``, ``get_weights``, ``set_weights``, ``save_weights``,
            ``load_weights`` and ``fit_generator`` (presumably a Keras model
            -- confirm).
        model_weights_dir: directory holding ``trained.h5``, the
            ``submodels`` directory and the cached correctness matrix; the
            outputs are written here as well.
        dataset: passed unchanged to ``load_dataset``, which yields
            ``x_train, x_test, y_train, y_test``.
        adjustment_strategy: ``<= 3`` uses ``get_adjustment_weights`` +
            ``adjust_weights_func`` and trains further after every
            improvement; ``> 3`` uses ``adjust_weights_func_lite`` and trains
            once at the end.
        activation: forwarded to ``adjust_weights_func``.

    Returns:
        None. Results are reported through ``logger`` and the saved weights.
    """
    # package the dataset
    x_train, x_test, y_train, y_test = load_dataset(dataset)
    x_train_val, x_val, y_train_val, y_val = split_validation_dataset(
        x_train, y_train)

    fixed_model = model
    submodel_dir = os.path.join(model_weights_dir, 'submodels')
    trained_weights_path = os.path.join(model_weights_dir, 'trained.h5')
    fixed_weights_path = os.path.join(
        model_weights_dir,
        'fixed_{}_{}.h5'.format(adjustment_strategy, activation))
    log_path = os.path.join(model_weights_dir,
                            'log_{}.txt'.format(adjustment_strategy))

    # Seed the "best so far" checkpoint with the incoming weights.
    if not os.path.exists(fixed_weights_path):
        fixed_model.save_weights(fixed_weights_path)

    datagen = ImageDataGenerator(horizontal_flip=True,
                                 width_shift_range=0.125,
                                 height_shift_range=0.125,
                                 fill_mode='constant',
                                 cval=0.)
    datagen.fit(x_train_val)

    # Baseline accuracies of the unmodified model.
    logger('----------original model----------', log_path)
    _, base_val_acc = fixed_model.evaluate(x_val, y_val)
    logger('The validation accuracy: {:.4f}'.format(base_val_acc), log_path)
    _, base_test_acc = fixed_model.evaluate(x_test, y_test)
    logger('The test accuracy: {:.4f}'.format(base_test_acc), log_path)

    best_acc = base_val_acc

    # find the inputs of the train split that the original model fails on.
    fail_xs, fail_ys, fail_ys_label, fail_num = get_failing_cases(
        fixed_model, x_train_val, y_train_val)

    # The original if/else on NUM_SUBMODELS built the same path in both
    # branches, so a single assignment is equivalent.
    sub_correct_matrix_path = os.path.join(
        model_weights_dir,
        'corr_matrix_{}_{}.npy'.format(settings.RANDOM_SEED,
                                       settings.NUM_SUBMODELS))

    # 1: predicts correctly, 0: predicts incorrectly.
    print('obtaining sub correct matrix...')
    if not os.path.exists(sub_correct_matrix_path):
        # obtain submodel correctness matrix
        sub_correct_matrix = cal_sub_corr_matrix(fixed_model,
                                                 sub_correct_matrix_path,
                                                 submodel_dir, fail_xs,
                                                 fail_ys, fail_ys_label,
                                                 fail_num)
    else:
        sub_correct_matrix = np.load(sub_correct_matrix_path)

    sub_weights_list = get_submodels_weights(fixed_model, submodel_dir)
    print('collected.')

    fixed_model.load_weights(trained_weights_path)

    logger('----------start fixing process----------', log_path)
    for _ in range(settings.LOOP_COUNT):
        np.random.shuffle(sub_correct_matrix)
        for index in range(sub_correct_matrix.shape[0]):
            curr_weights = fixed_model.get_weights()
            corr_mat = sub_correct_matrix[index, :]

            print('obtaining correct and incorrect weights...')
            if adjustment_strategy <= 3:
                corr_w, incorr_w = get_adjustment_weights(
                    corr_mat, sub_weights_list, adjustment_strategy)
                print('calculating adjust weights...')
                adjust_w = adjust_weights_func(curr_weights,
                                               corr_w,
                                               incorr_w,
                                               adjustment_strategy,
                                               activation=activation)
            else:  # lite version
                print('calculating adjust weights...')
                adjust_w = adjust_weights_func_lite(corr_mat,
                                                    sub_weights_list,
                                                    curr_weights)

            # -1 is treated as "no adjustment available for this case".
            if adjust_w == -1:
                continue

            fixed_model.set_weights(adjust_w)
            _, curr_acc = fixed_model.evaluate(x_val, y_val)
            print('After adjustment, the validation accuracy: {:.4f}'.format(
                curr_acc))

            if curr_acc > best_acc:
                best_acc = curr_acc
                fixed_model.save_weights(fixed_weights_path)

                if adjustment_strategy <= 3:
                    # further training epochs; the checkpoint only overwrites
                    # the saved weights when validation accuracy beats the
                    # accuracy just reached.
                    checkpoint = ModelCheckpoint(fixed_weights_path,
                                                 monitor='val_acc',
                                                 verbose=1,
                                                 save_best_only=True,
                                                 mode='max')
                    checkpoint.best = best_acc
                    fixed_model.fit_generator(
                        datagen.flow(x_train_val,
                                     y_train_val,
                                     batch_size=settings.BATCH_SIZE),
                        # Same batch size as the generator above (this was a
                        # bare ``BATCH_SIZE`` before).
                        steps_per_epoch=(len(x_train_val) //
                                         settings.BATCH_SIZE + 1),
                        validation_data=(x_val, y_val),
                        epochs=settings.FURTHER_ADJUSTMENT_EPOCHS,
                        callbacks=[checkpoint])
                    fixed_model.load_weights(fixed_weights_path)

                    # eval the model
                    _, val_acc = fixed_model.evaluate(x_val, y_val, verbose=0)
                    best_acc = val_acc
                    logger(
                        'validation accuracy improved, after further training: {:.4f}'
                        .format(val_acc), log_path)
                else:
                    logger(
                        'validation accuracy improved: {:.4f}'.format(
                            best_acc), log_path)
            else:
                # No improvement: roll back to the best saved weights.
                fixed_model.load_weights(fixed_weights_path)

    fixed_model.load_weights(fixed_weights_path)

    if adjustment_strategy > 3:
        # final training process.
        _, val_acc = fixed_model.evaluate(x_val, y_val)
        checkpoint = ModelCheckpoint(fixed_weights_path,
                                     monitor='val_acc',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        checkpoint.best = val_acc
        fixed_model.fit_generator(
            datagen.flow(x_train_val,
                         y_train_val,
                         batch_size=settings.BATCH_SIZE),
            steps_per_epoch=len(x_train_val) // settings.BATCH_SIZE + 1,
            validation_data=(x_val, y_val),
            epochs=settings.FURTHER_ADJUSTMENT_EPOCHS,
            callbacks=[checkpoint])
        fixed_model.load_weights(fixed_weights_path)

    # final evaluation.
    _, test_acc = fixed_model.evaluate(x_test, y_test, verbose=0)
    logger('----------final evaluation----------', log_path)
    logger('test accuracy: {:.4f}'.format(test_acc), log_path)
# Output locations, all derived from ``basedir``.
outdir = os.path.join(basedir, "Physio CSV data_preprocessed")
outfile = os.path.join(basedir, "Summary.csv")
infosheetfile = os.path.join(basedir, "Physio AMP_Subject_Info_Sheet (1).xlsx")
if not os.path.exists(outdir):
    os.mkdir(outdir)

# get a list of all entries in the physio folder, skipping names that start
# with ".DS"
folders = [x for x in os.listdir(datadir) if not x.startswith(".DS")]
# map the first 10 characters of each folder name to the number of folders
# sharing that prefix (presumably the subject ID -- confirm)
subjects = dict(Counter([x[:10] for x in folders]))

##############
# PREPROCESS #
##############

utils.logger("\nAligning timestamps for all subjects...\n", level=0)

# loop over all physio folders and preprocess timestamps to datetimes for all measurements
for folder in folders:

    # reconstruct subject folder and make preprocessing folder (if not existing)
    subdir = os.path.join(datadir, folder)
    suboutdir = os.path.join(outdir, folder)
    if not os.path.exists(suboutdir):
        os.mkdir(suboutdir)

    # loop over ACC, EDA, BVP, TEMP, HR and IBI, which share the same
    # processing stream
    for metric in ['ACC', 'EDA', 'BVP', 'TEMP', 'HR', 'IBI']:
        measurements = utils.extract_measurements(metric, subdir)
        # an int result is skipped (presumably a "nothing extracted"
        # sentinel from extract_measurements -- confirm)
        if isinstance(measurements, int):
            continue
# Stream "<epoch-millis> <src> <dst>" lines from stdin into ``collection``.
# Parsed documents are buffered and written in bulk once 3000 are pending or
# more than 3 seconds have passed since the last successful write; whatever
# is left is written when stdin is exhausted.
documents = []
start = time.time()
for line in sys.stdin:
    parsed_line = line.strip().split(" ")
    try:
        documents.append({
            'timestamp':
            datetime.datetime.fromtimestamp(int(parsed_line[0]) / 1e3),
            'src': parsed_line[1],
            'dst': parsed_line[2]
        })
    except IndexError:
        utils.logger('ERROR',
                     'Line index out of range {}'.format(parsed_line))
    except (ValueError, OverflowError, OSError):
        # Blank lines and non-numeric or out-of-range timestamps used to
        # abort the whole ingest; skip the line and keep going instead.
        utils.logger('ERROR', 'Invalid timestamp {}'.format(parsed_line))

    # Only flush when something is buffered: insert_many() rejects an empty
    # list, which previously produced a spurious error on every line once
    # the timer had expired with nothing pending.
    if documents and (len(documents) >= 3000 or
                      (time.time() - start) > 3.0):
        try:
            collection.insert_many(documents)
            utils.logger('INFO', '[DB] [INSERT] [{}]'.format(len(documents)))
            documents = []
            start = time.time()
        except Exception as e:
            # Keep the buffer so the batch is retried on the next line.
            utils.logger('ERROR', '[DB] [INSERTION] [{}]'.format(e))

# Final flush of the remaining documents, if any.
if documents:
    try:
        collection.insert_many(documents)
        utils.logger('INFO', '[DB] [INSERT] [{}]'.format(len(documents)))
    except Exception as e:
        utils.logger('ERROR', '[DB] [INSERTION] [{}]'.format(e))