def vary_batch_size():
    """Run ``process_all`` once for each batch size in a fixed sweep.

    Reloads ``CONFIG`` from ``CONFIG_FILE``, derives ``DATA_DIR`` for the
    module-level ``_DIR``, fetches the training data (``c=1``) and the test
    splits for ``c`` in 1..3, and then calls ``process_all`` with
    ``batch_size`` set to 64, 128, 256, 512 and 1024 in turn.

    Side effects: rebinds the module globals ``CONFIG``, ``DATA_DIR`` and
    ``DOMAIN_DIMS``; creates ``<SAVE_DIR>/checkpoints`` if it is missing.
    """
    # Only the globals that are rebound here need a declaration; the other
    # module-level names (SAVE_DIR, _DIR, CONFIG_FILE, logger, ...) are read.
    global CONFIG, DATA_DIR, DOMAIN_DIMS

    with open(CONFIG_FILE) as f:
        CONFIG = yaml.safe_load(f)

    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)
    setup_general_config()

    # A single idempotent call replaces the duplicated exists()/mkdir() pair.
    # Unlike os.mkdir it also creates SAVE_DIR itself when that is missing.
    os.makedirs(os.path.join(SAVE_DIR, 'checkpoints'), exist_ok=True)

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + _DIR)

    train_x_pos, train_x_neg, _, _, domain_dims = data_fetcher.get_data_v3(
        CONFIG['DATA_DIR'], _DIR, c=1)

    # Maps each c in {1, 2, 3} to [test_pos, test_anomaly] as returned by
    # get_data_v3 for that c.
    testing_dict = {}
    for _c in range(1, 3 + 1):
        _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(
            CONFIG['DATA_DIR'], _DIR, c=_c)
        testing_dict[_c] = [test_pos, test_anomaly]

    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)

    for bs in [64, 128, 256, 512, 1024]:
        process_all(CONFIG, _DIR, train_x_pos, train_x_neg, testing_dict,
                    batch_size=bs)
    logger.info('-------------------')
def main():
    """Load the config, fetch the ``c=2`` data and time one ``process`` run.

    ``_DIR`` is taken from the ``'_DIR'`` entry of the YAML config read from
    ``CONFIG_FILE``. The elapsed wall-clock time of the ``process`` call is
    printed at the end.

    Side effects: rebinds the module globals ``CONFIG``, ``_DIR``,
    ``DATA_DIR`` and ``DOMAIN_DIMS``; creates ``<SAVE_DIR>/checkpoints`` if
    it is missing.
    """
    global CONFIG, _DIR, DATA_DIR, DOMAIN_DIMS

    with open(CONFIG_FILE) as f:
        CONFIG = yaml.safe_load(f)

    _DIR = CONFIG['_DIR']
    # os.path.join instead of manual '/' concatenation, matching the sibling
    # entry points in this file.
    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)
    setup_general_config()

    # Idempotent replacement for the exists()/mkdir() pair.
    os.makedirs(os.path.join(SAVE_DIR, 'checkpoints'), exist_ok=True)

    # ------------ #
    train_x_pos, train_x_neg, test_pos, test_anomaly, domain_dims = \
        data_fetcher.get_data_v3(CONFIG['DATA_DIR'], _DIR, c=2)

    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)

    # Only the process() call is timed; the earlier start-of-function
    # timestamp was overwritten before use and has been removed.
    time_1 = time.time()
    process(CONFIG, _DIR, train_x_pos, train_x_neg, test_pos, test_anomaly)
    time_2 = time.time()
    print('time taken ', time_2 - time_1)
def main(exec_dir=None, ablation_flag=False):
    """Run ``process`` on the data set ``exec_dir`` and report the time taken.

    Args:
        exec_dir: Name of the data sub-directory. It is joined onto
            ``CONFIG['DATA_DIR']`` and concatenated into a log message, so
            the default ``None`` is not usable and raises ``TypeError``.
        ablation_flag: Logged and forwarded unchanged as the last positional
            argument of ``process``.

    Side effects: rebinds the module globals ``_DIR`` and ``DATA_DIR``;
    creates ``<SAVE_DIR>/checkpoints`` if it is missing.
    """
    global _DIR, DATA_DIR

    _DIR = exec_dir
    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)
    setup_general_config()

    # Idempotent replacement for the exists()/mkdir() pair.
    os.makedirs(os.path.join(SAVE_DIR, 'checkpoints'), exist_ok=True)

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + exec_dir)
    logger.info(' Ablation ')
    logger.info(ablation_flag)

    # NOTE(review): the sibling entry points store the fifth return value in
    # the DOMAIN_DIMS global; this one never did. Confirm that `process`
    # does not depend on DOMAIN_DIMS being set here.
    train_x_pos, train_x_neg, _, _, _ = data_fetcher.get_data_v3(
        CONFIG['DATA_DIR'], _DIR, c=1)

    # Maps each c in {1, 2, 3} to [test_pos, test_anomaly].
    testing_dict = {}
    for _c in range(1, 3 + 1):
        _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(
            CONFIG['DATA_DIR'], _DIR, c=_c)
        testing_dict[_c] = [test_pos, test_anomaly]

    print('Data pos shape', train_x_pos.shape)
    print('Data neg shape', train_x_neg.shape)

    time_1 = time.time()
    process(CONFIG, _DIR, train_x_pos, train_x_neg, testing_dict,
            ablation_flag)
    time_2 = time.time()

    logger.info('-------------------')
    print('time taken ', time_2 - time_1)
def main():
    """Fetch train/test data for ``_DIR`` and hand it to ``process_all``.

    Uses the already-loaded module-level ``CONFIG`` and ``_DIR``. Training
    data comes from ``get_data_v3`` with ``c=1``; test data is collected for
    ``c`` in 1..3.

    Side effects: rebinds the module globals ``DATA_DIR`` and
    ``DOMAIN_DIMS``; creates ``<SAVE_DIR>/checkpoints`` if it is missing.
    """
    global DATA_DIR, DOMAIN_DIMS

    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)
    setup_general_config()

    # A single idempotent call replaces the duplicated exists()/mkdir() pair.
    os.makedirs(os.path.join(SAVE_DIR, 'checkpoints'), exist_ok=True)

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + _DIR)

    train_x_pos, train_x_neg, _, _, domain_dims = data_fetcher.get_data_v3(
        CONFIG['DATA_DIR'], _DIR, c=1)

    # Maps each c in {1, 2, 3} to [test_pos, test_anomaly].
    testing_dict = {}
    for _c in range(1, 3 + 1):
        _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(
            CONFIG['DATA_DIR'], _DIR, c=_c)
        testing_dict[_c] = [test_pos, test_anomaly]

    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)

    process_all(CONFIG, _DIR, train_x_pos, train_x_neg, testing_dict)
    logger.info('-------------------')
def get_data(data_dir, dir):
    """Return the positive training data and the merged test sets per ``c``.

    Args:
        data_dir: Passed through as the first argument of
            ``data_fetcher.get_data_v3``.
        dir: Passed through as the second argument of
            ``data_fetcher.get_data_v3``.

    Returns:
        A pair ``(train_x_pos, test_dict_cIdx_data)``. ``train_x_pos`` is the
        first value returned for ``c=1``. The dict maps each ``c`` in
        {1, 2, 3} to ``[test_ids, test_data_x, test_anomaly_idList]``, where
        the normal entries come first and the anomalous ones are appended
        after them.
    """
    def _merged_test_split(c_idx):
        # Elements [0] / [1] of each test tuple are its id list and its data.
        _, _, normal_part, anomaly_part, _ = data_fetcher.get_data_v3(
            data_dir, dir, c=c_idx)
        anomaly_ids = anomaly_part[0]
        all_ids = list(np.hstack([normal_part[0], anomaly_ids]))
        all_rows = np.vstack([normal_part[1], anomaly_part[1]])
        return [all_ids, all_rows, anomaly_ids]

    train_x_pos, _, _, _, _ = data_fetcher.get_data_v3(data_dir, dir, c=1)
    test_dict_cIdx_data = {
        c_idx: _merged_test_split(c_idx) for c_idx in range(1, 3 + 1)
    }
    return train_x_pos, test_dict_cIdx_data
def get_data(data_dir, dir):
    """Return stringified training data and stringified test sets per ``c``.

    Every cell value ``v`` in column ``j`` is replaced by the string
    ``'<v>_<j>'``, so equal values in different columns become distinct
    tokens.

    Args:
        data_dir: Passed through as the first argument of
            ``data_fetcher.get_data_v3``.
        dir: Passed through as the second argument of
            ``data_fetcher.get_data_v3``.

    Returns:
        A pair ``(train_x_pos, test_dict_cIdx_data)``. ``train_x_pos`` is the
        stringified first value returned for ``c=1``. The dict maps each
        ``c`` in {1, 2, 3} to ``[test_ids, test_data_x,
        test_anomaly_idList]``, with normal entries first and anomalous
        entries after them; ``test_data_x`` is stringified.
    """
    def stringify_data(arr) -> np.ndarray:
        """Tag each cell with its column index: ``str(value) + '_' + str(col)``."""
        n_cols = arr.shape[1]
        return np.array([
            [str(row[j]) + '_' + str(j) for j in range(n_cols)]
            for row in arr
        ])

    train_x_pos, _, _, _, _ = data_fetcher.get_data_v3(data_dir, dir, c=1)
    train_x_pos = stringify_data(train_x_pos)

    test_dict_cIdx_data = {}
    for c in range(1, 3 + 1):
        _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(
            data_dir, dir, c=c)
        # Elements [0] / [1] of each test tuple are its id list and its data.
        test_anomaly_idList = test_anomaly[0]
        test_ids = list(np.hstack([test_pos[0], test_anomaly_idList]))
        test_data_x = stringify_data(np.vstack([test_pos[1], test_anomaly[1]]))
        test_dict_cIdx_data[c] = [test_ids, test_data_x, test_anomaly_idList]

    return train_x_pos, test_dict_cIdx_data
def main():
    """Compute training-set embeddings and pickle them next to the inputs.

    Fetches the ``c=2`` data for the module-level ``_DIR``, calls
    ``embedding_analysis_v1`` and writes ``[train_x_pos,
    train_x_embeddings]`` to ``<OP_DIR>/train_embedding_values.pkl``.

    Side effects: rebinds the module global ``DOMAIN_DIMS``; creates
    ``<SAVE_DIR>/checkpoints`` if it is missing; writes the pickle file.
    ``OP_DIR`` itself is not created here.
    """
    global DOMAIN_DIMS

    setup_general_config()

    # A single idempotent call replaces the duplicated exists()/mkdir() pair.
    os.makedirs(os.path.join(SAVE_DIR, 'checkpoints'), exist_ok=True)

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + _DIR)

    train_x_pos, train_x_neg, test_pos, test_anomaly, domain_dims = \
        data_fetcher.get_data_v3(CONFIG['DATA_DIR'], _DIR, c=2)

    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)

    train_x_embeddings = embedding_analysis_v1(
        CONFIG, _DIR, train_x_pos, train_x_neg, test_pos, test_anomaly)
    print(' >>>> ', train_x_embeddings.shape)

    # Write out the train_x & its embedding to a file
    op_data = [train_x_pos, train_x_embeddings]
    with open(os.path.join(OP_DIR, 'train_embedding_values.pkl'), 'wb') as fh:
        pickle.dump(op_data, fh, pickle.HIGHEST_PROTOCOL)

    logger.info('-------------------')
def vary_num_neg_type(_type=None):
    """Run ``process_all`` with negatives drawn by the chosen sampling type.

    Args:
        _type: ``'ape'`` takes the negatives from
            ``data_fetcher.get_data_v1`` and keeps every third one along
            axis 1. Any other value (``None`` is treated as ``'normal'``)
            takes the negatives from ``data_fetcher.get_data_v3`` and
            truncates them along axis 1 to the same count the ``'ape'``
            branch would keep.

    Side effects: rebinds the module globals ``CONFIG``, ``DATA_DIR`` and
    ``DOMAIN_DIMS``; creates ``<SAVE_DIR>/checkpoints`` if it is missing.
    """
    global CONFIG, DATA_DIR, DOMAIN_DIMS

    with open(CONFIG_FILE) as f:
        CONFIG = yaml.safe_load(f)

    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)
    setup_general_config()

    # A single idempotent call replaces the duplicated exists()/mkdir() pair.
    os.makedirs(os.path.join(SAVE_DIR, 'checkpoints'), exist_ok=True)

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + _DIR)

    if _type is None:
        _type = 'normal'
    logger.info(' Negative samplying type ' + _type)

    k = 3  # keep one of every k negatives from the get_data_v1 set
    if _type == 'ape':
        train_x_pos, train_x_neg, _, _, _, _, domain_dims = \
            data_fetcher.get_data_v1(CONFIG['DATA_DIR'], _DIR, c=1)
        _indices = np.arange(0, train_x_neg.shape[1], k)
        train_x_neg = np.take(train_x_neg, _indices, axis=1)
    else:
        # ensure same number of samples as APE
        _, tmp, _, _, _, _, _ = data_fetcher.get_data_v1(
            CONFIG['DATA_DIR'], _DIR, c=1)
        # The 'ape' branch keeps indices 0, k, 2k, ... which is
        # ceil(n / k) columns. The previous int(n / k) floored instead, so
        # it was one short whenever n was not a multiple of k.
        _count = len(range(0, tmp.shape[1], k))
        print(_count)
        train_x_pos, train_x_neg, _, _, domain_dims = \
            data_fetcher.get_data_v3(CONFIG['DATA_DIR'], _DIR, c=1)
        train_x_neg = train_x_neg[:, :_count, :]

    # Maps each c in {1, 2, 3} to [test_pos, test_anomaly].
    testing_dict = {}
    for _c in range(1, 3 + 1):
        _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(
            CONFIG['DATA_DIR'], _DIR, c=_c)
        testing_dict[_c] = [test_pos, test_anomaly]

    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)

    process_all(CONFIG, _DIR, train_x_pos, train_x_neg, testing_dict)
    logger.info('-------------------')
def main():
    """Run ``process_all`` on test sets restricted to the compreX id lists.

    Training data comes from ``get_data_v3`` with ``c=1``, with the
    negatives cut to the first 12 along axis 1. For ``c`` in {2, 3} and
    sample index 1, the test anomalies and test normals are filtered down to
    the ids listed in ``id_test_anomalies_c{c}_sample{s}.txt`` and
    ``id_test_set_c{c}_sample{s}.txt`` under
    ``./../../comprex/comprexData/<_DIR>``.

    ``testing_dict[c][s]`` holds ``[test_pos, test_anomaly]``, each being
    ``[id_array, data_array]`` after filtering.

    Side effects: rebinds the module globals ``CONFIG``, ``DATA_DIR`` and
    ``DOMAIN_DIMS``; creates ``<SAVE_DIR>/checkpoints`` if it is missing.
    """
    global CONFIG, DATA_DIR, DOMAIN_DIMS

    def _read_ids(path):
        """Return the first column of a header-less CSV file as a list."""
        return list(pd.read_csv(path, header=None)[0])

    def _restrict_to_ids(id_list, data, keep_ids, report=False):
        """Keep only the rows whose id is in ``keep_ids``.

        Returns ``[kept_ids, kept_data]``. With ``report=True`` the row
        count is printed before and after filtering.
        """
        # Column 0 holds the ids and the remaining columns hold the data.
        # np.hstack puts both into one array, so they share one dtype.
        frame = pd.DataFrame(
            np.hstack([np.reshape(id_list, [-1, 1]), data]))
        if report:
            print(len(frame))
        frame = frame.loc[frame[0].isin(keep_ids)]
        if report:
            print(len(frame))
        return [frame[0].values, frame.drop(columns=[0]).values]

    with open(CONFIG_FILE) as f:
        CONFIG = yaml.safe_load(f)

    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)
    setup_general_config()

    # A single idempotent call replaces the duplicated exists()/mkdir() pair.
    os.makedirs(os.path.join(SAVE_DIR, 'checkpoints'), exist_ok=True)

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + _DIR)

    train_x_pos, train_x_neg, _, _, domain_dims = data_fetcher.get_data_v3(
        CONFIG['DATA_DIR'], _DIR, c=1)
    # NOTE(review): 12 is hard-coded; presumably it matches the number of
    # negatives used by the compreX comparison. Confirm.
    train_x_neg = train_x_neg[:, :12, :]

    # Loop-invariant location of the compreX id files.
    compreXdata_loc = './../../comprex/comprexData/' + _DIR

    testing_dict = {}
    for _c in range(2, 3 + 1):
        testing_dict[_c] = {}
        for s in [1]:
            _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(
                CONFIG['DATA_DIR'], _DIR, c=_c)

            test_anomalies_ids = _read_ids(os.path.join(
                compreXdata_loc,
                "id_test_anomalies_c{}_sample{}.txt".format(_c, s)))
            test_anomaly = _restrict_to_ids(
                test_anomaly[0], test_anomaly[1], test_anomalies_ids,
                report=True)

            # ----
            test_set_ids = _read_ids(os.path.join(
                compreXdata_loc,
                "id_test_set_c{}_sample{}.txt".format(_c, s)))
            test_pos = _restrict_to_ids(
                test_pos[0], test_pos[1], test_set_ids)

            testing_dict[_c][s] = [test_pos, test_anomaly]

    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)

    process_all(CONFIG, _DIR, train_x_pos, train_x_neg, testing_dict)
    logger.info('-------------------')