import os
import pickle
import time

import numpy as np
import pandas as pd
import yaml

# data_fetcher, process, process_all, embedding_analysis_v1,
# setup_general_config and the module-level globals (CONFIG, CONFIG_FILE,
# SAVE_DIR, _DIR, OP_DIR, logger, ...) are assumed to be defined elsewhere
# in this project.


def vary_batch_size():
    global embedding_dims
    global SAVE_DIR
    global _DIR
    global DATA_DIR
    global CONFIG
    global CONFIG_FILE
    global MODEL_NAME
    global DOMAIN_DIMS
    global logger

    with open(CONFIG_FILE) as f:
        CONFIG = yaml.safe_load(f)

    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)

    setup_general_config()

    if not os.path.exists(os.path.join(SAVE_DIR, 'checkpoints')):
        os.mkdir(os.path.join(SAVE_DIR, 'checkpoints'))

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + _DIR)

    train_x_pos, train_x_neg, _, _, domain_dims = data_fetcher.get_data_v3(
        CONFIG['DATA_DIR'], _DIR, c=1)

    testing_dict = {}

    for _c in range(1, 3 + 1):
        _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(
            CONFIG['DATA_DIR'], _DIR, c=_c)
        testing_dict[_c] = [test_pos, test_anomaly]

    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)

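    # Sweep over batch sizes; process_all trains and evaluates once per setting.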
    for bs in [64, 128, 256, 512, 1024]:

        process_all(CONFIG,
                    _DIR,
                    train_x_pos,
                    train_x_neg,
                    testing_dict,
                    batch_size=bs)
    logger.info('-------------------')


def main():
    global embedding_dims
    global SAVE_DIR
    global _DIR
    global DATA_DIR
    global CONFIG
    global CONFIG_FILE
    global MODEL_NAME
    global DOMAIN_DIMS

    with open(CONFIG_FILE) as f:
        CONFIG = yaml.safe_load(f)

    _DIR = CONFIG['_DIR']
    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)
    setup_general_config()

    if not os.path.exists(os.path.join(SAVE_DIR, 'checkpoints')):
        os.mkdir(os.path.join(SAVE_DIR, 'checkpoints'))

    # ------------ #

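    # Load train/test data for sample set c=2 (get_data_v3's `c` appears to
    # index precomputed sample sets; see the c = 1..3 loops elsewhere).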
    train_x_pos, train_x_neg, test_pos, test_anomaly, domain_dims = data_fetcher.get_data_v3(
        CONFIG['DATA_DIR'], _DIR, c=2)

    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)

    time_1 = time.time()
    process(CONFIG, _DIR, train_x_pos, train_x_neg, test_pos, test_anomaly)

    time_2 = time.time()

    print('time taken ', time_2 - time_1)


def main(exec_dir=None, ablation_flag=False):
    global embedding_dims
    global SAVE_DIR
    global _DIR
    global DATA_DIR
    global CONFIG
    global CONFIG_FILE
    global MODEL_NAME
    global DOMAIN_DIMS
    global logger

    _DIR = exec_dir

    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)
    setup_general_config()

    if not os.path.exists(os.path.join(SAVE_DIR, 'checkpoints')):
        os.mkdir(os.path.join(SAVE_DIR, 'checkpoints'))

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + exec_dir)
    logger.info('Ablation flag: %s', ablation_flag)

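    # Train on sample set c=1; test splits from all three sets are gathered below.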
    train_x_pos, train_x_neg, _, _, domain_dims = data_fetcher.get_data_v3(
        CONFIG['DATA_DIR'], _DIR, c=1)

    testing_dict = {}

    for _c in range(1, 3 + 1):
        _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(
            CONFIG['DATA_DIR'], _DIR, c=_c)
        testing_dict[_c] = [test_pos, test_anomaly]

    print('Data pos shape', train_x_pos.shape)
    print('Data neg shape', train_x_neg.shape)

    time_1 = time.time()
    process(CONFIG, _DIR, train_x_pos, train_x_neg, testing_dict,
            ablation_flag)

    time_2 = time.time()
    logger.info('-------------------')
    print('time taken ', time_2 - time_1)


def main():
    global embedding_dims
    global SAVE_DIR
    global _DIR
    global DATA_DIR
    global CONFIG
    global CONFIG_FILE
    global MODEL_NAME
    global DOMAIN_DIMS
    global logger

    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)
    setup_general_config()

    if not os.path.exists(os.path.join(SAVE_DIR, 'checkpoints')):
        os.mkdir(os.path.join(SAVE_DIR, 'checkpoints'))

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + _DIR)

    train_x_pos, train_x_neg, _, _, domain_dims = data_fetcher.get_data_v3(
        CONFIG['DATA_DIR'], _DIR, c=1)

    testing_dict = {}

    for _c in range(1, 3 + 1):
        _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(
            CONFIG['DATA_DIR'], _DIR, c=_c)
        testing_dict[_c] = [test_pos, test_anomaly]

    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)
    process_all(CONFIG, _DIR, train_x_pos, train_x_neg, testing_dict)

    logger.info('-------------------')


def get_data(data_dir, dir):
    train_x_pos, _, _, _, domain_dims = data_fetcher.get_data_v3(data_dir,
                                                                 dir,
                                                                 c=1)
    test_dict_cIdx_data = {}
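    # For each sample set c, merge the normal and anomalous test records into
    # a single id list / feature matrix, keeping the anomaly ids so callers
    # can tell which records are anomalous.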
    for c in range(1, 3 + 1):
        _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(data_dir,
                                                                   dir,
                                                                   c=c)

        test_pos_idList = test_pos[0]
        test_pos_x = test_pos[1]
        test_anomaly_idList = test_anomaly[0]
        test_anomaly_x = test_anomaly[1]

        test_ids = list(np.hstack([test_pos_idList, test_anomaly_idList]))

        test_data_x = np.vstack([test_pos_x, test_anomaly_x])

        test_dict_cIdx_data[c] = [test_ids, test_data_x, test_anomaly_idList]

    return train_x_pos, test_dict_cIdx_data


def get_data(data_dir, dir):
    def stringify_data(arr) -> np.ndarray:
        # Tag each value with its column index (e.g. value 42 in column 3
        # becomes "42_3") so identical ids from different domains map to
        # distinct string tokens.
        tmp1 = []
        for i in range(arr.shape[0]):
            tmp2 = []
            for j in range(arr.shape[1]):
                tmp2.append(str(arr[i][j]) + '_' + str(j))
            tmp1.append(tmp2)
        return np.array(tmp1)

    train_x_pos, _, _, _, domain_dims = data_fetcher.get_data_v3(data_dir,
                                                                 dir,
                                                                 c=1)

    train_x_pos = stringify_data(train_x_pos)

    test_dict_cIdx_data = {}
    for c in range(1, 3 + 1):
        _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(data_dir,
                                                                   dir,
                                                                   c=c)

        test_pos_idList = test_pos[0]
        test_pos_x = test_pos[1]
        test_anomaly_idList = test_anomaly[0]
        test_anomaly_x = test_anomaly[1]

        test_ids = list(np.hstack([test_pos_idList, test_anomaly_idList]))

        test_data_x = np.vstack([test_pos_x, test_anomaly_x])

        test_data_x = stringify_data(test_data_x)
        test_dict_cIdx_data[c] = [test_ids, test_data_x, test_anomaly_idList]

    return train_x_pos, test_dict_cIdx_data


def main():
    global embedding_dims
    global SAVE_DIR
    global _DIR
    global DATA_DIR
    global CONFIG
    global CONFIG_FILE
    global MODEL_NAME
    global DOMAIN_DIMS
    global logger
    global OP_DIR
    setup_general_config()

    if not os.path.exists(os.path.join(SAVE_DIR, 'checkpoints')):
        os.mkdir(os.path.join(SAVE_DIR, 'checkpoints'))

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + _DIR)

    train_x_pos, train_x_neg, test_pos, test_anomaly, domain_dims = data_fetcher.get_data_v3(
        CONFIG['DATA_DIR'], _DIR, c=2)

    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)
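    # Learn embeddings for the training records; assumed to be row-aligned
    # with train_x_pos, since the two are pickled together as a pair below.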
    train_x_embeddings = embedding_analysis_v1(CONFIG, _DIR, train_x_pos,
                                               train_x_neg, test_pos,
                                               test_anomaly)
    print('Embedding shape', train_x_embeddings.shape)

    # Write out the train_x & its embedding to a file
    op_data = [train_x_pos, train_x_embeddings]
    with open(os.path.join(OP_DIR, 'train_embedding_values.pkl'), 'wb') as fh:
        pickle.dump(op_data, fh, pickle.HIGHEST_PROTOCOL)
    logger.info('-------------------')


def vary_num_neg_type(_type=None):
    global embedding_dims
    global SAVE_DIR
    global _DIR
    global DATA_DIR
    global CONFIG
    global CONFIG_FILE
    global MODEL_NAME
    global DOMAIN_DIMS
    global logger

    with open(CONFIG_FILE) as f:
        CONFIG = yaml.safe_load(f)

    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)

    setup_general_config()

    if not os.path.exists(os.path.join(SAVE_DIR, 'checkpoints')):
        os.mkdir(os.path.join(SAVE_DIR, 'checkpoints'))

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + _DIR)
    if _type is None:
        _type = 'normal'

    logger.info(' Negative sampling type ' + _type)

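    # 'ape' draws negatives from get_data_v1 (APE-style) and keeps every k-th
    # one; any other type uses get_data_v3 negatives truncated to the same
    # count, so both settings train on an equal number of negative samples.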
    if _type == 'ape':

        train_x_pos, train_x_neg, _, _, _, _, domain_dims = data_fetcher.get_data_v1(
            CONFIG['DATA_DIR'], _DIR, c=1)
        k = 3
        _indices = np.arange(0, train_x_neg.shape[1], k)
        train_x_neg = np.take(train_x_neg, _indices, axis=1)
    else:
        # ensure same number of samples as APE

        _, tmp, _, _, _, _, _ = data_fetcher.get_data_v1(CONFIG['DATA_DIR'],
                                                         _DIR,
                                                         c=1)
        k = 3
        _count = int(tmp.shape[1] / k)
        print('Number of negative samples retained:', _count)
        train_x_pos, train_x_neg, _, _, domain_dims = data_fetcher.get_data_v3(
            CONFIG['DATA_DIR'], _DIR, c=1)
        train_x_neg = train_x_neg[:, :_count, :]

    testing_dict = {}

    for _c in range(1, 3 + 1):
        _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(
            CONFIG['DATA_DIR'], _DIR, c=_c)
        testing_dict[_c] = [test_pos, test_anomaly]

    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)

    process_all(CONFIG, _DIR, train_x_pos, train_x_neg, testing_dict)
    logger.info('-------------------')


def main():
    global embedding_dims
    global SAVE_DIR
    global _DIR
    global DATA_DIR
    global CONFIG
    global CONFIG_FILE
    global MODEL_NAME
    global DOMAIN_DIMS
    global logger

    with open(CONFIG_FILE) as f:
        CONFIG = yaml.safe_load(f)
    DATA_DIR = os.path.join(CONFIG['DATA_DIR'], _DIR)

    setup_general_config()

    if not os.path.exists(os.path.join(SAVE_DIR, 'checkpoints')):
        os.mkdir(os.path.join(SAVE_DIR, 'checkpoints'))

    # ------------ #
    logger.info('-------------------')
    logger.info('DIR ' + _DIR)

    train_x_pos, train_x_neg, _, _, domain_dims = data_fetcher.get_data_v3(
        CONFIG['DATA_DIR'],
        _DIR,
        c=1
    )
    # Keep only the first 12 negative samples per record (hard-coded cap).
    train_x_neg = train_x_neg[:, :12, :]
    testing_dict = {}

    for _c in range(2, 3 + 1):
        testing_dict[_c] = {}
        for s in [1]:
            _, _, test_pos, test_anomaly, _ = data_fetcher.get_data_v3(
                CONFIG['DATA_DIR'],
                _DIR,
                c=_c
            )
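            # Restrict both test sets to the record ids used by the CompreX
            # baseline (read from files in its data directory), presumably so
            # results are comparable across methods.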
            compreXdata_loc = './../../comprex/comprexData/' + _DIR
            test_anomalies_ids_file = os.path.join(compreXdata_loc, "id_test_anomalies_c{}_sample{}.txt".format(_c,s))
            df_0 = pd.read_csv(test_anomalies_ids_file,header=None)
            test_anomalies_ids = list(df_0[0])

            test_anomaly_idList = test_anomaly[0]
            anomaly_data = test_anomaly[1]

            tmp_df = pd.DataFrame(
                np.hstack([np.reshape(test_anomaly_idList,[-1,1]),anomaly_data])
            )
            print('Anomalies before filtering:', len(tmp_df))
            tmp_df = tmp_df.loc[tmp_df[0].isin(test_anomalies_ids)]
            print('Anomalies after filtering:', len(tmp_df))
            test_anomaly_idList = (tmp_df[0]).values
            del tmp_df[0]
            anomaly_data = tmp_df.values
            test_anomaly = [test_anomaly_idList, anomaly_data]

            # ----

            test_set_ids_file = os.path.join(compreXdata_loc, "id_test_set_c{}_sample{}.txt".format(_c, s))
            df_0 = pd.read_csv(test_set_ids_file, header=None)
            test_set_ids = list(df_0[0])

            test_normal_idList = test_pos[0]
            test_x = test_pos[1]

            tmp_df = pd.DataFrame(
                np.hstack([np.reshape(test_normal_idList, [-1, 1]), test_x])
            )
            tmp_df = tmp_df.loc[tmp_df[0].isin(test_set_ids)]
            test_normal_idList = (tmp_df[0]).values
            del tmp_df[0]
            test_x = tmp_df.values
            test_pos = [test_normal_idList, test_x]
            testing_dict[_c][s] = [test_pos, test_anomaly]
    DOMAIN_DIMS = domain_dims
    print('Data shape', train_x_pos.shape)
    process_all(
        CONFIG,
        _DIR,
        train_x_pos,
        train_x_neg,
        testing_dict
    )
    logger.info('-------------------')