Ejemplo n.º 1
0
    # Parse the command line. `argparser` is built before this excerpt, so the
    # available flags and their defaults are not visible here.
    args = argparser.parse_args()

    # Sanity-check flag combinations before doing any work.
    # NOTE(review): `assert` statements are removed under `python -O`, so these
    # checks disappear in optimized runs.
    # At least one of a training set or a model to resume from is required.
    assert args.train != '' or args.resume_training != '', "Must provide training data or a model"
    # Evaluation only makes sense with a model to resume from.
    assert not (args.evaluate
                and not args.resume_training), "provide a model with --resume"
    # When not evaluating (i.e. training), both train and valid data are needed.
    assert not (not args.evaluate and
                (args.train == ''
                 or args.valid == '')), "use --train and --valid for training"
    assert args.batch_size % 2 == 0, "use a multiple of 2 for batch_size"

    # Default keep_probs to all ones, with one entry per element of
    # args.hiddens plus one extra.
    # presumably 1.0 means "no dropout" for that layer -- TODO confirm
    if not args.keep_probs:
        args.keep_probs = np.ones(len(args.hiddens) + 1)
    # A user-supplied keep_probs must have that same length.
    assert len(args.hiddens) + 1 == len(args.keep_probs)

    # Echo the exact invocation and the output of git_log() (defined elsewhere).
    print(' '.join(sys.argv))
    print(git_log())

    # Seed NumPy, the stdlib RNG and TensorFlow with the same value.
    np.random.seed(args.seed)
    random.seed(args.seed)
    tf.set_random_seed(args.seed)
    # Print arrays with 3 decimals and without scientific notation.
    np.set_printoptions(precision=3)
    np.set_printoptions(suppress=True)

    # Make sure the output directories exist (relative to the working dir).
    required_folders = ['log', 'summary', 'model']
    for folder in required_folders:
        if not os.path.exists(folder):
            os.makedirs(folder)

    # load data
    # Indices 0..10; presumably the feature columns to select -- TODO confirm,
    # the use of feat_idx is outside this excerpt.
    feat_idx = np.arange(11)
    # Total feature width: 64 + 32 + 1 = 97.
    args.n_feat = 8 * 8 + 2 * 16 + 1  # ip-> 8*8, port-> 2*16, protocol->1
if __name__ == '__main__':
    # Command-line interface: input data files, output prefix, and the lists
    # of hash counts / space budgets supplied by the user.
    argparser = argparse.ArgumentParser(sys.argv[0])
    argparser.add_argument("--data", type=str, nargs='*', help="list of input .npy data", required=True)
    argparser.add_argument("--save", type=str, help="prefix to save the results", required=True)
    argparser.add_argument("--seed", type=int, help="random state for sklearn", default=69)
    argparser.add_argument("--n_hashes_list", type=int, nargs='*', help="number of hashes", required=True)
    argparser.add_argument("--space_list", type=float, nargs='*', help="space in MB", required=True)
    argparser.add_argument("--n_workers", type=int, help="number of workers", default=10)
    argparser.add_argument("--aol_data", action='store_true', default=False)
    argparser.add_argument("--count_sketch", action='store_true', default=False)
    args = argparser.parse_args()

    # Record the exact invocation plus the output of git_log() (defined
    # elsewhere) so a run can be traced back to the code that produced it.
    command = ' '.join(sys.argv) + '\n'
    log_str = command
    log_str += git_log() + '\n'
    print(log_str)
    np.random.seed(args.seed)

    if args.aol_data:
        # The AOL loader is called with a single path. Report a bad argument
        # count through argparse instead of `assert`, which is stripped when
        # Python runs with -O and would let extra files be silently ignored.
        if len(args.data) != 1:
            argparser.error("--aol_data expects exactly one --data file, got %d" % len(args.data))
        x, y = get_data_aol_query(args.data[0])
    else:
        x, y = get_data_str_with_ports_list(args.data)
    get_stat(args.data, x, y)

    # Results go under param_results/<sketch name>/; the empty last component
    # makes os.path.join end the path with a separator.
    name = 'count_sketch' if args.count_sketch else 'count_min'
    folder = os.path.join('param_results', name, '')