Example #1
def gen_args(bucket=None, gcs_dir=None):
    args = [
        "--output-dir=./output_test",
        "--model-dir=./output_test/checkpoint",
        "--shuffle-batch=False",
    ]

    if bucket is not None:
        args.append("--bucket=" + bucket)

    if gcs_dir is not None:
        args.append("--gcs-dir=" + gcs_dir)

    return get_args(args)
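gen_args builds an argv-style list and hands it to get_args, which suggests get_args simply wraps argparse and accepts an optional argument list. A minimal sketch of such a parser, covering only the flags used above; the flag names and defaults here are assumptions for illustration, not the project's actual definition:

import argparse


def get_args(argv=None):
    # hypothetical parser covering only the flags gen_args builds above
    parser = argparse.ArgumentParser()
    parser.add_argument("--output-dir", default="./output")
    parser.add_argument("--model-dir", default="./output/checkpoint")
    parser.add_argument("--shuffle-batch", default="True")
    parser.add_argument("--bucket", default=None)
    parser.add_argument("--gcs-dir", default=None)
    # argv=None falls back to sys.argv[1:], so a bare get_args() also works on the CLI
    return parser.parse_args(argv)

With a definition like this, gen_args(bucket='my-bucket') would return a Namespace whose bucket attribute is 'my-bucket' while the other flags keep their defaults.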
Example #2
def get_historical_agg_trades(base, quote):
    args = get_args()

    with_parquet = args.parquet

    symbol = base + quote
    start_at = args.start_at
    limit = args.limit

    params = {'symbol': symbol, 'fromId': start_at, 'limit': limit}

    return all_trade_to_csv(base=base,
                            quote=quote,
                            params=params,
                            with_parquet=with_parquet)
Example #3
def get_historical_candlesticks(base, quote):
    args = get_args()

    with_parquet = args.parquet

    symbol = base + quote
    interval = args.interval
    start_at = args.start_at
    limit = args.limit

    params = {
        'symbol': symbol,
        'interval': interval,
        'startTime': start_at,
        'limit': limit
    }

    return all_candle_to_csv(base=base,
                             quote=quote,
                             params=params,
                             interval=interval,
                             with_parquet=with_parquet)
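Examples #2 and #3 (and the later snippets from the same project) only read attributes such as parquet, interval, start_at, limit and timeout from the object get_args returns. A sketch of an argparse definition that would expose those attributes; the flag names, types and defaults are assumptions made purely for illustration:

import argparse


def get_args():
    # hypothetical flags inferred from the attributes the examples access;
    # names, types and defaults are assumptions, not the real configuration
    parser = argparse.ArgumentParser()
    parser.add_argument('--parquet', action='store_true')
    parser.add_argument('--upload', action='store_true')
    parser.add_argument('--interval', default='1m')
    parser.add_argument('--dtype', default='candle', choices=['candle', 'trade'])
    parser.add_argument('--pairs', default='all')
    parser.add_argument('--start-at', type=int, default=0)
    parser.add_argument('--limit', type=int, default=1000)
    parser.add_argument('--timeout', type=int, default=30)
    parser.add_argument('--check-trade', default=None)
    return parser.parse_args()

argparse maps the dashed flag names to underscored attributes, so --start-at becomes args.start_at and --check-trade becomes args.check_trade, matching the accesses in the examples.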
Example #4
def main():

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    attack_param = {
        "ord": np.inf,
        "epsilon": 8. / 255.,
        "alpha": 2. / 255.,
        "num_iter": 20,
        "restart": 1
    }

    args = get_args()
    logger = metaLogger(args)
    logging.basicConfig(filename=args.j_dir + "/log/log.txt",
                        format='%(asctime)s %(message)s',
                        level=logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    seed_everything(args.seed)
    train_loader, test_loader = load_dataset(args.dataset, args.batch_size)

    model = get_model(args, device)
    opt, lr_scheduler = get_optim(model, args)
    ckpt_epoch = 0

    ckpt_dir = args.j_dir + "/" + str(args.j_id) + "/"
    ckpt_location = os.path.join(ckpt_dir,
                                 "custome_ckpt_" + logger.ckpt_status + ".pth")
    if os.path.exists(ckpt_location):
        ckpt = torch.load(ckpt_location)
        model.load_state_dict(ckpt["state_dict"])
        opt.load_state_dict(ckpt["optimizer"])
        ckpt_epoch = ckpt["epoch"]
        if lr_scheduler:
            lr_scheduler.load_state_dict(ckpt["lr_scheduler"])
        print("LOADED CHECKPOINT")

    for _epoch in range(ckpt_epoch, args.epoch):
        train_log = train(args, _epoch, logger, train_loader, model, opt,
                          device)

        test_log = test_clean(test_loader, model, device)
        adv_log = test_adv(test_loader, model, pgd_rand, attack_param, device)

        logger.add_scalar("pgd20/acc", adv_log[0], _epoch + 1)
        logger.add_scalar("pgd20/loss", adv_log[1], _epoch + 1)
        logger.add_scalar("test/acc", test_log[0], _epoch + 1)
        logger.add_scalar("test/loss", test_log[1], _epoch + 1)
        logging.info("Test set: Loss: {loss:.6f}\t"
                     "Accuracy: {acc:.2f}".format(loss=test_log[1],
                                                  acc=test_log[0]))
        logging.info("PGD20: Loss: {loss:.6f}\t"
                     "Accuracy: {acc:.2f}".format(loss=adv_log[1],
                                                  acc=adv_log[0]))

        if lr_scheduler:
            lr_scheduler.step()

        if (_epoch + 1) % args.ckpt_freq == 0:
            rotateCheckpoint(ckpt_dir, "custome_ckpt", model, opt, _epoch,
                             lr_scheduler)

        logger.save_log()
    logger.close()
    torch.save(model.state_dict(), args.j_dir + "/model/model.pt")
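seed_everything is not shown in the snippet; helpers with this name usually seed every random number generator involved in training. A minimal sketch, assuming that is all it does here:

import random

import numpy as np
import torch


def seed_everything(seed):
    # assumed behaviour: seed Python, NumPy and PyTorch (CPU and CUDA) RNGs
    # and make cuDNN deterministic so runs are reproducible
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False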
Example #5
def main():
    """Main loop; loop over all currency pairs that exist on the exchange.
    """
    args = get_args()
    print(args)

    with_parquet = args.parquet
    upload_parquet = args.upload
    interval = args.interval
    data_type = args.dtype
    pairs = "".join(args.pairs.split())  # remove whitespace

    if pairs == 'all':
        # get all pairs currently available
        all_symbols = pd.DataFrame(
            requests.get(f'{API_BASE}exchangeInfo').json()['symbols'])
        all_pairs = [
            tuple(x)
            for x in all_symbols[['baseAsset', 'quoteAsset']].to_records(
                index=False)
        ]
    else:
        all_pairs = [tuple(pair.split('-')) for pair in pairs.split(',')]
        #all_pairs = [('BTC', 'USDT')]
        #all_pairs = [('DF', 'ETH')]

    # randomising order helps during testing and doesn't make any difference in production
    random.shuffle(all_pairs)

    # make sure data folders exist
    os.makedirs('data', exist_ok=True)
    os.makedirs('compressed', exist_ok=True)

    # do a full update on all pairs
    n_count = len(all_pairs)
    for n, pair in enumerate(all_pairs, 1):
        base, quote = pair

        # build the symbol for this pair and dispatch on the requested data type
        symbol = base + quote
        if data_type == 'candle':
            new_lines = get_historical_candlesticks(base, quote)

        elif data_type == 'trade':
            new_lines = get_historical_agg_trades(base, quote)

        else:
            raise ValueError(f'unknown data type: {data_type}')

        if new_lines > 0:
            print(
                f'{datetime.now()} {n}/{n_count} Wrote {new_lines} new lines to file for {data_type}_{base}-{quote}_interval-{interval}'
            )
        else:
            print(
                f'{datetime.now()} {n}/{n_count} Already up to date with {data_type}_{base}-{quote}_interval-{interval}'
            )

    # clean the data folder and upload a new version of the dataset to kaggle
    try:
        os.remove('compressed/.DS_Store')
    except FileNotFoundError:
        pass

    if with_parquet and upload_parquet:
        write_metadata(n_count)
        yesterday = date.today() - timedelta(days=1)
        subprocess.run([
            'kaggle', 'datasets', 'version', '-p', 'compressed/', '-m',
            f'full update of all {n_count} pairs up to {str(yesterday)}'
        ])
        os.remove('compressed/dataset-metadata.json')
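write_metadata is not included in the snippet. Since `kaggle datasets version -p compressed/` expects a dataset-metadata.json in that folder, a plausible sketch simply serialises the module-level METADATA dict (whose exact schema is defined elsewhere in the project) to that location; the implementation below is an assumption, not the project's code:

import json


def write_metadata(n_count):
    # hypothetical helper: n_count is accepted to match the call in main();
    # how (or whether) it ends up in the metadata is not shown in the snippet
    with open('compressed/dataset-metadata.json', 'w') as file:
        json.dump(METADATA, file, indent=4)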
Example #6
def all_trade_to_csv(base, quote, params=None, with_parquet=False):
    """Collect a list of candlestick batches with all candlesticks of a trading pair,
    concat into a dataframe and write it to CSV.
    """

    args = get_args()
    filepath = f'data/trade_{base}-{quote}.csv'

    api_path = 'aggTrades'

    # see if there is any data saved on disk already
    try:
        if params['fromId'] == 0:
            batches = [pd.read_csv(filepath)]
            last_id = batches[-1]['a'].max()
            params['fromId'] = last_id + 1
        else:
            last_id = params['fromId']
            params['fromId'] = last_id + 1
        # if we already have data, continue from last_id; only new batches are
        # appended to the csv, so the loaded data itself can be cleared
        batches = [pd.DataFrame([])]
    except FileNotFoundError:
        batches = [pd.DataFrame([])]
        last_id = params['fromId']

    old_lines = len(batches[-1].index)

    # gather all trades available, starting from the last id loaded from disk or provided fromId
    # stop if the id that comes back from the api is the same as the last one
    previous_id = -1

    while previous_id != last_id:
        # stop once we have caught up with the most recent data
        if previous_id >= last_id and previous_id > 0:
            break

        previous_id = last_id

        new_batch = get_batch(params=params,
                              api_path=api_path,
                              timeout=args.timeout)

        # requesting trades from the future returns an empty batch
        # also stop in case the response code was not 200
        if new_batch.empty:
            break

        last_id = new_batch['a'].max()
        print(last_id, previous_id)
        timestamp = new_batch['T'].max()

        # update fromId to continue from last id
        params['fromId'] = last_id + 1

        batches.append(new_batch)
        last_datetime = datetime.fromtimestamp(timestamp / 1000)

        covering_spaces = 20 * ' '
        print(datetime.now(),
              base,
              quote,
              str(last_datetime) + covering_spaces,
              end='\r',
              flush=True)

        # estimate how many lines are buffered; if it gets huge, flush to csv
        # @TODO get field not hardcoded
        lines = len(batches) * params['limit']
        if lines >= 5000:
            df = pp.prepare_df(batches, field='a')
            pp.append_to_csv(df, filepath)
            # reset
            batches.clear()

    # in the case that new data was gathered, write it to disk
    if len(batches) > 1:
        df = pp.prepare_df(batches, field='a')

        if with_parquet:
            # write clean version of csv to parquet
            parquet_name = f'{base}-{quote}.parquet'
            full_path = f'compressed/{parquet_name}'
            pp.write_raw_to_parquet(df, full_path)
            METADATA['data'].append({
                'description':
                f'All aggregate trade history for the pair {base} and {quote}. Counts {df.index.size} records.',
                'name': parquet_name,
                'totalBytes': os.stat(full_path).st_size,
                'columns': []
            })

        pp.append_to_csv(df, filepath)
        #df.to_csv(filepath, index=False)
        return len(df.index) - old_lines
    return 0
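get_batch is the only network call in the loop above and is not shown. A sketch under the assumptions that API_BASE is the REST endpoint prefix already used in main() and that anything other than a 200 response should come back as an empty dataframe, as the surrounding comments imply:

import pandas as pd
import requests


def get_batch(params, api_path='aggTrades', timeout=30):
    # hypothetical fetch helper: one GET against the exchange API; an empty
    # dataframe signals the caller's while-loop to stop
    try:
        response = requests.get(f'{API_BASE}{api_path}', params=params,
                                timeout=timeout)
    except requests.exceptions.RequestException:
        return pd.DataFrame([])
    if response.status_code == 200:
        return pd.DataFrame(response.json())
    return pd.DataFrame([])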
Example #7

if __name__ == '__main__':
    args = get_args()

    if args.check_trade is not None:
        pp.check_trade_index(args.check_trade)
    else:
        main()