Example #1
    def __load_ohlcv(self, bin_size):
        """
        Read the data.
        :return:
        """
        start_time = datetime.now(timezone.utc) - timedelta(days=121)
        end_time = datetime.now(timezone.utc)
        file = OHLC_FILENAME.format(self.pair, bin_size)

        self.bin_size = bin_size

        if os.path.exists(file):
            self.df_ohlcv = load_data(file)
            self.df_ohlcv.set_index(self.df_ohlcv.columns[0], inplace=True)

            if self.update_data:
                self.df_ohlcv = self.df_ohlcv[:-1]  # exclude last candle
                data = self.download_data(
                    bin_size,
                    dateutil.parser.isoparse(self.df_ohlcv.iloc[-1].name),
                    end_time)
                self.df_ohlcv = pd.concat([self.df_ohlcv, data])
                self.save_csv(self.df_ohlcv, file)

            # self.df_ohlcv.reset_index(inplace=True)
            self.df_ohlcv = load_data(file)

        else:
            data = self.download_data(bin_size, start_time, end_time)
            self.save_csv(data, file)
            self.df_ohlcv = load_data(file)

        if self.check_candles_flag:
            self.check_candles(self.df_ohlcv)
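
The load_data and save_csv helpers referenced throughout these examples are not shown. A minimal sketch of what they could look like, assuming the candles are stored as a plain CSV whose first column holds ISO-8601 timestamps (consistent with the callers' use of set_index and dateutil.parser.isoparse); the real project may implement them differently:

import pandas as pd


def load_data(file):
    # Hypothetical helper: read the cached candles; the timestamp stays a plain
    # string column so callers can decide whether to promote it to the index.
    return pd.read_csv(file)


def save_csv(df, file):
    # Hypothetical helper: write the candles back, keeping the (timestamp) index.
    df.to_csv(file, index=True)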
Example #2
    def test_validate_continuous(self):
        file = os.path.join(os.path.dirname(__file__), "./ohlc/discontinuous.csv")
        data = load_data(file)
        assert not validate_continuous(data, '5m')[0]

        file = os.path.join(os.path.dirname(__file__), "./ohlc/continuous.csv")
        data = load_data(file)
        assert validate_continuous(data, '5m')[0]
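
validate_continuous itself is not part of the excerpt. A sketch of the check the test exercises, assuming the helper returns a (bool, info) tuple and that the frame's first column holds the candle timestamps (both inferred, not confirmed by the source):

import pandas as pd


def validate_continuous(data, bin_size):
    # Hypothetical sketch: candles are continuous when consecutive timestamps
    # differ by exactly one bin (e.g. '5m' -> 5 minutes).
    step = pd.Timedelta(minutes=int(bin_size.rstrip('m')))
    timestamps = pd.to_datetime(data.iloc[:, 0])
    gaps = timestamps.diff().dropna()
    bad = gaps[gaps != step]
    if bad.empty:
        return True, None
    # Report the timestamp right after the first gap so callers can inspect it.
    return False, timestamps[bad.index[0]]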
Example #3
    def __load_ohlcv(self, bin_size):
        """
        Read the data.
        :return:
        """
        start_time = datetime.now(timezone.utc) - timedelta(days=self.days)
        end_time = datetime.now(timezone.utc)
        file = OHLC_FILENAME.format("binance_futures", self.pair, bin_size)

        # Force minute granularity if multiple timeframes are used
        if len(bin_size) > 1:
            self.minute_granularity = True

        if self.minute_granularity and "1m" not in bin_size:
            bin_size.append(
                '1m'
            )  # add 1m timeframe to the list in case we need minute granularity

        self.bin_size = bin_size

        warmup = None  # warmup needed for each timeframe, in minutes

        for t in bin_size:
            if self.warmup_tf is None:
                warmup = allowed_range_minute_granularity[t][3]
                self.warmup_tf = t
            elif warmup < allowed_range_minute_granularity[t][3]:
                warmup = allowed_range_minute_granularity[t][3]
                self.warmup_tf = t
            else:
                continue

        if os.path.exists(file):
            self.df_ohlcv = load_data(file)
            self.df_ohlcv.set_index(self.df_ohlcv.columns[0], inplace=True)

            if self.update_data:
                self.df_ohlcv = self.df_ohlcv[:-1]  # exclude last candle
                data = self.download_data(
                    bin_size,
                    dateutil.parser.isoparse(self.df_ohlcv.iloc[-1].name),
                    end_time)
                self.df_ohlcv = pd.concat([self.df_ohlcv, data])
                self.save_csv(self.df_ohlcv, file)

            self.df_ohlcv.reset_index(inplace=True)
            self.df_ohlcv = load_data(file)

        else:
            data = self.download_data(bin_size, start_time, end_time)
            self.save_csv(data, file)
            self.df_ohlcv = load_data(file)

        if self.check_candles_flag:
            self.check_candles(self.df_ohlcv)
Example #4
    def __load_ohlcv(self, bin_size):
        """
        Read the data.
        :return:
        """
        start_time = datetime.now(timezone.utc) - timedelta(days=31)
        end_time = datetime.now(timezone.utc)
        file = OHLC_FILENAME.format(bin_size)

        if os.path.exists(file):
            self.df_ohlcv = load_data(file)
        else:
            self.download_data(file, bin_size, start_time, end_time)
            self.df_ohlcv = load_data(file)
Example #5
def do_reduce(idx, params, test_mode=True):
    start = time.time()
    if idx % 100 == 0 or test_mode:
        print(str(idx) + "   " + str(datetime.datetime.now()))

    # reduce data
    data_path = os.path.join('cache', 'data_reduced',
                             (params['data_hash_id'] + '.pkl'))
    if not os.path.isfile(data_path):
        print('applying data reduction')
        data = src.load_data(params['dataset'])

        # make dim reduction
        reducer = getattr(src.reducers, params['reducer'])
        data_reduced = reducer(data, params)

        # pickle reduced data
        with open(data_path, 'wb') as f:
            pickle.dump(data_reduced, f)

        total_time = round(time.time() - start, ndigits=2)
        time_out = {
            'data_hash_id': params['data_hash_id'],
            'time_reduce': total_time
        }
        time_out = pd.DataFrame(time_out, index=[idx])
        time_out.to_csv(
            os.path.join('cache', 'data_reduced_time',
                         (params['data_hash_id'] + '.csv')))
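
do_reduce assumes params already carries a data_hash_id that names the cache files. One illustrative way such a key could be derived from the settings that determine the reduced data (make_params and its arguments are hypothetical, not taken from the project):

import hashlib
import json


def make_params(dataset, reducer, **reducer_kwargs):
    # Hash the settings that affect the reduction so identical configurations
    # map to the same cache entry under cache/data_reduced/.
    params = {'dataset': dataset, 'reducer': reducer, **reducer_kwargs}
    digest = hashlib.md5(json.dumps(params, sort_keys=True).encode('utf-8')).hexdigest()
    params['data_hash_id'] = digest
    return params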
Example #6
    def __load_ohlcv(self, bin_size):
        """
        Read the data.
        :return:
        """
        logger.info('bitmex_backtest >> __load_ohlcv >> %s ' % bin_size)
        start_time = datetime.now(timezone.utc) - timedelta(days=31)
        end_time = datetime.now(timezone.utc)
        file = OHLC_FILENAME.format(bin_size)

        start = time.time()  # record the start time
        if os.path.exists(file):
            self.df_ohlcv = load_data(file)
        else:
            self.download_data(file, bin_size, start_time, end_time)
            self.df_ohlcv = load_data(file)
        logger.info('bitmex_backtest >> __load_ohlcv >> loading data time : %s' % str(time.time() - start))
        print('load_data time:', time.time() - start)
Example #7
    def test_load_file(self):
        bitmex = BitMexBackTest()
        end_time = datetime.datetime.now(datetime.timezone.utc)
        start_time = end_time - 5 * datetime.timedelta(hours=2)
        with tempfile.TemporaryDirectory() as dir:
            file = dir + "/tmp.csv"
            bitmex.download_data(file, '2h', start_time, end_time)
            data_frame = load_data(file)
            now = datetime.datetime.now(datetime.timezone.utc)
            assert data_frame.iloc[0].name < now
Example #8
def test_load_data():
    from src import load_data

    n_features = np.random.randint(5, 20)
    n_samples = np.random.randint(50, 150)
    features, targets, attribute_names = write_random_csv_file(
        n_features, n_samples)

    _features, _targets, _attribute_names = load_data('tests/test.csv')
    assert attribute_names == _attribute_names
    assert np.allclose(features, _features) and np.allclose(targets, _targets)
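
The loader under test returns a (features, targets, attribute_names) triple. A possible implementation, assuming a header row of attribute names, numeric values, and the target in the last column (these are assumptions; the graded version may differ):

import csv

import numpy as np


def load_data(path):
    # Read the CSV, split off the header, and treat the last column as the target.
    with open(path, newline='') as f:
        rows = list(csv.reader(f))
    attribute_names = rows[0][:-1]
    values = np.array(rows[1:], dtype=float)
    features, targets = values[:, :-1], values[:, -1]
    return features, targets, attribute_names
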
def test_information_gain():
    from src import load_data
    from src import information_gain

    _features, _targets, _attribute_names = load_data('data/PlayTennis.csv')
    iGHumidity = information_gain(_features, 2, _targets)
    iGWind = information_gain(_features, 3, _targets)
    realIGHumidity = 0.1515
    realIGWind = 0.048

    assert np.abs(iGHumidity - realIGHumidity) < 1e-3
    assert np.abs(iGWind - realIGWind) < 1e-3
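
For reference, the quantity checked against 0.1515 and 0.048 is the classic ID3 information gain: the entropy of the targets minus the weighted entropy after splitting on one attribute. A straightforward implementation (not necessarily the one the course provides):

import numpy as np


def information_gain(features, attribute_index, targets):
    def entropy(labels):
        # Shannon entropy of a label vector, in bits.
        _, counts = np.unique(labels, return_counts=True)
        p = counts / counts.sum()
        return -np.sum(p * np.log2(p))

    column = features[:, attribute_index]
    gain = entropy(targets)
    for value in np.unique(column):
        subset = targets[column == value]
        gain -= (len(subset) / len(targets)) * entropy(subset)
    return gain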
Example #10
def main():
    data_generator = load_data()
    _history = []
    device = None
    model = None
    criterion = None
    fold_index = 0

    for TEXT, LABEL, train_data, val_data in data_generator.get_fold_data(num_folds=args['num_folds']):
        logger.info("***** Running Training *****")
        logger.info(f"Now fold: {fold_index + 1} / {args['num_folds']}")

        TEXT.build_vocab(train_data, max_size=25000, vectors="glove.6B.300d")
        logger.info(f'Embedding size: {TEXT.vocab.vectors.size()}.')
        LABEL.build_vocab(train_data) # For converting str into float labels.

        model = Model(len(TEXT.vocab), args['embedding_dim'], args['hidden_dim'],
            args['output_dim'], args['num_layers'], args['dropout'], TEXT.vocab.vectors, args["embedding_trainable"])
        
        optimizer = optim.Adam(model.parameters())
        criterion = nn.BCEWithLogitsLoss()

        if args['gpu'] and args['gpu_number'] is not None:
            torch.cuda.set_device(args['gpu_number'])
            device = torch.device('cuda')
            model = model.to(device)
            criterion = criterion.to(device)
        else:
            device = torch.device('cpu')
            model = model.to(device)
            criterion = criterion.to(device)
        
        train_iterator = data.Iterator(train_data, batch_size=args['batch_size'], sort_key=lambda x: len(x.text), device=device)
        val_iterator = data.Iterator(val_data, batch_size=args['batch_size'], sort_key=lambda x: len(x.text), device=device)

        for epoch in range(args['epochs']):
            train_loss, train_acc = train_run(model, train_iterator, optimizer, criterion)
            logger.info(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
        val_loss, val_acc = eval_run(model, val_iterator, criterion)
        logger.info(f'Val. Loss: {val_loss:.3f} | Val. Acc: {val_acc*100:.2f}% |')

        _history.append([val_loss, val_acc])
        fold_index += 1
    
    _history = np.asarray(_history)
    loss = np.mean(_history[:, 0])
    acc = np.mean(_history[:, 1])
    
    logger.info('***** Cross Validation Result *****')
    logger.info(f'LOSS: {loss}, ACC: {acc}')
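
train_run and eval_run are not shown. Under the usual torchtext-legacy conventions assumed here (batches exposing .text and .label, the model emitting a single logit per example), they might look roughly like the sketch below; eval_run would mirror train_run with model.eval(), torch.no_grad(), and no optimizer step.

import torch


def binary_accuracy(preds, y):
    # Round the sigmoid of the logits to 0/1 and compare with the gold labels.
    return (torch.round(torch.sigmoid(preds)) == y).float().mean().item()


def train_run(model, iterator, optimizer, criterion):
    # One pass over the training iterator; returns mean loss and mean accuracy.
    model.train()
    epoch_loss, epoch_acc = 0.0, 0.0
    for batch in iterator:
        optimizer.zero_grad()
        predictions = model(batch.text).squeeze(1)  # assumes output shape [batch, 1]
        loss = criterion(predictions, batch.label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += binary_accuracy(predictions, batch.label)
    return epoch_loss / len(iterator), epoch_acc / len(iterator)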
Example #11
def prep_data(DATA_FILE, PERCENTAGE_TRAIN):
    def pad(sentence):
        if (len(sentence) > MAX_SEQ_LEN):
            return sentence[:MAX_SEQ_LEN]
        else:
            num_pads = MAX_SEQ_LEN - len(sentence)
            return sentence + [""] * num_pads

    def process_rows(data, glove_vectors):
        """
            Generate input x matrix M[num_data_points][glove_dimension][max_seq_length]
        """

        inputs, outputs = [], []
        for sentence, mbti_hot in data:
            x = []
            ### Pad sentence
            padded_sentence = pad(sentence)

            ### Convert each word in sentence to glove vector
            for w in padded_sentence:
                embedding = np.array([0.0] * GLOVE_DIMENSION)
                if (w in glove_vectors):
                    embedding = glove_vectors[w]
                x.append(embedding)

            inputs.append(x)
            outputs.append(mbti_hot)

        return inputs, outputs

    train_data, test_data = load_data(DATA_FILE, PERCENTAGE_TRAIN)
    glove_vectors = load_word_vectors("../data/glove.6B.50d.txt", GLOVE_DIMENSION)
    train_x, train_y = process_rows(train_data, glove_vectors)
    test_x, test_y = process_rows(test_data, glove_vectors)

    # Debug preview: print the first few processed training examples
    for i, x in enumerate(train_x[:10]):
        print("x", x)
        print("y", train_y[i])

    return np.array(train_x), np.array(test_x), np.array(train_y), np.array(test_y)
Example #12
        # The prefix "nn__module__" must be used when setting hyperparameters for the Neural Net's module
        params = {
            'nn__max_epochs': [10, 20],
            'nn__lr': [0.1, 0.01],
            'nn__module__num_units': [5, 10],
            'nn__module__dropout': [0.1, 0.5],
            'nn__optimizer': [optim.Adam, optim.SGD, optim.RMSprop]
        }

        # The grid search module is instantiated
        gs = GridSearchCV(pipeline,
                          params,
                          refit=False,
                          cv=3,
                          scoring='balanced_accuracy',
                          verbose=1)
        # Run the grid search
        gs.fit(self.x, self.y)


if __name__ == "__main__":
    x, y = load_data()

    run = Run(x, y)

    # run.simple_training()
    # run.simple_pipeline_training()
    # run.simple_pipeline_training_with_callbacks()
    run.grid_search_pipeline_training()
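
For context, the nn__ and nn__module__ prefixes come from combining an sklearn Pipeline step named 'nn' with skorch's module__ parameter routing. A sketch of a pipeline those params would target (the module, its arguments, and the scaling step are illustrative, not taken from the original script):

import torch.nn as nn
from skorch import NeuralNetClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


class ClassifierModule(nn.Module):
    # num_units and dropout line up with the nn__module__ keys in params.
    def __init__(self, num_units=10, dropout=0.5, num_features=20, num_classes=2):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(num_features, num_units),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(num_units, num_classes),
            nn.Softmax(dim=-1),
        )

    def forward(self, X):
        return self.net(X)


pipeline = Pipeline([
    ('scale', StandardScaler()),
    ('nn', NeuralNetClassifier(ClassifierModule, verbose=0)),  # the 'nn' step name yields the nn__ prefix
])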
Example #13
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
from src.cus_string import *
from src.check_crpyto import *
from src.cryptomodule import *
from src.listdir import *
from src.load_data import *
from src.out_data import *
import timeit, sys

sys.setrecursionlimit(20000)

if __name__ == "__main__":
    path = os.getcwd()
    file = list_dir(path)
    records = load_data(path, file)
    encrypt_list, decrypt_list, enc_flag, dec_flag = check_list(records)

    while True:
        if enc_flag:
            result = []
            result_rec = []
            print("Encrypted Data!")
            attribute = True

            for string in encrypt_list:
                result_string = ""
                start = timeit.default_timer()
                result.append(encrypt_string(string.rstrip("\n")))
                end = timeit.default_timer()
                print("lteractive call execute time : %f" % (end - start))
Example #14
from src.bubble_sort import *
from src.heap_sort import *
from src.insert_sort import *
from src.merge_sort import *
from src.quick_sort import *
from src.select_sort import *
from src.load_data import *
from src.out_data import *
from src.interface import *
from src.listdir import *
import timeit

if __name__ == "__main__":
    path = os.getcwd()
    file_name = list_dir(path)
    original, field = load_data(path, file_name)
    data = original
    res = None
    attribute = 0

    try:
        column = int(input("Input column in Data>> "))
        if column > len(field) or column < 1:
            print("Error : invaild column ")
            exit(0)

        column = column - 1

    except ValueError:
        print("Error :  invaild value ...")
        exit(0)
Example #15
        'deep': True,
        'nonlinear': 'relu'
    }, {
        'bn': True,
        'dropout': True,
        'branched': True,
        'deep': True,
        'nonlinear': 'elu'
    }]

    for model_name, model_config in zip(model_names, model_configs):
        for subject in subjects:

            print('working on subject : ', subject)
            x, y, t, tr = load_data(subject,
                                    channels=range(29),
                                    frequency=dataset)
            x = preprocessing(x, frequency=dataset)
            metrics, histories, cnf_matrices = cross_validator(
                (x, y, t, tr),
                subject,
                n_splits=5,
                epochs=epochs,
                batch_size=batch_size,
                lr=lr,
                model_name=model_name,
                model_config=model_config,
                early_stopping=early_stopping,
                patience=patience)

            super_final_results = save_resutls(metrics,