Example #1
0
    def predict_day(self, pipeline: str, dataset: str, target: str,
                    symbol: str, day: str, window: dict):
        """Train on the window that ends at ``day`` and predict that day.

        The model is looked up through ``self.get_model``. Features and target
        are loaded for the span starting ``window`` before ``day`` and ending
        at ``day``, then handed to the module-level ``predict_day`` helper
        together with the pipeline's estimator and the last entry of
        ``model.parameters``.

        :param pipeline: pipeline name used to look up the model
        :param dataset: dataset name used to look up the model
        :param target: target name used to look up the model
        :param symbol: symbol used to look up the model
        :param day: timestamp closing the training window
        :param window: interval subtracted from ``day`` to get the window start
        :return: the result of the module-level ``predict_day`` helper
        :raises MessageException: if the dataset's first valid index is later
            than the window start, or the target holds fewer than 2 classes
        """
        model = self.get_model(pipeline, dataset, target, symbol)

        dataset_service = DatasetService()
        dataset_info = dataset_service.get_dataset(model.dataset, model.symbol)

        # The training span reaches back from `day` by one `window`
        begin = sub_interval(timestamp=day, interval=window)
        first_valid = from_timestamp(dataset_info.valid_index_min).timestamp()
        if first_valid > from_timestamp(begin).timestamp():
            raise MessageException("Not enough data for training! [Pipeline: {} Dataset: {} Symbol: {} Window: {}]" \
                                   .format(model.pipeline, model.dataset, model.symbol, window))

        X = dataset_service.get_features(model.dataset,
                                         model.symbol,
                                         begin=begin,
                                         end=day)
        y = dataset_service.get_target(model.target,
                                       model.symbol,
                                       begin=begin,
                                       end=day)

        # A classifier cannot be trained on a single class
        classes = np.unique(y)
        if len(classes) < 2:
            logging.error(
                "[{}-{}-{}-{}]Training data contains less than 2 classes: {}".
                format(model.symbol, model.dataset, model.target,
                       model.pipeline, classes))
            raise MessageException(
                "Training data contains less than 2 classes: {}".format(
                    classes))

        pipeline_module = get_pipeline(model.pipeline)
        # Inside a method body the bare name resolves to the module-level helper
        return predict_day(pipeline_module.estimator, model.parameters[-1], X,
                           y, day)
Example #2
0
    def test_model(self, model: Model, mt: ModelTest, **kwargs):
        """Run a sliding-window test for ``model`` and store it on the model.

        The model is created in the repository when it has no id yet. If a
        test with ``mt.task_key`` is already recorded, ``mt`` is returned
        unchanged. Otherwise data is loaded from one window before the test
        interval begins up to one step after it ends, the windows are
        evaluated with ``test_windows`` and the classification / ROC AUC
        reports are written onto ``mt`` before it is appended to the model.

        :param model: model under test
        :param mt: test request; filled in with timings and results
        :return: ``mt``
        :raises MessageException: if the dataset does not reach back to the
            first training window, or the target holds fewer than 2 classes
        """
        if not model.id:
            model = self.model_repo.create(model)

        already_executed = self.model_repo.exist_test(model.id, mt.task_key)
        if already_executed:
            logging.info("Model {} test {} already executed!".format(
                model.id, mt.task_key))
            return mt

        dataset_service = DatasetService()
        dataset_info = dataset_service.get_dataset(model.dataset, model.symbol)

        # Extend the test interval backwards by the first training window
        # and forwards by one step
        begin = sub_interval(timestamp=mt.test_interval.begin,
                             interval=mt.window)
        end = add_interval(timestamp=mt.test_interval.end, interval=mt.step)
        first_valid = from_timestamp(dataset_info.valid_index_min).timestamp()
        if first_valid > from_timestamp(begin).timestamp():
            raise MessageException("Not enough data for training! [Pipeline: {} Dataset: {} Symbol: {} Window: {}]" \
                                   .format(model.pipeline, model.dataset, model.symbol, mt.window))

        X = dataset_service.get_features(model.dataset,
                                         model.symbol,
                                         begin=begin,
                                         end=end)
        y = dataset_service.get_target(model.target,
                                       model.symbol,
                                       begin=begin,
                                       end=end)

        classes = np.unique(y)
        if len(classes) < 2:
            logging.error(
                "[{}-{}-{}-{}]Training data contains less than 2 classes: {}".
                format(model.symbol, model.dataset, model.target,
                       model.pipeline, classes))
            raise MessageException(
                "Training data contains less than 2 classes: {}".format(
                    classes))

        pipeline_module = get_pipeline(model.pipeline)
        ranges = timestamp_windows(begin, end, mt.window, mt.step)

        # Time only the window evaluation itself
        mt.start_at = get_timestamp()
        df = test_windows(pipeline_module.estimator, mt.parameters, X, y,
                          ranges)
        mt.end_at = get_timestamp()

        mt.classification_results = df.to_dict()

        # Merge the ROC AUC figures into the classification report
        clf_report = flattened_classification_report_imbalanced(
            df.label, df.predicted)
        proba_columns = [c for c in df.columns if '_proba_' in c]
        roc_report = roc_auc_report(df.label, df.predicted, df[proba_columns])
        clf_report.update(roc_report)
        mt.classification_report = clf_report

        self.model_repo.append_test(model.id, mt)
        return mt
Example #3
0
 def get_builder(self, builder):
     """Import and validate the builder module ``cryptoml.builders.<builder>``.

     :param builder: builder module name; must be listed in ``BUILDER_LIST``
     :return: the imported module, guaranteed to expose a ``build`` attribute
     :raises MessageException: if the name is not in ``BUILDER_LIST``, the
         import fails, or the module has no ``build`` attribute
     """
     if builder not in BUILDER_LIST:
         raise MessageException('Package cryptoml.builders has no {} module!'.format(builder))
     # Only the import itself is guarded: the validation errors below must
     # reach the caller with their own message instead of being rewrapped
     # as a generic import failure.
     try:
         builder_module = importlib.import_module('cryptoml.builders.{}'.format(builder))
     except Exception as e:
         logging.exception(e)
         raise MessageException('Failed to import cryptoml.builders.{} !'.format(builder)) from e
     if not builder_module:
         raise MessageException('Failed to import cryptoml.builders.{} (importlib returned None)!'.format(builder))
     if not hasattr(builder_module, 'build'):
         raise MessageException('Builder cryptoml.builders.{} has no "build" method!'.format(builder))
     return builder_module
def predict_day(est, parameters, X, y, day, **kwargs):
    """Evaluate a single window with ``_test_window`` and return its entry.

    :param est: estimator forwarded to ``_test_window``
    :param parameters: estimator parameters forwarded to ``_test_window``
    :param X: features forwarded to ``_test_window``
    :param y: target forwarded to ``_test_window``
    :param day: day forwarded to ``_test_window``
    :param kwargs: accepted for call compatibility; not used
    :return: DataFrame built from the result's ``"entry"``, indexed by ``time``
    :raises MessageException: if the window yields no result or an empty frame
    """
    result = _test_window(est, parameters, X, y, day)
    # test_windows() skips None results from _test_window, so None is a
    # possible outcome here too; report it as an empty result rather than
    # failing with a TypeError on the subscript below.
    if result is None:
        raise MessageException("predict_day: Empty result dataframe!")
    df = pd.DataFrame(result["entry"])
    if df.empty:
        raise MessageException("predict_day: Empty result dataframe!")
    return df.set_index('time')
Example #5
0
    def _get_dataset_and_pipeline(self, model: Model, mp: ModelParameters,
                                  **kwargs):
        """Load the CV features/target and the pipeline module for a search.

        The model is created in the repository when it has no id yet.

        NOTE(review): the return shape differs between the two exits. When
        parameters with ``mp.task_key`` already exist, ``mp`` alone is
        returned; otherwise a ``(pipeline_module, X, y)`` tuple is returned.

        :param model: model the search belongs to
        :param mp: search request providing ``cv_interval``, ``features`` and
            ``task_key``
        :raises MessageException: if the target holds fewer than 2 classes
        """
        # Make sure the task exists
        if not model.id:
            model = self.model_repo.create(model)

        already_executed = self.model_repo.exist_parameters(
            model.id, mp.task_key)
        if already_executed:
            logging.info("Model {} Grid search {} already executed!".format(
                model.id, mp.task_key))
            return mp

        interval = mp.cv_interval
        X = self.dataset_service.get_features(model.dataset,
                                              model.symbol,
                                              interval.begin,
                                              interval.end,
                                              columns=mp.features)
        y = self.dataset_service.get_target(model.target, model.symbol,
                                            interval.begin, interval.end)

        classes = np.unique(y)
        if len(classes) < 2:
            logging.error(
                "[{}-{}-{}-{}]Training data contains less than 2 classes: {}".
                format(model.symbol, model.dataset, model.target,
                       model.pipeline, classes))
            raise MessageException(
                "Training data contains less than 2 classes: {}".format(
                    classes))
        logging.info("Dataset loaded: X {} y {} (unique: {})".format(
            X.shape, y.shape, classes))

        return get_pipeline(model.pipeline), X, y
    def feature_selection(self, mf: ModelFeatures, **kwargs) -> ModelFeatures:
        """Run the feature selection requested by ``mf`` and record the result.

        Supported ``mf.feature_selection_method`` values are ``importances``
        (also used when the method is empty), ``importances_cv``, ``fscore``,
        ``relieff`` and ``multisurf``. Feature importances are stored only for
        the two importance-based methods.

        :param mf: search request; updated with timings, importances and the
            selected features
        :param kwargs: ``save`` (default ``True``) controls whether the result
            is appended through the model repository
        :return: the updated ``mf``
        :raises MessageException: if the target holds fewer than 2 classes
        :raises NotFoundException: if the selection method is unknown
        """
        interval = mf.search_interval
        X = self.dataset_service.get_features(mf.dataset,
                                              mf.symbol,
                                              interval.begin,
                                              interval.end,
                                              columns=mf.features)
        y = self.dataset_service.get_target(mf.target, mf.symbol,
                                            interval.begin, interval.end)

        classes = np.unique(y)
        if len(classes) < 2:
            logging.error(
                "[{}-{}-{}]Training data contains less than 2 classes: {}".
                format(mf.symbol, mf.dataset, mf.target, classes))
            raise MessageException(
                "Training data contains less than 2 classes: {}".format(
                    classes))

        # The search is timed from here up to the end of the selector fit
        mf.start_at = get_timestamp()
        method = mf.feature_selection_method
        if not method or method == 'importances':
            selector = select_from_model(X, y)
            mf.feature_importances = label_feature_importances(
                selector.estimator_, X.columns)
        elif method == 'importances_cv':
            selector = select_from_model_cv(X, y)
            # The CV selector wraps a search object; importances come from
            # its best estimator
            mf.feature_importances = label_feature_importances(
                selector.estimator_.best_estimator_, X.columns)
        elif method == 'fscore':
            selector = select_percentile(X, y, percentile=10)
        elif method == 'relieff':
            selector = select_relieff(X, y, percentile=10)
        elif method == 'multisurf':
            selector = select_multisurf(X, y, percentile=10)
        else:
            raise NotFoundException(
                "Cannot find feature selection method by {}".format(
                    mf.feature_selection_method))
        mf.end_at = get_timestamp()

        # Replace the requested feature list with the selected one
        mf.features = label_support(selector.get_support(), X.columns)

        if kwargs.get('save', True):
            query = {
                "dataset": mf.dataset,
                "symbol": mf.symbol,
                "target": mf.target
            }
            self.model_repo.append_features_query(query, mf)
        return mf
 def update_take_profit(self, asset: Asset, position: Position,
                        close_price: float, pct: float):
     """Recompute a position's take profit and persist the owning asset.

     :param asset: asset whose ``positions`` list contains ``position``
     :param position: position to update (mutated in place)
     :param close_price: price passed to ``TradingService.apply_price_change``
     :param pct: change passed to ``TradingService.apply_price_change``
     :return: the result of ``self.repo.update``
     :raises MessageException: if the position is not found in the asset
     """
     # The slot index is needed to write the updated position back
     slot = TradingService.get_position_index(asset, position)
     if slot is None:
         raise MessageException(
             f"[TP Update] Position {position.id} is not related to asset {asset.symbol} with ID {asset.id}!"
         )
     new_take_profit = TradingService.apply_price_change(close_price, pct)
     position.take_profit = new_take_profit
     asset.positions[slot] = position
     return self.repo.update(asset.id, asset)
Example #8
0
    def create_parameters_search(self, model: Model, split: float,
                                 **kwargs) -> ModelParameters:
        """Build a ``ModelParameters`` search request for ``model``.

        :param model: model whose dataset and symbol are searched
        :param split: split passed to
            ``DatasetService.get_train_test_split_indices``
        :param kwargs:
            ``features``: feature selection method name; when truthy the
            stored selection for that method is looked up and its feature
            list is used.
            ``target``: target of that selection (default ``'class'``).
            ``permissive``: when truthy, do not raise on too few samples.
            ``task_key``: task key (default: a fresh UUID).
        :return: request covering the train split with 5 or 3 CV folds
        :raises MessageException: if the requested feature selection is missing
        :raises ValueError: if even 3 folds leave fewer than 40 samples per
            fold and ``permissive`` is not set
        """
        ds = self.dataset_service.get_dataset(model.dataset, model.symbol)
        splits = DatasetService.get_train_test_split_indices(ds, split)

        # `features` names a selection method; resolve it to a feature list
        features = kwargs.get('features')
        if features:
            target = kwargs.get('target', 'class')
            selection = DatasetService.get_feature_selection(ds=ds,
                                                             method=features,
                                                             target=target)
            if not selection:
                raise MessageException(
                    f"Feature selection not found for {model.dataset}.{model.symbol} -> {target}!"
                )
            features = selection.features

        # Each fold trains on 80% of its samples and the smallest useful
        # training window is 30 samples: 30 / 0.8 = 37.5, rounded up to 40
        min_fold_samples = 40
        # Fall back from 5 to 3 folds when 5 folds would be too small
        folds = 3 if ds.count / 5 < min_fold_samples else 5
        too_small = ds.count / folds < min_fold_samples
        if too_small and not kwargs.get("permissive"):
            raise ValueError("Not enough samples to perform cross validation!")

        return ModelParameters(cv_interval=splits['train'],
                               cv_splits=folds,
                               task_key=kwargs.get('task_key', str(uuid4())),
                               features=features or None)
Example #9
0
def roc_auc_report(y_true, y_pred, y_pred_proba):
    """Compute ROC AUC scores for a set of predictions.

    With more than two classes in ``y_true`` the four multiclass scores
    (one-vs-one / one-vs-rest, macro / weighted) are computed from the class
    probabilities. Otherwise a single ``roc_auc`` score is computed from the
    predicted labels. A ``ValueError`` from the scorer is reported on stdout
    and leaves the affected scores out of the result.

    :param y_true: true labels; must expose ``.values``
    :param y_pred: predicted labels
    :param y_pred_proba: per-class probabilities; must expose ``.values``.
        NaN and infinite entries are replaced with 0
    :return: dict mapping score name to ``float``; may be empty
    :raises MessageException: if ``y_true`` contains NaN or infinite values
    """
    if np.isnan(y_true.values).any() or np.isinf(y_true.values).any():
        raise MessageException("y_true contains NaN")
    proba_values = y_pred_proba.values
    if np.isnan(proba_values).any() or np.isinf(proba_values).any():
        # If the classifier has diverged, predict_proba contains NaN/inf.
        # Replace them with 0. This is done explicitly because the pandas
        # option 'mode.use_inf_as_na' is deprecated.
        y_pred_proba = y_pred_proba.replace([np.inf, -np.inf],
                                            np.nan).fillna(value=0)
    classes = np.unique(y_true)
    result = {}
    try:
        # The multiclass scores only apply with MORE than two classes. The
        # previous `< 2` test sent multiclass data down the binary path,
        # where the scorer always raised.
        if classes.size > 2:
            for scheme in ('ovo', 'ovr'):
                for average in ('macro', 'weighted'):
                    key = 'roc_auc_{}_{}'.format(scheme, average)
                    result[key] = roc_auc_score(y_true,
                                                y_pred_proba,
                                                average=average,
                                                multi_class=scheme)
        else:
            result['roc_auc'] = roc_auc_score(y_true, y_pred)
    except ValueError as e:
        # Best effort: keep whatever scores were computed before the failure
        print("ROC_AUC Failed {}".format(e))

    return {k: float(v) for k, v in result.items()}
def test_windows(est, parameters, X, y, ranges, parallel=True, **kwargs):
    """Evaluate ``_test_window`` on every ``(begin, end)`` range.

    :param est: estimator forwarded to ``_test_window``
    :param parameters: estimator parameters forwarded to ``_test_window``
    :param X: features, sliced with ``X.loc[begin:end, :]`` per range
    :param y: target, sliced with ``y.loc[begin:end]`` per range
    :param ranges: iterable of ``(begin, end)`` pairs
    :param parallel: run the windows through joblib when true, otherwise
        sequentially
    :param kwargs: ``n_jobs``: worker count for the parallel run (default:
        half the CPU count, at least 1)
    :return: DataFrame of the non-None results' ``"entry"`` values, indexed
        by ``time``
    :raises MessageException: if no window produced an entry
    """
    if parallel:
        n_jobs = kwargs.get('n_jobs')
        if n_jobs is None:
            # Half the cores, but never 0: joblib rejects n_jobs == 0
            n_jobs = max(1, cpu_count() // 2)
        results = Parallel(n_jobs=int(n_jobs))(delayed(_test_window)(
            est, parameters, X.loc[b:e, :], y.loc[b:e], e) for b, e in ranges)
    else:
        results = [
            _test_window(est, parameters, X.loc[b:e, :], y.loc[b:e], e)
            for b, e in ranges
        ]

    # Windows that produced no result are skipped
    entries = [r["entry"] for r in results if r is not None]
    df = pd.DataFrame(entries)
    if df.empty:
        raise MessageException("TestWindows: Empty result dataframe!")
    # The leftover debug loop that printed every SHAP array was removed:
    # it produced no data and flooded stdout.
    return df.set_index('time')
    def open_long(self, asset: Asset, day: str, close_price: float,
                  size: float, **kwargs):
        """Open a MARGIN_LONG position on ``asset`` and persist the asset.

        :param asset: asset to trade; its wallets, counters, positions and
            orders are mutated in place
        :param day: simulation day, used as open/order timestamp
        :param close_price: market close price, used as opening price
        :param size: size passed to
            ``TradingService.get_position_size_amount``
        :param kwargs: ``stop_loss`` / ``take_profit``: changes passed to
            ``TradingService.apply_price_change`` (``None`` when absent);
            ``detail``: stored on the order
        :return: the result of ``self.repo.update``
        :raises MessageException: if the fiat wallet cannot cover price plus
            fee, or the new fiat loan would exceed ``FIAT_LOAN_LIMIT``
        """
        # Fiat value of the operation and collateral amount it buys
        op_price, amount = TradingService.get_position_size_amount(
            asset, close_price, size)

        # Fixed opening fee for margin longs, paid immediately in fiat
        op_fee = round(op_price * MARGIN_LONG_FIXED_FEE, 3)

        # The wallet must hold the position's price (to be able to repay
        # the loan later) plus the fee
        need_fiat = op_price + op_fee
        if asset.fiat < need_fiat:
            raise MessageException(
                "Not enough fiat to open LONG position on "
                f"{asset.symbol}: Wallet: {asset.fiat} Needed: {need_fiat}")

        new_loan = round(asset.fiat_loan + op_price, 3)
        if new_loan > FIAT_LOAN_LIMIT:
            raise MessageException(
                "Not enough allowance to open LONG position on "
                f"{asset.symbol}: Cur_loan: {asset.fiat_loan} "
                f"New loan: {new_loan} "
                f"Need allowance: {new_loan - FIAT_LOAN_LIMIT}")

        position_id = get_uuid()
        stop_loss = TradingService.apply_price_change(close_price,
                                                      kwargs.get('stop_loss'))
        take_profit = TradingService.apply_price_change(
            close_price, kwargs.get('take_profit'))
        position = Position(
            id=position_id,
            type='MARGIN_LONG',
            status='OPEN',
            # Opened at market close price (approximation)
            open_price=close_price,
            # Day of the simulation, not wall-clock time
            open_timestamp=day,
            # Collateral amount
            amount=amount,
            # Margin long: fiat is borrowed, collateral is not
            coll_loan=0.0,
            fiat_loan=op_price,
            stop_loss=stop_loss,
            take_profit=take_profit,
            # Reference price for stop loss
            last_price=close_price,
            open_fee=op_fee)

        order = Order(id=get_uuid(),
                      position_id=position.id,
                      timestamp=day,
                      type='OPEN_LONG',
                      amount=amount,
                      price=close_price,
                      detail=kwargs.get('detail'),
                      change=None)

        # Apply the trade to the asset
        # Opening fee leaves the fiat wallet immediately
        asset.fiat = round(asset.fiat - op_fee, 3)
        # Borrowed fiat grows by the operation price (computed above)
        asset.fiat_loan = new_loan
        # Purchased collateral enters the balance
        asset.balance = round(asset.balance + amount, 8)
        asset.num_long += 1
        asset.positions.append(position)
        asset.orders.append(order)

        # Persist the asset and return the repository's result
        return self.repo.update(asset.id, asset)
Example #12
0
    def grid_search_new(self, symbol: str, dataset: str, target: str,
                        pipeline: str, split: float,
                        feature_selection_method: str, **kwargs):
        """Run a grid search on the train split and create the model.

        :param symbol: dataset symbol
        :param dataset: dataset name
        :param target: target name
        :param pipeline: pipeline name; supplies the estimator and the
            default ``PARAMETER_GRID``
        :param split: split passed to
            ``DatasetService.get_train_test_split_indices``
        :param feature_selection_method: selection method used to pick the
            feature columns
        :param kwargs: ``replace``, ``save``, ``permissive``,
            ``parameter_grid``, ``verbose``, ``n_jobs``, ``task_key``
        :return: the populated ``ModelParameters``
        :raises MessageException: if a grid search already exists and ``save``
            is set without ``replace``, or the target holds fewer than 2
            classes
        :raises ValueError: if there are too few samples for 3-fold CV and
            ``permissive`` is not set

        NOTE(review): ``save`` is read without a default for the
        "already performed" check but defaults to ``True`` when results are
        uploaded. Confirm that this is intended.
        """
        # An existing grid search is replaced on request, or refused when
        # saving
        existing_model = self.model_service.get_model(pipeline=pipeline,
                                                      dataset=dataset,
                                                      target=target,
                                                      symbol=symbol)
        if existing_model and ModelService.get_model_parameters(
                existing_model, method='gridsearch'):
            if kwargs.get('replace'):
                self.model_service.remove_parameters(model=existing_model,
                                                     method='gridsearch')
            elif kwargs.get('save'):
                raise MessageException(
                    f"Grid search already performed for {pipeline}({dataset}.{symbol}) -> {target}"
                )

        ds = self.dataset_service.get_dataset(dataset, symbol)

        # Each fold trains on 80% of its samples and the smallest useful
        # training window is 30 samples: 30 / 0.8 = 37.5, rounded up to 40
        min_fold_samples = 40
        # Fall back from 5 to 3 folds when 5 folds would be too small
        cv_splits = 3 if ds.count / 5 < min_fold_samples else 5
        too_small = ds.count / cv_splits < min_fold_samples
        if too_small and not kwargs.get("permissive"):
            raise ValueError("Not enough samples to perform cross validation!")

        splits = DatasetService.get_train_test_split_indices(ds, split)
        cv_interval = splits['train']

        # Features restricted to those chosen by the selection method
        X = self.dataset_service.get_dataset_features(
            ds=ds,
            begin=cv_interval['begin'],
            end=cv_interval['end'],
            method=feature_selection_method,
            target=target)
        y = self.dataset_service.get_target(
            name=target,
            symbol=symbol,
            begin=cv_interval['begin'],
            end=cv_interval['end'],
        )

        classes = np.unique(y)
        if len(classes) < 2:
            logging.error(
                "[{}-{}-{}-{}]Training data contains less than 2 classes: {}".
                format(symbol, dataset, target, pipeline, classes))
            raise MessageException(
                "Training data contains less than 2 classes: {}".format(
                    classes))
        logging.info("Dataset loaded: X {} y {} (unique: {})".format(
            X.shape, y.shape, classes))

        # Estimator and default parameter grid come from the pipeline module
        pipeline_module = get_pipeline(pipeline)
        param_grid = kwargs.get('parameter_grid',
                                pipeline_module.PARAMETER_GRID)
        search = GridSearchCV(estimator=pipeline_module.estimator,
                              param_grid=param_grid,
                              cv=StratifiedKFold(n_splits=cv_splits),
                              scoring=get_precision_scorer(),
                              verbose=kwargs.get("verbose", 0),
                              n_jobs=kwargs.get("n_jobs", None),
                              refit=False)

        mp = ModelParameters(cv_interval=splits['train'],
                             cv_splits=cv_splits,
                             task_key=kwargs.get('task_key', str(uuid4())),
                             features=list(X.columns),
                             parameter_search_method='gridsearch')

        # Time only the fit
        mp.start_at = get_timestamp()
        search.fit(X, y)
        mp.end_at = get_timestamp()

        results_df = pd.DataFrame(search.cv_results_)
        mp.parameters = search.best_params_
        # Every cv_results_ column except the raw 'params' dicts
        keep_columns = results_df.columns != 'params'
        mp.cv_results = results_df.loc[:, keep_columns].to_dict('records')

        tag = "{}-{}-{}-{}-{}".format(symbol, dataset, target, pipeline,
                                      dict_hash(mp.parameters))
        mp.result_file = 'cv_results-{}.csv'.format(tag)

        # A new model is always created, even when one already exists
        model = Model(pipeline=pipeline,
                      dataset=dataset,
                      target=target,
                      symbol=symbol,
                      features=feature_selection_method)
        model.parameters.append(mp)
        self.model_repo.create(model)

        # Upload best parameters and the full CV table
        if kwargs.get('save', True):
            storage_service.upload_json_obj(mp.parameters,
                                            'grid-search-results',
                                            'parameters-{}.json'.format(tag))
            storage_service.save_df(results_df, 'grid-search-results',
                                    mp.result_file)
        return mp
    def feature_selection_new(self, *, symbol: str, dataset: str, target: str,
                              split: float, method: str,
                              **kwargs) -> ModelFeatures:
        """Run a feature selection on the train split of a dataset.

        Methods containing ``importances`` use a model-based selector (the
        cross-validated variant when the name contains ``_cv``, plus SHAP
        values when it contains ``_shap``). ``fscore``, ``relieff`` and
        ``multisurf`` keep the top 10 percentile. An empty method is treated
        as plain ``importances``.

        :param symbol: dataset symbol
        :param dataset: dataset name
        :param target: target name
        :param split: split passed to
            ``DatasetService.get_train_test_split_indices``
        :param method: feature selection method name
        :param kwargs: ``replace``: drop an existing selection first;
            ``save``: persist the result (and refuse duplicates);
            ``task_key``: task key (default: a fresh UUID)
        :return: the ``FeatureSelection`` when ``save`` is falsy, otherwise
            the result of ``dataset_service.append_feature_selection``
        :raises MessageException: if the selection already exists and ``save``
            is set without ``replace``, or the target holds fewer than 2
            classes
        :raises NotFoundException: if the method is unknown
        """
        ds = self.dataset_service.get_dataset(dataset, symbol)
        fs_exists = DatasetService.has_feature_selection(ds=ds,
                                                         method=method,
                                                         target=target)
        if fs_exists:
            if kwargs.get('replace'):
                self.dataset_service.remove_feature_selection(ds=ds,
                                                              method=method,
                                                              target=target)
            elif kwargs.get('save'):
                raise MessageException(
                    f"Feature selection with method '{method}' alrady performed for '{dataset}.{symbol}' and target '{target}'"
                )

        splits = DatasetService.get_train_test_split_indices(ds, split)
        fs = FeatureSelection(target=target,
                              method=method,
                              search_interval=splits['train'],
                              task_key=kwargs.get('task_key', str(uuid4())))

        # Load dataset
        X = self.dataset_service.get_dataset_features(
            ds=ds, begin=fs.search_interval.begin, end=fs.search_interval.end)
        y = self.dataset_service.get_dataset_target(
            name=fs.target,
            ds=ds,
            begin=fs.search_interval.begin,
            end=fs.search_interval.end)

        unique = np.unique(y)
        if len(unique) < 2:
            logging.error(
                "[{}-{}-{}]Training data contains less than 2 classes: {}".
                format(symbol, dataset, target, unique))
            raise MessageException(
                "Training data contains less than 2 classes: {}".format(
                    unique))

        # Perform search
        fs.start_at = get_timestamp()  # Log starting timestamp
        # An empty method is accepted as plain 'importances'. Normalising it
        # to '' keeps the substring tests below from raising TypeError when
        # the method is None.
        selected_method = fs.method or ''
        if not selected_method or 'importances' in selected_method:
            if '_cv' in selected_method:
                selector = select_from_model_cv(X, y)
            else:
                selector = select_from_model(X, y)
            # NOTE(review): for the '_cv' variant the fitted estimator may be
            # a search wrapper rather than the final model - confirm that
            # label_feature_importances handles it.
            fs.feature_importances = label_feature_importances(
                selector.estimator_, X.columns)
            if '_shap' in selected_method:
                fs.shap_values = get_shap_values(
                    model=selector.estimator_.named_steps.c, X=X, X_train=X)
                # The parsed result is not used; the call itself is kept
                parse_shap_values(fs.shap_values)
        elif selected_method == 'fscore':
            selector = select_percentile(X, y, percentile=10)
        elif selected_method == 'relieff':
            selector = select_relieff(X, y, percentile=10)
        elif selected_method == 'multisurf':
            selector = select_multisurf(X, y, percentile=10)
        else:
            raise NotFoundException(
                "Cannot find feature selection method by {}".format(fs.method))
        fs.end_at = get_timestamp()  # Log ending timestamp

        # Update search request with results
        fs.features = label_support(selector.get_support(), X.columns)

        if not kwargs.get('save'):
            return fs
        return self.dataset_service.append_feature_selection(ds, fs)
Example #14
0
 def _check_builder_args(self, builder_module, args):
     repl = [p for p in inspect.signature(builder_module.build).parameters if not 'args' in p]
     for p in repl:
         if p not in args.keys():
             raise MessageException('Missing Parameter {} in args!'.format(p))
     return repl
Example #15
0
    def test_model_new(self,
                       *,
                       pipeline: str,
                       dataset: str,
                       symbol: str,
                       target: str,
                       split=0.7,
                       step=None,
                       task_key=None,
                       window=None,
                       **kwargs):
        """Fit, save and evaluate one estimator per sliding test window.

        :param pipeline: pipeline name used to look up the model
        :param dataset: dataset name
        :param symbol: dataset symbol
        :param target: target name
        :param split: split passed to
            ``DatasetService.get_train_test_split_indices``
        :param step: window step (default: the dataset's ``interval``)
        :param task_key: task key (default: a fresh UUID)
        :param window: training window (default: ``{'days': 90}``)
        :param kwargs: ``parameters``: parameter search method to look up;
            ``features``: forwarded to ``get_x_y`` and the estimator fit;
            ``n_jobs``: joblib worker count (default: half the CPU count,
            at least 1); ``save``: persist the test on the model
        :return: the result of ``model_repo.append_test`` when ``save`` is
            truthy, otherwise the ``ModelTest``
        :raises MessageException: if the dataset does not reach back to the
            first training window, or no prediction was produced
        """
        test_window = window or {'days': 90}
        model = self.get_model(pipeline=pipeline,
                               dataset=dataset,
                               symbol=symbol,
                               target=target)

        ds = self.dataset_service.get_dataset(dataset, symbol)
        splits = DatasetService.get_train_test_split_indices(ds, split)
        test_interval = splits['test']
        test_step = step or ds.interval

        # Look up the parameter search named by `parameters`. A missing
        # search is only warned about, so `mp` may be None from here on.
        parameters = kwargs.get('parameters')
        features = kwargs.get('features')
        mp = ModelService.get_model_parameters(m=model, method=parameters)
        if not mp:
            logging.warning(
                f"Parameter search with method {parameters} does not exist in model"
                f" {model.pipeline}({model.dataset}.{model.symbol}) -> {model.target}"
            )

        # Get training data including the first training window
        begin = sub_interval(timestamp=test_interval["begin"],
                             interval=test_window)
        end = add_interval(timestamp=test_interval["end"], interval=test_step)
        if from_timestamp(ds.valid_index_min).timestamp() > from_timestamp(
                begin).timestamp():
            raise MessageException(
                f"Not enough data for training with window {test_window}!"
                f" {model.pipeline}({model.dataset}.{model.symbol}) -> {model.target}"
            )
        test_X, test_y = self.dataset_service.get_x_y(dataset, symbol, target,
                                                      features, begin, end)
        # Slice testing interval in "sliding" windows
        windows = [
            (b, e)
            for b, e in timestamp_windows(begin, end, test_window, test_step)
        ]

        # Fit the models and make predictions
        storage_service.create_bucket(bucket='fit-estimators')

        n_jobs = kwargs.get('n_jobs')
        if n_jobs is None:
            # Half the cores, but never 0: joblib rejects n_jobs == 0
            _n_jobs = max(1, cpu_count() // 2)
        else:
            _n_jobs = int(n_jobs)

        logging.info(
            f"Fitting {len(windows)} estimators with {_n_jobs} threads..")
        fit_estimators = Parallel(n_jobs=_n_jobs)(
            delayed(fit_estimator_new)(model=model,
                                       mp=mp,
                                       features=features,
                                       day=e,
                                       window=test_window,
                                       X=test_X,
                                       y=test_y,
                                       b=b,
                                       e=e,
                                       force=not kwargs.get('save'))
            for b, e in tqdm(windows))

        logging.info(
            f"Saving {len(windows)} fit estimators with {_n_jobs} threads..")
        # Run for its side effect; the returned names are not used
        Parallel(n_jobs=_n_jobs)(delayed(save_estimator)(estimator=est, )
                                 for est in tqdm(fit_estimators))

        logging.info(
            f"Predicing {len(windows)} estimators with {_n_jobs} threads..")
        prediction_results = Parallel(n_jobs=_n_jobs)(
            delayed(predict_estimator_day)(estimator=est,
                                           day=est.day,
                                           X=test_X[est.begin:est.end],
                                           y=test_y[est.begin:est.end])
            for est in tqdm(fit_estimators))

        # Windows that produced no prediction are skipped
        results = [r for r in prediction_results if r is not None]
        df = pd.DataFrame(results)
        if df.empty:
            raise MessageException("TestWindows: Empty result dataframe!")

        classification_records = df.to_dict(orient='records')
        mt = ModelTest(
            window=test_window,
            step=test_step,
            # mp is None when the parameter search was not found (warned
            # above); avoid an AttributeError after all the fitting work
            parameters=mp.parameters if mp else None,
            features=list(test_X.columns),
            test_interval=splits['test'],
            task_key=task_key or str(uuid4()),
            classification_results=classification_records,
        )
        # Populate classification report fields
        clf_report = flattened_classification_report_imbalanced(
            df.label, df.predicted)
        roc_report = roc_auc_report(
            df.label, df.predicted,
            df[[c for c in df.columns if '_proba_' in c]])
        clf_report.update(roc_report)
        mt.classification_report = clf_report

        # Save test into the model
        if kwargs.get('save'):
            return self.model_repo.append_test(model.id, mt)
        return mt
    def close_short(self, asset: Asset, day: str, close_price: float,
                    position: Position, **kwargs):
        """Close a margin SHORT position by buying back the borrowed collateral.

        The buyback cost (collateral loan + accrued loan interest + spot fee,
        valued at ``close_price``) is debited from the asset's fiat wallet,
        the position is marked ``CLOSED`` and a ``CLOSE_SHORT`` order is
        appended to the asset.

        :param asset: asset that owns the position; mutated in place
        :param day: timestamp of the close, stored on the position and order
        :param close_price: price at which the collateral is bought back
        :param position: position being closed; mutated in place
        :param kwargs: ``detail`` (optional) is forwarded to the order
        :return: the result of ``self.repo.update`` for the updated asset
        :raises MessageException: when the position is not found among the
            asset's positions, or the fiat wallet cannot cover the buyback
        """
        # Slot of this position inside asset.positions, needed to write the
        # updated position back at the end.
        slot = TradingService.get_position_index(asset, position)
        if slot is None:
            raise MessageException(
                f"SHORT Position {position.id} is not related to asset {asset.symbol} with ID {asset.id}!"
            )

        # Loan interest accrues per day on the borrowed collateral; the
        # timestamp difference is divided by 86400 to express it in days.
        # NOTE(review): the LONG daily fee constant is applied to a SHORT
        # loan here - confirm this is intended.
        days_open = timestamp_diff(day, position.open_timestamp) / 86400
        loan_interest = round(
            position.coll_loan * MARGIN_LONG_DAILY_FEE * days_open, 8)
        # The loan is bought back on the spot market, so a spot fee applies
        # to the loan plus its interest.
        spot_fee = round(
            (position.coll_loan + loan_interest) * SPOT_FIXED_FEE, 8)
        # Total closing fee, expressed in collateral
        close_fee = loan_interest + spot_fee

        # Fiat needed to buy back the loan together with all closing fees
        buyback_cost = round((close_fee + position.coll_loan) * close_price, 3)
        if buyback_cost > asset.fiat:
            raise MessageException(
                f"Not enough fiat to close SHORT position on {asset.symbol}: "
                f"Wallet: {asset.fiat} Needed: {buyback_cost}")

        # What the opening sale yielded, net of the opening fee
        net_open_revenue = round(position.open_price * position.amount,
                                 3) - position.open_fee

        # Record closing details on the position
        position.status = 'CLOSED'
        position.close_price = close_price
        position.close_timestamp = day
        position.close_fee = close_fee
        position.price_change = TradingService.get_percent_change(
            position.close_price, position.open_price)
        # Profit is the net opening revenue minus the buyback cost
        position.profit = net_open_revenue - buyback_cost

        # Order documenting the close
        close_order = Order(
            id=get_uuid(),
            position_id=position.id,
            timestamp=day,
            type='CLOSE_SHORT',
            amount=position.amount,
            price=close_price,
            detail=kwargs.get('detail'),
            change=TradingService.get_percent_change(position.close_price,
                                                     position.open_price),
        )

        # Pay the buyback from the fiat wallet
        asset.fiat = round(asset.fiat - buyback_cost, 3)
        # The collateral borrowed for this position is no longer outstanding
        asset.coll_loan = round(asset.coll_loan - position.coll_loan, 8)
        # One less open short position
        asset.num_short -= 1
        # Store the updated position and the close order on the asset
        asset.positions[slot] = position
        asset.orders.append(close_order)

        return self.repo.update(asset.id, asset)
    def open_short(self, asset: Asset, day: str, close_price: float,
                   size: float, **kwargs):
        """Open a margin SHORT position on ``asset`` at ``close_price``.

        Collateral is borrowed and immediately sold for fiat; the opening fee
        is deducted from that sale's revenue. A ``MARGIN_SHORT`` position and
        an ``OPEN_SHORT`` order are appended to the asset.

        :param asset: asset to trade; mutated in place
        :param day: timestamp of the operation, stored on position and order
        :param close_price: price used as the position's opening price
        :param size: position size, passed to
            ``TradingService.get_position_size_amount``
        :param kwargs: ``stop_loss`` and ``take_profit`` are passed to
            ``TradingService.apply_price_change``; ``detail`` goes on the order
        :return: the result of ``self.repo.update`` for the updated asset
        :raises MessageException: when the fiat wallet is below the position
            value plus opening fee, or the resulting collateral loan would
            exceed ``COLL_LOAN_LIMIT / close_price``
        """
        # Fiat value of the operation and the collateral amount it involves
        op_value, amount = TradingService.get_position_size_amount(
            asset, close_price, size)

        # The fixed margin fee is due in collateral, which has to be bought
        # on the spot market - so the spot fee is charged on top of it.
        margin_fee = amount * MARGIN_SHORT_FIXED_FEE
        fee_in_collateral = round(margin_fee + margin_fee * SPOT_FIXED_FEE, 8)
        # Opening fee converted to fiat
        opening_fee = round(fee_in_collateral * close_price, 3)

        # Exchanges usually demand 1.5x the position value in the fiat
        # wallet; this is relaxed to 1.0x plus the opening fee.
        required_fiat = op_value + opening_fee
        if asset.fiat < required_fiat:
            raise MessageException(
                f"Not enough fiat to open SHORT position on {asset.symbol}: "
                f"Wallet: {asset.fiat} Needed: {required_fiat}")

        # The total borrowed collateral must stay within the loan allowance,
        # which is the fiat limit expressed in collateral at current price.
        new_loan = round(asset.coll_loan + amount, 8)
        loan_ceiling = COLL_LOAN_LIMIT / close_price
        if new_loan > loan_ceiling:
            raise MessageException(
                f"Not enough allowance to open SHORT position on {asset.symbol}: "
                f"Cur_loan: {asset.coll_loan} New loan: {new_loan} "
                f"Upperbound for collateral loan: {loan_ceiling} "
                f"Need allowance: {new_loan - loan_ceiling}")

        # Borrowed collateral is sold right away at market price; the
        # opening fee is paid out of that revenue.
        net_revenue = op_value - opening_fee

        new_position = Position(
            id=get_uuid(),
            type='MARGIN_SHORT',
            status='OPEN',
            # Opened at market close price (approximation)
            open_price=close_price,
            # Day of the simulation
            open_timestamp=day,
            # Collateral amount of the position
            amount=amount,
            # A margin short borrows collateral...
            coll_loan=amount,
            # ...and borrows no fiat
            fiat_loan=0.0,
            # Stop loss / take profit prices derived from the opening price
            # and the caller-supplied price changes
            stop_loss=TradingService.apply_price_change(
                close_price, kwargs.get('stop_loss')),
            take_profit=TradingService.apply_price_change(
                close_price, kwargs.get('take_profit')),
            # Reference price for stop loss
            last_price=close_price,
            open_fee=opening_fee,
        )
        open_order = Order(
            id=get_uuid(),
            position_id=new_position.id,
            timestamp=day,
            type='OPEN_SHORT',
            amount=amount,
            price=close_price,
            detail=kwargs.get('detail'),
            change=None,
        )

        # Credit the net sale revenue to the fiat wallet
        asset.fiat = round(asset.fiat + net_revenue, 3)
        # Track the collateral borrowed for this position
        asset.coll_loan = new_loan
        # One more open short position
        asset.num_short += 1
        # Attach the new position and its opening order to the asset
        asset.positions.append(new_position)
        asset.orders.append(open_order)

        # Persist the asset and hand back the repository's result
        return self.repo.update(asset.id, asset)
    def close_long(self, asset: Asset, day: str, close_price: float,
                   position: Position, **kwargs):
        """Close a LONG position by selling its collateral at ``close_price``.

        The sale revenue, minus the fiat loan and its accrued interest, is
        credited to the asset's fiat wallet. The position is marked
        ``CLOSED`` and a ``CLOSE_LONG`` order is appended to the asset.

        :param asset: asset that owns the position; mutated in place
        :param day: timestamp of the close, stored on the position and order
        :param close_price: price at which the collateral is sold
        :param position: position being closed; mutated in place
        :param kwargs: ``detail`` (optional) is forwarded to the order
        :return: the result of ``self.repo.update`` for the updated asset
        :raises MessageException: when the position is not found among the
            asset's positions, or the asset balance is smaller than the
            position amount
        """
        # Slot of this position inside asset.positions, needed to write the
        # updated position back at the end.
        slot = TradingService.get_position_index(asset, position)
        if slot is None:
            raise MessageException(
                f"LONG Position {position.id} is not related to asset {asset.symbol} with ID {asset.id}!"
            )

        # The wallet must hold the collateral that is about to be sold
        if position.amount > asset.balance:
            raise MessageException(
                f"Not enough collateral to close LONG position on {asset.symbol}: "
                f"Wallet: {asset.balance} Needed: {position.amount}")

        # Loan interest accrues per day on the borrowed fiat; the timestamp
        # difference is divided by 86400 to express it in days.
        days_open = timestamp_diff(day, position.open_timestamp) / 86400
        loan_interest = round(
            position.fiat_loan * MARGIN_LONG_DAILY_FEE * days_open, 3)

        # Proceeds of selling the collateral
        sale_proceeds = round(position.amount * close_price, 3)
        # What is left after repaying the loan and its interest
        net_profit = round(
            sale_proceeds - position.fiat_loan - loan_interest, 3)

        # Record closing details on the position
        position.status = 'CLOSED'
        position.close_price = close_price
        position.close_timestamp = day
        position.close_fee = loan_interest
        position.price_change = TradingService.get_percent_change(
            position.close_price, position.open_price)
        position.profit = net_profit

        # Order documenting the close
        close_order = Order(
            id=get_uuid(),
            position_id=position.id,
            timestamp=day,
            type='CLOSE_LONG',
            amount=position.amount,
            price=close_price,
            detail=kwargs.get('detail'),
            change=TradingService.get_percent_change(position.close_price,
                                                     position.open_price),
        )

        # Credit the booked profit to the fiat wallet
        asset.fiat = round(asset.fiat + net_profit, 3)
        # The fiat borrowed for this position is no longer outstanding
        asset.fiat_loan = round(asset.fiat_loan - position.fiat_loan, 3)
        # The sold collateral leaves the balance
        asset.balance = round(asset.balance - position.amount, 8)
        # One less open long position
        asset.num_long -= 1
        # Store the updated position and the close order on the asset
        asset.positions[slot] = position
        asset.orders.append(close_order)

        return self.repo.update(asset.id, asset)