Example #1

import typer
import logging
from cryptoml_core.services.dataset_service import DatasetService  # assumed path
from cryptoml_core.services.model_service import ModelService
from cryptoml_core.logging import setup_file_logger


def main():
    # Reconstructed scaffold: the original listing is truncated above this
    # point; `models` is inferred from the query_models() call below.
    models = ModelService()
    datasets = DatasetService()
    query = {
        "dataset": "merged_new",
        "target": "class"
    }
    all_models = models.query_models(query=query)
    for m in all_models:
        ds = datasets.get_dataset(name=m.dataset, symbol=m.symbol)
        fs = DatasetService.get_feature_selection(ds=ds, method='importances_shap', target=m.target)
        if not fs:
            logging.error(f"Dataset {m.dataset}{m.symbol} -> {m.target} does not have feature selection")
            continue

        if not m.parameters:
            logging.error(f"Model {m.pipeline}({m.dataset}{m.symbol}) -> {m.target} does not have parameters")
            continue

        for mp in m.parameters:
            count = 0
            for f in mp.features:
                if f not in fs.features:
                    logging.error(f"Model {m.pipeline}({m.dataset}{m.symbol}) -> {m.target} parameter search done without fixing features!")
                else:
                    count += 1
            logging.info(f"Model {m.pipeline}({m.dataset}{m.symbol}) -> {m.target} GRIDSEARCH {mp.parameter_search_method} done with {count} features")


if __name__ == '__main__':
    setup_file_logger('check_models.log')
    typer.run(main)
Example #2

import typer
from cryptoml_core.services.model_service import ModelService
from cryptoml_core.logging import setup_file_logger


def main(dataset: str, pipeline: str):
    models = ModelService()
    query = {'type': "FEATURES"}
    if dataset != 'all':
        query['name'] = dataset
    items = models.create_classification_models(query=query, pipeline=pipeline)
    print(items)


if __name__ == '__main__':
    setup_file_logger('create_models.log')
    typer.run(main)
Example #3

import typer
import logging
import json
from typing import Optional
from cryptoml_core.services.grid_search import GridSearchService  # assumed path
from cryptoml_core.services.model_service import ModelService
from cryptoml_core.util.timestamp import get_timestamp
from cryptoml_core.logging import setup_file_logger


def main(queryfile: str, features: Optional[str] = None, halving: Optional[bool] = False, save: Optional[bool] = True):
    service = GridSearchService()
    models = ModelService()
    with open(queryfile, 'r') as f:
        query = json.load(f)
    # if save:
    #     models.clear_parameters(query)
    # else:
    #     logging.info("Results will not be saved [--no-save]")
    search_models = models.query_models(query)
    models.clear_parameters(query)
    logging.info("[i] {} models to train".format(len(search_models)))
    for i, m in enumerate(search_models):
        # if m.parameters:
        #     logging.info("==[{}/{}]== MODEL: {} {} {} {} ==> SKIP".format(i+1, len(search_models), m.symbol, m.dataset, m.target, m.pipeline))
        #     continue  # Skip this as search has already been performed
        logging.info("==[{}/{}]== MODEL: {} {} {} {} =====".format(i+1, len(search_models), m.symbol, m.dataset, m.target, m.pipeline))
        mp = service.create_parameters_search(m, split=0.7, features=features)
        logging.info("[{}] Start grid search".format(get_timestamp()))
        mp = service.grid_search(m, mp, sync=True, verbose=1, n_jobs=8, halving=halving, save=save)
        logging.info("[{}] End grid search".format(get_timestamp()))


if __name__ == '__main__':
    setup_file_logger('grid_search.log')
    typer.run(main)
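The queryfile argument is a JSON document that is loaded with json.load and passed straight to query_models, so it follows the same Mongo-style query shape used in the first example; typer exposes the remaining parameters as --features, --halving/--no-halving and --save/--no-save options. A minimal sketch of producing such a file, reusing the dataset/target keys from the first example (the exact schema depends on your model collection):

import json

# Hypothetical query file for grid_search: the keys mirror those used with
# query_models in the first example; adjust them to your model collection.
query = {
    "dataset": "merged_new",
    "target": "class"
}
with open("query.json", "w") as f:
    json.dump(query, f, indent=2)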
Example #4
import typer
from cryptoml_core.services.feature_selection import FeatureSelectionService
from cryptoml_core.util.timestamp import get_timestamp
from cryptoml_core.logging import setup_file_logger
import logging
from typing import Optional


def main(dataset: str, target: str, method: str, split: Optional[float] = 0.7, replace: Optional[bool] = False, save: Optional[bool] = True):
    service = FeatureSelectionService()

    symbols = service.get_available_symbols(dataset)
    for i, sym in enumerate(symbols):
        logging.info("==[{}/{}]== Dataset: {} {} {} =====".format(i+1, len(symbols), sym, dataset, target))
        logging.info("[{}] Start feature search".format(get_timestamp()))
        mf = service.feature_selection_new(
            symbol=sym,
            dataset=dataset,
            target=target,
            split=split,
            method=method,
            replace=replace,
            save=save
        )
        logging.info("[{}] End feature search".format(get_timestamp()))


if __name__ == '__main__':
    setup_file_logger('feature_selection_new.log')
    typer.run(main)
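Selections saved by this script are read back elsewhere through DatasetService, as the consistency check in the first example shows. A minimal sketch of that read path, assuming the same import path as above and hypothetical dataset/symbol names:

from cryptoml_core.services.dataset_service import DatasetService  # assumed path

# Read back a stored selection, mirroring the first example's check.
datasets = DatasetService()
ds = datasets.get_dataset(name="merged_new", symbol="BTC")  # hypothetical symbol
fs = DatasetService.get_feature_selection(ds=ds, method="importances_shap", target="class")
if fs:
    print(f"{len(fs.features)} features selected")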
Example #5
import json
import logging
import typer
from cryptoml_core.services.model_service import ModelService
from cryptoml_core.util.timestamp import get_timestamp
from cryptoml_core.exceptions import MessageException  # assumed path
from cryptoml_core.logging import setup_file_logger


def main():
    models = ModelService()
    failed = []
    # Reconstructed scaffold: the original listing is truncated here. How the
    # model list and the test windows t1..t4 are built is not shown, so the
    # query below is a hypothetical placeholder.
    for m in models.query_models(query={}):
        try:
            # Test T1
            # logging.info("[{}] {} Start T1".format(get_timestamp(), m.symbol))
            # models.test_model(m, t1, sync=True)
            # Test T2
            logging.info("[{}] {} Start T2".format(get_timestamp(), m.symbol))
            models.test_model(m, t2, sync=True)
            # Test T3
            logging.info("[{}] {} Start T3".format(get_timestamp(), m.symbol))
            models.test_model(m, t3, sync=True)
            logging.info("[{}] {} Start T4".format(get_timestamp(), m.symbol))
            models.test_model(m, t4, sync=True)
        except MessageException as e:
            logging.error("[!] " + e.message)
            # failed.append((m.dict(), t1.dict(), t2.dict(), t3.dict()))
            failed.append((m.dict(), t2.dict(), t3.dict(), t4.dict()))
        except Exception as e:
            logging.exception("[!] " + str(e))
            # failed.append((m.dict(), t1.dict(), t2.dict(), t3.dict()))
            failed.append((m.dict(), t2.dict(), t3.dict(), t4.dict()))

        logging.info("[{}] Done".format(m.symbol))
    with open('test-failed.json', 'w') as f:
        json.dump(failed, f)


if __name__ == '__main__':
    setup_file_logger('test_model.log')
    typer.run(main)
Example #6
import typer
from cryptoml_core.services.dataset_service import DatasetService  # assumed path
from cryptoml_core.services.dataset_building_service import DatasetBuildingService  # assumed path
from cryptoml_core.logging import setup_file_logger

app = typer.Typer()


@app.command()
def load(bucket: str, filename: str, dataset: str, symbol: str):
    print("Importing {}/{} to {}:{}".format(bucket, filename, dataset, symbol))
    service = DatasetService()
    service.import_from_storage(bucket, filename, dataset, symbol)
    print("Done")


@app.command()
def build(symbol: str, builder: str, ohlcv: str, coinmetrics: str):
    build_args = {'ohlcv': ohlcv, 'coinmetrics': coinmetrics}
    print("Building {} [{} -> {}]".format(symbol, build_args, builder))
    bs = DatasetBuildingService()
    bs.check_builder_args(builder, build_args)
    df = bs.build_dataset(symbol, builder, build_args)
    ds = DatasetService()
    ds.create_dataset(df, builder, symbol,
                      'FEATURES' if builder != 'target' else 'TARGET')
    print("Done")


if __name__ == '__main__':
    setup_file_logger('manage_dataset.log')
    app()
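Since this script builds a typer.Typer() app, each decorated function becomes a subcommand; assuming the file is saved as manage_dataset.py, the two commands would be invoked as python manage_dataset.py load <bucket> <filename> <dataset> <symbol> and python manage_dataset.py build <symbol> <builder> <ohlcv> <coinmetrics> (all argument values here are placeholders).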
Example #7
                # "roc_auc_ovr_macro": report["roc_auc_ovr_macro"] if report else np.nan,
                # "roc_auc_ovr_weighted": report["roc_auc_ovr_weighted"] if report else np.nan,
                # Averages
                "precision_avg": report["avg_pre"] if report else np.nan,
                "recall_avg": report["avg_rec"] if report else np.nan,
                "specificity_avg": report["avg_spe"] if report else np.nan,
                "f1-score_avg": report["avg_f1"] if report else np.nan,
                "geometric_mean_avg": report["avg_geo"] if report else np.nan,
                "index_balanced_accuracy_avg": report["avg_iba"] if report else np.nan,

            })
        df = pd.DataFrame(result)
        # Write to XLSX with conditional formatting (coolwarm color map),
        # ordering by ascending average precision
        df.sort_values(by='precision_avg', ascending=True)\
            .style.background_gradient(cmap=cm.get_cmap('coolwarm')) \
            .format(None, na_rep="-")\
            .to_excel(writer, sheet_name=symbol, index_label="#", float_format="%0.3f")
        # Adjust column width
        for column in df:
            column_length = max(df[column].astype(str).map(len).max(), len(column))
            col_idx = df.columns.get_loc(column) + 1
            writer.sheets[symbol].set_column(col_idx, col_idx, column_length)
    writer.close()



if __name__ == '__main__':
    setup_file_logger('get_tests.log')
    typer.run(main)