Example #1
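This excerpt comes from a larger training module, so it assumes several names defined elsewhere in that project: the Model, Architecture, Chromosome and DataPreprocesser classes, the create_tf_session and get_select_indicator_values helpers, and the IS_CLASSIFICATION, SYMBOL_TO_PREDICT and SHOULD_USE_INDICATORS constants. A minimal sketch of the module-level setup it implies (placeholder values, not taken from the original) might look like this:

# Hypothetical module-level setup implied by the excerpt; the project-specific
# classes and helpers (Model, Architecture, Chromosome, DataPreprocesser,
# create_tf_session, get_select_indicator_values) are assumed to be imported
# from the project's own modules and are not shown here.
import os

from tensorflow.keras import backend as K  # needed for K.clear_session()

IS_CLASSIFICATION = False       # placeholder: False -> regression metrics (R^2, MAE)
SYMBOL_TO_PREDICT = "BTC-USD"   # placeholder symbol; any <symbol>.parquet under data/crypto
SHOULD_USE_INDICATORS = True    # placeholder: whether to append technical-indicator columns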
    def fitness_func(chromosome: Chromosome) -> float:
        """Train one model with this chromosome's hyperparameters and return its fitness."""
        fitness = 0.0
        create_tf_session()
        params = chromosome.values

        print("Current Chromosome Params:")
        print(params)
        model = Model(train_x,
                      train_y,
                      validation_x,
                      validation_y,
                      preprocessor.get_seq_info_str(),
                      architecture=Architecture.GRU.value,
                      is_bidirectional=False,
                      batch_size=round(params["batch_size"]),
                      hidden_layers=round(params["hidden_layers"]),
                      neurons_per_layer=round(params["neurons_per_layer"]),
                      dropout=params["dropout"],
                      initial_learn_rate=params["initial_learn_rate"],
                      is_classification=IS_CLASSIFICATION)
        model.train()
        if IS_CLASSIFICATION:
            # Lower crossentropy is better, so negate it to get a maximisable fitness.
            fitness = -model.score["sparse_categorical_crossentropy"]
            chromosome.other["sparse_categorical_crossentropy"] = model.score[
                "sparse_categorical_crossentropy"]
            chromosome.other["accuracy"] = model.score["accuracy"]
        else:
            # Higher R^2 is better, so it is used as the fitness directly.
            fitness = model.score["RSquaredMetric"]
            chromosome.other["mae"] = model.score["mae"]

        ## Cleanup: release the model and clear the Keras session before the next evaluation
        del model
        K.clear_session()

        return fitness
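fitness_func only relies on the chromosome exposing a values mapping of hyperparameters and an other dict for extra metrics. A hypothetical stand-in with that shape (for smoke-testing the function outside the GA; the real Chromosome class comes from the project's GA code) could be:

# Hypothetical stand-in mirroring the two attributes fitness_func actually uses.
from dataclasses import dataclass, field

@dataclass
class FakeChromosome:
    values: dict                               # hyperparameter name -> value
    other: dict = field(default_factory=dict)  # extra metrics written by fitness_func

candidate = FakeChromosome(values={
    "batch_size": 1024,
    "hidden_layers": 2,
    "neurons_per_layer": 64,
    "dropout": 0.3,
    "initial_learn_rate": 0.001,
})
# fitness = fitness_func(candidate)  # would train one model and return its fitness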


def train_model():
    preprocessor = DataPreprocesser(
        f"{os.environ['WORKSPACE']}/data/crypto/{SYMBOL_TO_PREDICT}.parquet",
        col_names=["open", "high", "low", "close", "volume"],
        forecast_col_name="close",
        sequence_length=100,
        forecast_period=10,
        is_classification=IS_CLASSIFICATION)
    if not preprocessor.has_loaded and SHOULD_USE_INDICATORS:
        indicator_df = get_select_indicator_values(preprocessor.df_original)
        preprocessor.change_data(indicator_df)
        preprocessor.print_df()
        preprocessor.print_df_no_std()

    preprocessor.preprocess()

    train_x, train_y = preprocessor.get_train()
    validation_x, validation_y = preprocessor.get_validation()

    # Best hyperparams from GA:
    # batch_size=1534,
    # hidden_layers=2,
    # neurons_per_layer=60,
    # dropout=0.4714171367290059,
    # initial_learn_rate=0.003725545984696872,

    model = Model(train_x,
                  train_y,
                  validation_x,
                  validation_y,
                  preprocessor.get_seq_info_str(),
                  architecture=Architecture.GRU.value,
                  is_bidirectional=False,
                  batch_size=1534,
                  hidden_layers=2,
                  neurons_per_layer=60,
                  dropout=0.4164892976819227,
                  initial_learn_rate=0.004587161725770879,
                  early_stop_patience=10,
                  max_epochs=100,
                  is_classification=IS_CLASSIFICATION)

    preprocessor.print_dataset_totals()
    del preprocessor  # Save memory

    model.train()
    model.save_model()


def multi_test(SYMBOL_TO_PREDICT: str):
    params = {
        "architecture": Architecture.GRU.value,
        "is_bidirectional": False,
        "indicators": False,
        "sequence_length": 200,
        "forecast_period": 10,
    }

    tests = [
        # Format:
        # (num_repetitions, {"param to change": new_value})
        (5, {"architecture": Architecture.LSTM.value, "is_bidirectional": False,}),
        (5, {"architecture": Architecture.GRU.value, "is_bidirectional": False,}),
        (5, {"architecture": Architecture.LSTM.value, "is_bidirectional": True,}),
        (5, {"architecture": Architecture.GRU.value, "is_bidirectional": True,}),
        (5, {"indicators": False}),
        (5, {"indicators": True}),
        (1, {"sequence_length": 50}),
        (1, {"sequence_length": 100}),
        (1, {"sequence_length": 150}),
        (1, {"sequence_length": 200}),
        (1, {"sequence_length": 250}),
        (1, {"sequence_length": 300}),
        (1, {"sequence_length": 350}),
        (1, {"sequence_length": 400}),
        (1, {"forecast_period": 1}),
        (1, {"forecast_period": 5}),
        (1, {"forecast_period": 10}),
        (1, {"forecast_period": 20}),
        (1, {"forecast_period": 30}),
        (1, {"forecast_period": 50}),
    ]

    preprocessor = DataPreprocesser(
        f"{os.environ['WORKSPACE']}/data/crypto/{SYMBOL_TO_PREDICT}.parquet",
        col_names=["open", "high", "low", "close", "volume"],
        forecast_col_name="close",
        forecast_period=params["forecast_period"],
        sequence_length=params["sequence_length"],
        should_ask_load=False,
        is_classification=IS_CLASSIFICATION
    )
    preprocessor.preprocess()

    folder = f"{os.environ['WORKSPACE']}/results/tests"
    for test_num, (num_repeats, additional_params) in enumerate(tests):
        new_params = {**params, **additional_params}

        # Reuse the shared preprocessor unless this test changes how the data is built.
        try:
            pre = preprocessor
        except NameError:  # already freed by an earlier test
            pre = None
        if ("sequence_length" in additional_params
                or "forecast_period" in additional_params
                or "indicators" in additional_params):
            # These params change the preprocessed data, so rebuild the preprocessor.
            try:
                del preprocessor  # free the shared data before building the new one
            except NameError:
                pass
            del pre
            pre = DataPreprocesser(
                f"{os.environ['WORKSPACE']}/data/crypto/{SYMBOL_TO_PREDICT}.parquet",
                col_names=["open", "high", "low", "close", "volume"],
                forecast_col_name="close",
                forecast_period=new_params["forecast_period"],
                sequence_length=new_params["sequence_length"],
                should_ask_load=False,
                is_classification=IS_CLASSIFICATION
            )
            pre.preprocess()
            if new_params["indicators"]:
                indicator_df = get_select_indicator_values(pre.df_original)
                pre.change_data(indicator_df)
                pre.print_df()
                pre.print_df_no_std()

        for i in range(num_repeats):
            create_tf_session()
            print(f"Test {test_num} repeat {i}")
            print("Params")
            print(new_params)

            train_x, train_y = pre.get_train()
            validation_x, validation_y = pre.get_validation()
            model = Model(
                train_x, train_y, validation_x, validation_y,
                pre.get_seq_info_str(),
                architecture=new_params["architecture"],
                is_bidirectional=new_params["is_bidirectional"],
                batch_size=1024,
                hidden_layers=2,
                neurons_per_layer=100,
                dropout=0.2,
                initial_learn_rate=0.001,
                is_classification=IS_CLASSIFICATION
            )
            model.train()

            train_time = float(model.train_time) / 60.0 # In Minutes
            
            file_path = f"{folder}/test{test_num}.csv"

            if not os.path.exists(file_path):
                with open(file_path, 'a') as file:
                    if IS_CLASSIFICATION:
                        file.write("Accuracy,Sparse Categorical Crossentropy,Train Time\n")
                    else:
                        file.write("R Square,MAE,Train Time\n")

            with open(file_path, 'a') as file:
                if IS_CLASSIFICATION:
                    file.write(f"{model.score['accuracy']},{model.score['sparse_categorical_crossentropy']},{train_time}\n")
                else:
                    file.write(f"{model.score['RSquaredMetric']},{model.score['mae']},{train_time}\n")

            ## Cleanup
            del model
            K.clear_session()
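Each repeat appends one row to results/tests/test{n}.csv. A minimal sketch for summarising those files afterwards (assuming pandas is available and the column layout written above) could be:

# Hypothetical post-processing of the CSVs written by multi_test.
import glob
import os

import pandas as pd

folder = f"{os.environ['WORKSPACE']}/results/tests"
for path in sorted(glob.glob(f"{folder}/test*.csv")):
    df = pd.read_csv(path)
    # Mean of each column (R Square / MAE / Train Time, or the classification metrics).
    print(os.path.basename(path), df.mean(numeric_only=True).round(4).to_dict())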