예제 #1
0
from os import path

from sklearn.linear_model import LogisticRegression

from skrobot.core import Experiment
from skrobot.tasks import HyperParametersSearchCrossValidationTask

######### Initialization Code

# Single seed reused by the estimator and by the search task below.
random_seed = 42

lr_estimator = LogisticRegression(solver='liblinear', random_state=random_seed)

######### skrobot Code

# Build an Experiment
experiment = (
    Experiment('experiments-output')
    .set_source_code_file_path(__file__)
    .set_experimenter('echatzikyriakidis')
    .build()
)

# Run Hyperparameters Search Task
# NOTE: `path` is provided by the `from os import path` import above; without
# that import the path.join calls below fail with NameError.
results = experiment.run(
    HyperParametersSearchCrossValidationTask(
        estimator=lr_estimator,
        search_params={
            "C": [1.e-01, 1.e+00, 1.e+01],
            "penalty": ["l1", "l2"],
        },
        train_data_set_file_path=path.join('data',
                                           'money-laundering-data-train.csv'),
        random_seed=random_seed,
    ).grid_search().custom_folds(
        folds_file_path=path.join('data', 'money-laundering-folds.csv')))

# Print in-memory results
# Build the three remaining input file paths; every file lives in 'data'.
test_data_set_file_path, new_data_set_file_path, folds_file_path = (
    path.join('data', file_name)
    for file_name in (
        'money-laundering-data-test.csv',
        'money-laundering-data-new.csv',
        'money-laundering-folds.csv',
    )
)

# Seed used for the estimator here and passed to the tasks further down.
random_seed = 42

# Logistic regression estimator configured with the 'liblinear' solver.
lr_estimator = LogisticRegression(
    solver='liblinear',
    random_state=random_seed,
)

# Candidate values handed to the hyperparameters search task.
search_params = {
    "C": [0.1, 1.0, 10.0],
    "penalty": ["l1", "l2"],
}

######### skrobot Code

# Build an Experiment
experiment = (
    Experiment('experiments-output')
    .set_source_code_file_path(__file__)
    .set_experimenter('echatzikyriakidis')
    .build()
)


# Run Feature Selection Task
feature_selection_task = FeatureSelectionCrossValidationTask(
    estimator=lr_estimator,
    train_data_set_file_path=train_data_set_file_path,
    random_seed=random_seed,
).custom_folds(folds_file_path=folds_file_path)
features_columns = experiment.run(feature_selection_task)

# Run Hyperparameters Search Task
# The value returned by the feature selection run is passed on as
# feature_columns.
hyperparameters_search_task = HyperParametersSearchCrossValidationTask(
    estimator=lr_estimator,
    search_params=search_params,
    train_data_set_file_path=train_data_set_file_path,
    feature_columns=features_columns,
    random_seed=random_seed,
).random_search().custom_folds(folds_file_path=folds_file_path)
hyperparameters_search_results = experiment.run(hyperparameters_search_task)

# Run Evaluation Task	
예제 #3
0
# Search space for the hyperparameters search task.
# NOTE(review): the "<prefix>__<param>" keys presumably address steps of the
# pipeline passed as the estimator (scikit-learn naming convention) - confirm
# against the pipeline definition, which is outside this snippet.
search_params = {
    # classifier
    "classifier__max_iter": [20, 50, 80],
    "classifier__alpha": [1e-05, 1e-06],
    "classifier__penalty": ["l2", "elasticnet"],
    # vectorizer
    "vectorizer__stop_words": ["english", None],
    "vectorizer__ngram_range": [(1, 1), (1, 2)],
    "vectorizer__max_df": [0.5, 0.75, 1.0],
    # tfidf
    "tfidf__use_idf": [True, False],
    "tfidf__norm": ["l1", "l2"],
    # feature selection
    "feature_selection__percentile": [70, 60, 50],
}

######### skrobot Code

# Build an Experiment
experiment = (
    Experiment('experiments-output')
    .set_source_code_file_path(__file__)
    .set_experimenter('echatzikyriakidis')
    .build()
)

# Run Hyperparameters Search Task
# Configured as a random search over 5 shuffled stratified folds.
hyperparameters_search_task = HyperParametersSearchCrossValidationTask(
    estimator=pipe,
    search_params=search_params,
    train_data_set_file_path=train_data_set_file_path,
    field_delimiter=field_delimiter,
    random_seed=random_seed,
).random_search().stratified_folds(total_folds=5, shuffle=True)
hyperparameters_search_results = experiment.run(hyperparameters_search_task)

# Run Evaluation Task
evaluation_results = experiment.run(
    EvaluationCrossValidationTask(
        estimator=pipe,
예제 #4
0
from os import path

from sklearn.linear_model import LogisticRegression

from skrobot.core import Experiment
from skrobot.tasks import FeatureSelectionCrossValidationTask

######### Initialization Code

# Seed reused by the estimator and by the feature selection task.
random_seed = 42

lr_estimator = LogisticRegression(
    solver='liblinear',
    random_state=random_seed,
)

######### skrobot Code

# Build an Experiment
experiment = (
    Experiment('experiments-output')
    .set_source_code_file_path(__file__)
    .set_experimenter('echatzikyriakidis')
    .build()
)

# Run Feature Selection Task
# Folds are read from a file via custom_folds().
feature_selection_task = FeatureSelectionCrossValidationTask(
    estimator=lr_estimator,
    train_data_set_file_path=path.join('data', 'money-laundering-data-train.csv'),
    random_seed=random_seed,
).custom_folds(folds_file_path=path.join('data', 'money-laundering-folds.csv'))
features_columns = experiment.run(feature_selection_task)

# Print in-memory results
print(features_columns)
예제 #5
0
from skrobot.tasks import TrainTask

######### Initialization Code

# Seed reused by the estimator and by the train task below.
random_seed = 42

# Logistic regression estimator configured with the 'liblinear' solver.
lr_estimator = LogisticRegression(
    solver='liblinear',
    random_state=random_seed,
)

######### skrobot Code


# Define a Notifier (This is optional and you can implement any notifier you want, e.g. for Slack / Trello / Discord)
class ConsoleNotifier(BaseNotifier):
    """Notifier that writes every notification to standard output."""

    def notify(self, message):
        """Print ``message`` to the console.

        Args:
            message: The notification content; handed to ``print`` unchanged.
        """
        print(message)


# Build an Experiment
# A ConsoleNotifier instance is registered as the experiment's notifier.
experiment = (
    Experiment('experiments-output')
    .set_source_code_file_path(__file__)
    .set_experimenter('echatzikyriakidis')
    .set_notifier(ConsoleNotifier())
    .build()
)

# Run Train Task
train_task = TrainTask(
    estimator=lr_estimator,
    train_data_set=path.join('data', 'money-laundering-data-train.csv'),
    random_seed=random_seed,
)
results = experiment.run(train_task)

# Print in-memory results
print(results['estimator'])
    "preprocessor__numerical_transformer__imputer__strategy":
    ["mean", "median"]
}

######### skrobot Code


# Define a Notifier (This is optional and you can implement any notifier you want, e.g. for Slack / Trello / Discord)
class ConsoleNotifier(BaseNotifier):
    """Notifier that writes every notification to standard output."""

    def notify(self, message):
        """Print ``message`` to the console.

        Args:
            message: The notification content; handed to ``print`` unchanged.
        """
        print(message)


# Build an Experiment
# A ConsoleNotifier instance is registered as the experiment's notifier.
experiment = (
    Experiment('experiments-output')
    .set_source_code_file_path(__file__)
    .set_experimenter('echatzikyriakidis')
    .set_notifier(ConsoleNotifier())
    .build()
)

# Run Feature Selection Task
# Configured with 5 shuffled stratified folds.
feature_selection_task = FeatureSelectionCrossValidationTask(
    estimator=classifier,
    train_data_set_file_path=train_data_set_file_path,
    preprocessor=preprocessor,
    min_features_to_select=4,
    id_column=id_column,
    label_column=label_column,
    random_seed=random_seed,
).stratified_folds(total_folds=5, shuffle=True)
features_columns = experiment.run(feature_selection_task)

pipe = Pipeline(
    steps=[('preprocessor',
예제 #7
0
from os import path

from sklearn.linear_model import LogisticRegression

from skrobot.core import Experiment
from skrobot.tasks import EvaluationCrossValidationTask

######### Initialization Code

# Single seed reused by the estimator and by the evaluation task below.
random_seed = 42

lr_estimator = LogisticRegression(solver='liblinear', random_state=random_seed)

######### skrobot Code

# Build an Experiment
experiment = (
    Experiment('experiments-output')
    .set_source_code_file_path(__file__)
    .set_experimenter('echatzikyriakidis')
    .build()
)

# Run Evaluation Task
# NOTE: `path` is provided by the `from os import path` import above; without
# that import the path.join calls below fail with NameError.
# Every export_* option is switched on, and folds are read from a file.
results = experiment.run(
    EvaluationCrossValidationTask(
        estimator=lr_estimator,
        train_data_set_file_path=path.join('data', 'money-laundering-data-train.csv'),
        test_data_set_file_path=path.join('data', 'money-laundering-data-test.csv'),
        export_classification_reports=True,
        export_confusion_matrixes=True,
        export_pr_curves=True,
        export_roc_curves=True,
        export_false_positives_reports=True,
        export_false_negatives_reports=True,
        export_also_for_train_folds=True,
        random_seed=random_seed,
    ).custom_folds(
        folds_file_path=path.join('data', 'money-laundering-folds.csv')))

# Print in-memory results
예제 #8
0
    ["mean", "median"]
}

######### skrobot Code

# Create a Notifier
# Every connection setting is read from an environment variable; a missing
# variable raises KeyError.
# NOTE(review): os.environ values are strings, so smtp_port and recipients are
# passed as str - confirm EmailNotifier accepts that.
notifier = EmailNotifier(
    email_subject="skrobot notification",
    sender_account=os.environ['EMAIL_SENDER_ACCOUNT'],
    sender_password=os.environ['EMAIL_SENDER_PASSWORD'],
    smtp_server=os.environ['EMAIL_SMTP_SERVER'],
    smtp_port=os.environ['EMAIL_SMTP_PORT'],
    recipients=os.environ['EMAIL_RECIPIENTS'],
)

# Build an Experiment
# The notifier created above is registered on the experiment.
experiment = (
    Experiment('experiments-output')
    .set_source_code_file_path(__file__)
    .set_experimenter('echatzikyriakidis')
    .set_notifier(notifier)
    .build()
)

# Run Deep Feature Synthesis Task
# A single "passengers" entity is built from raw_data_set and id_column.
deep_feature_synthesis_task = DeepFeatureSynthesisTask(
    entities={"passengers": (raw_data_set, id_column)},
    target_entity="passengers",
    trans_primitives=['add_numeric', 'multiply_numeric'],
    export_feature_information=True,
    export_feature_graphs=True,
    label_column=label_column,
)
feature_synthesis_results = experiment.run(deep_feature_synthesis_task)

# Pull the two in-memory results out of the returned mapping.
data_set = feature_synthesis_results['synthesized_dataset']

feature_defs = feature_synthesis_results['feature_definitions']
예제 #9
0
products_df = data["products"]

# Entities handed to the deep feature synthesis task, keyed by entity name.
# NOTE(review): each tuple looks like (dataframe, index column[, time column])
# - confirm against the DeepFeatureSynthesisTask documentation.
entities = {
    "customers": (customers_df, "customer_id"),
    "sessions": (sessions_df, "session_id", "session_start"),
    "transactions": (transactions_df, "transaction_id", "transaction_time"),
    "products": (products_df, "product_id"),
}

# NOTE(review): each tuple is presumably
# (parent entity, parent column, child entity, child column) - confirm.
relationships = [
    ("sessions", "session_id", "transactions", "session_id"),
    ("products", "product_id", "transactions", "product_id"),
    ("customers", "customer_id", "sessions", "customer_id"),
]

######### skrobot Code

# Build an Experiment
experiment = (
    Experiment('experiments-output')
    .set_source_code_file_path(__file__)
    .set_experimenter('echatzikyriakidis')
    .build()
)

# Run Deep Feature Synthesis Task
# "transactions" is the target entity and 'amount' the label column.
deep_feature_synthesis_task = DeepFeatureSynthesisTask(
    entities=entities,
    relationships=relationships,
    target_entity="transactions",
    export_feature_information=True,
    export_feature_graphs=True,
    label_column='amount',
)
feature_synthesis_results = experiment.run(deep_feature_synthesis_task)

# Print in-memory results
for result_key in ('synthesized_dataset', 'feature_definitions'):
    print(feature_synthesis_results[result_key])