コード例 #1
0
def build_automl_config(is_local_training, user_automl_settings,
                        training_dataset, compute_target):
    """Assemble an AutoMLConfig for a regression run on `training_dataset`.

    Fixed project settings (task, label column, featurization) are applied
    first; caller-supplied `user_automl_settings` are spread on top. For
    remote training, `path`/`compute_target` are added; local runs omit them.
    """
    feat_config = FeaturizationConfig()
    # 'passengerCount' must be treated as numeric rather than categorical.
    feat_config.add_column_purpose('passengerCount', 'Numeric')

    base_settings = {
        'task': 'regression',
        'label_column_name': 'duration',
        'verbosity': logging.INFO,
        'preprocess': False,
        'model_explainability': True,
        'featurization': feat_config,
    }

    # Remote-only keyword arguments; empty for local training.
    remote_kwargs = {} if is_local_training else {
        'path': CODE_PATH,
        'compute_target': compute_target,
    }

    return AutoMLConfig(training_data=training_dataset,
                        **remote_kwargs,
                        **base_settings,
                        **user_automl_settings)
コード例 #2
0
def RunAutoML():
    """Endpoint-style entry: launch a local AutoML classification run.

    Reads the subscription id from the incoming JSON request and pulls the
    workspace/X/y triple from the module-level `userData` cache. Returns
    'ok' on success, 'error' on failure.
    """
    automl_settings = {
        "name": "AutoML_Demo_Experiment",
        "iteration_timeout_minutes": 15,
        "iterations": 3,
        "n_cross_validations": 5,
        "primary_metric": 'r2_score',
        # BUG FIX: 'preprocess' was in this dict (False) AND passed
        # explicitly below (True) — AutoMLConfig(**automl_settings,
        # preprocess=True) always raised TypeError ("multiple values for
        # keyword argument"), silently hidden by the bare except. The
        # explicit preprocess=True below is kept as the intended value.
        "max_concurrent_iterations": 8,
        "verbosity": logging.INFO
    }
    subscription_id = request.json['subscription_id']
    print(userData)
    print(userData[subscription_id])
    try:
        automl_config = AutoMLConfig(task="classification",
                                     X=userData[subscription_id][1],
                                     y=userData[subscription_id][2],
                                     debug_log='automl_errors.log',
                                     preprocess=True,
                                     **automl_settings)
        experiment = Experiment(userData[subscription_id][0], 'automl_remote')
        run = experiment.submit(automl_config, show_output=True)
        best_model, fitted_model = run.get_output()

        return 'ok'
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; the 'error' contract for callers is preserved.
        return 'error'
コード例 #3
0
ファイル: train.py プロジェクト: Finn2019/testFB
def train_model(train_data, cpu_cluster, exp):
    """Submit a remote AutoML forecasting run and return the fitted model.

    Args:
        train_data: training dataset with 'Gross_Sales', 'Date', 'Location'.
        cpu_cluster: compute target the run is submitted to.
        exp: Experiment the run is submitted under.

    Returns:
        The fitted model from the best child run.
    """
    target_column_name = 'Gross_Sales'
    time_column_name = 'Date'
    time_series_id_column_names = 'Location'
    max_horizon = 1

    forecasting_parameters = ForecastingParameters(
        time_column_name=time_column_name,
        # FIX: max_horizon was defined but unused while the horizon was
        # hard-coded to the same literal 1; wire the constant through so
        # there is a single source of truth (behavior unchanged).
        forecast_horizon=max_horizon,
        time_series_id_column_names=time_series_id_column_names
    )

    automl_config = AutoMLConfig(
        task='forecasting',
        debug_log='automl_daily_gross_errors.log',
        primary_metric='normalized_root_mean_squared_error',
        experiment_timeout_hours=1,
        training_data=train_data,
        label_column_name=target_column_name,
        compute_target=cpu_cluster,
        enable_early_stopping=True,
        n_cross_validations=3,
        verbosity=logging.INFO,
        max_cores_per_iteration=-1,
        forecasting_parameters=forecasting_parameters
    )

    # Blocking submit is not requested (show_output=False); get_output()
    # waits for completion and yields (best_run, fitted_model).
    remote_run = exp.submit(automl_config, show_output=False)

    best_run, fitted_model = remote_run.get_output()

    return fitted_model
コード例 #4
0
ファイル: 04_automl.py プロジェクト: wguo123/AML-service-labs
def auto_train_model(ws, experiment_name, model_name, full_X, full_Y,training_set_percentage, training_target_accuracy):
    """Run a local automated-ML classification sweep and return its results.

    Splits (full_X, full_Y), submits an AutoML classification experiment that
    stops once `training_target_accuracy` is reached, and returns the tuple
    (best_model, run, best_run). Note: `model_name` is accepted but not
    referenced in this body. Uses legacy AutoMLConfig keywords
    (max_time_sec, exit_score, blacklist_algos, X/y) as-is.
    """
    experiment = Experiment(ws, experiment_name)

    X_tr, X_te, Y_tr, Y_te = train_test_split(full_X, full_Y,
                                              train_size=training_set_percentage,
                                              random_state=42)

    # AutoML expects a flat label array rather than a DataFrame column.
    y_flat = Y_tr.values.flatten()

    # Local automated-ML sweep; no manual scaling needed — AutoML tries
    # several scaling approaches itself.
    automl_config = AutoMLConfig(task='classification',
                                 primary_metric='accuracy',
                                 max_time_sec=12000,
                                 iterations=20,
                                 n_cross_validations=3,
                                 exit_score=training_target_accuracy,
                                 blacklist_algos=['kNN', 'LinearSVM'],
                                 X=X_tr,
                                 y=y_flat,
                                 path='./04-automl/outputs')

    run = experiment.submit(automl_config, show_output=True)

    # Best-performing child by the primary metric.
    best_run, best_model = run.get_output()

    return (best_model, run, best_run)
def RunAutoML():
    """Endpoint-style entry: run AutoML on a registered dataset.

    Workspace coordinates, dataset name and AutoML knobs all come from the
    incoming JSON request; the label column is hard-coded to 'ActionTaken'.
    Returns 'ok' on success, 'error' on failure.
    """
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    file_name = request.json['file_name']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)

    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')

    dataset_name = file_name

    # Materialize the registered dataset as a pandas frame.
    df = Dataset.get_by_name(workspace=ws, name=dataset_name)
    stock_dataset_df = df.to_pandas_dataframe()
    print('file successfully recieved.')
    y_df = stock_dataset_df['ActionTaken'].values
    x_df = stock_dataset_df.drop(['ActionTaken'], axis=1)

    ExperimentName = request.json['ExperimentName']
    tasks = request.json['tasks']
    iterations = request.json['iterations']
    iteration_timeout_minutes = request.json['iteration_timeout_minutes']
    primary_metric = request.json['primary_metric']

    try:
        automl_config = AutoMLConfig(
            task=tasks,
            X=x_df,
            y=y_df,
            iterations=iterations,
            iteration_timeout_minutes=iteration_timeout_minutes,
            primary_metric=primary_metric,
            preprocess=True,
        )
        experiment = Experiment(ws, ExperimentName)
        run = experiment.submit(config=automl_config, show_output=True)

        best_model, fitted_model = run.get_output()

        return 'ok'
    except Exception:
        # BUG FIX: was a bare `except:` which also swallowed SystemExit and
        # KeyboardInterrupt; the 'error' return contract is preserved.
        return 'error'
コード例 #6
0
def main(train_path, pred_path, n_pred, dt, target, time_limit_min):
    """Train a forecasting AutoML model and write `n_pred` future predictions.

    Args:
        train_path: CSV with the training series.
        pred_path: output CSV path for the forecast.
        n_pred: number of future periods (also the max horizon).
        dt: name of the datetime column.
        target: name of the target column.
        time_limit_min: experiment timeout in minutes.
    """
    df_train = pd.read_csv(train_path)
    df_train[dt] = pd.to_datetime(df_train[dt])

    time_series_settings = {
        "time_column_name": dt,
        "max_horizon": n_pred,
        "target_lags": "auto",
        "target_rolling_window_size": "auto"
    }
    automl_config = AutoMLConfig(task="forecasting",
                                 training_data=df_train,
                                 label_column_name=target,
                                 n_cross_validations=5,
                                 max_cores_per_iteration=-1,
                                 path=os.environ["SCRATCH"],
                                 experiment_timeout_minutes=time_limit_min,
                                 ensemble_download_models_timeout_sec=3600,
                                 **time_series_settings)
    ws = Workspace.from_config()
    experiment = Experiment(ws, "experiment")
    best_run, fitted_model = experiment.submit(automl_config,
                                               show_output=True).get_output()

    print("Best pipeline:")
    try:
        # Ensemble winners wrap their members; unwrap to list the estimators.
        ensemble = vars(fitted_model.steps[1][1])["_wrappedEnsemble"]
        print(ensemble.__class__)
        steps = ensemble.estimators_
    except (AttributeError, IndexError, KeyError, TypeError):
        # BUG FIX: was a bare `except:`; narrowed to the failure modes of
        # the unwrap above while keeping the best-effort fallback.
        steps = fitted_model.steps
    best_pipeline = ""
    for i, step in enumerate(steps):
        best_pipeline += f"{i}. {str(step)}\n"
    print(best_pipeline)

    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    # BUG FIX: the -1 sentinel for max_colwidth was deprecated in pandas 1.0
    # and later removed; None is the documented "no limit" value.
    pd.set_option('display.max_colwidth', None)
    print(fitted_model.named_steps["timeseriestransformer"].
          get_engineered_feature_names())
    featurization_summary = fitted_model.named_steps[
        "timeseriestransformer"].get_featurization_summary()
    print(pd.DataFrame.from_records(featurization_summary))

    # Future timestamps: one inferred-frequency step past the last training
    # point, n_pred periods long.
    x_pred = pd.date_range(df_train[dt].iloc[-1],
                           periods=n_pred + 1,
                           freq=pd.infer_freq(df_train[dt]))[1:]
    y_pred = fitted_model.forecast(forecast_destination=x_pred[-1])[0]

    df_pred = pd.DataFrame({dt: x_pred, target: y_pred})
    df_pred.to_csv(pred_path, index=False)
コード例 #7
0
def train_model(data_file, random_seed):
    """Train the automl model."""
    target = "utilization"
    df = pd.read_parquet(data_file)

    x = df.loc[:, [c for c in df if c != target]].values
    y = df[target].values
    project_folder = "./automl"

    automl_config = AutoMLConfig(
        task="regression",
        iteration_timeout_minutes=5,
        iterations=10,
        primary_metric="spearman_correlation",
        n_cross_validations=5,
        debug_log="automl.log",
        verbosity=logging.INFO,
        X=x,
        y=y,
        path=project_folder,
    )

    load_dotenv(find_dotenv())
    ws = Workspace(
        workspace_name=getenv("AML_WORKSPACE_NAME"),
        subscription_id=getenv("AML_SUBSCRIPTION_ID"),
        resource_group=getenv("AML_RESOURCE_GROUP"),
    )
    experiment = Experiment(ws, getenv("AML_EXPERIMENT_NAME"))

    local_run = experiment.submit(automl_config, show_output=True)

    sub_runs = list(local_run.get_children())

    best_run = None
    best_score = 0

    for sub_run in sub_runs:
        props = sub_run.get_properties()
        if props["run_algorithm"] != "Ensemble":
            if float(props["score"]) > best_score:
                best_run = sub_run

    model_name = "Automl{}".format(str(uuid.uuid4()).replace("-", ""))[:20]
    best_run.register_model(model_name=model_name,
                            model_path="outputs/model.pkl")

    # best_run, fitted_model = local_run.get_output()
    # local_run.register_model(
    #     description="automl meetup best model"
    # )
    print("Model name is {}".format(model_name))
コード例 #8
0
    def submit(self,
               dispatcher: CollectingDispatcher,
               tracker: Tracker,
               domain: Dict[Text, Any],) -> List[Dict]:
        """Define what the form has to do after all required slots are filled.

        Reads the task/data/column_name slots, runs a local AutoML experiment
        on the CSV at `data` with `column_name` as the label, and returns the
        fitted model with the best run's metrics.
        """
        task = tracker.get_slot('task')
        data = tracker.get_slot('data')
        column_name = tracker.get_slot('column_name')
        dispatcher.utter_message(template="utter_doing_task", task=task,
                                 data=data, column_name=column_name)
        # Load the workspace from the saved config file
        ws = Workspace.from_config()
        print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

        df = pd.read_csv(data)
        train_data, test_data = train_test_split(df, test_size=0.1, random_state=42)
        label = column_name
        automl_config = AutoMLConfig(name='Automated ML Experiment',
                             task= task,
                             compute_target='local',
                             training_data = train_data,
                             validation_data = test_data,
                             label_column_name= label,
                             experiment_timeout_minutes=30,
                             iterations=6,
                             primary_metric = 'AUC_weighted',
                             featurization='auto',
                             )
        automl_experiment = Experiment(ws, 'mslearn-diabetes-automl')
        automl_run = automl_experiment.submit(automl_config)
        best_run, fitted_model = automl_run.get_output()
        best_run_metrics = best_run.get_metrics()
        metric_list = [(metric_name, best_run_metrics[metric_name])
                       for metric_name in best_run_metrics]
        # BUG FIX: the original had unreachable code after this return that
        # referenced undefined names (`model`, `metrics`) and ended in a
        # stray, syntax-breaking continuation line; all of it was removed.
        return fitted_model, metric_list
コード例 #9
0
 def autoMLRegression(self, x_df, y_df):
     """Run an AutoML regression sweep on the concatenated (x_df, y_df).

     Uses the single column of y_df as the label, self.k_fold for cross
     validation, and returns (best_run, fitted_model).
     """
     ctx_run = Run.get_context()
     experiment = ctx_run.experiment
     # AutoML wants one frame with the label column included.
     merged = pd.concat([x_df, y_df], axis=1)
     label = list(y_df)[0]
     settings = {
         'task': 'regression',
         'primary_metric': 'normalized_root_mean_squared_error',
         'experiment_timeout_minutes': 15,
         'training_data': merged,
         'label_column_name': label,
         'n_cross_validations': self.k_fold,
         'enable_onnx_compatible_models': True,
         'model_explainability': True,
     }
     submitted = experiment.submit(AutoMLConfig(**settings), show_output=True)
     best_run, fitted_model = submitted.get_output()
     return best_run, fitted_model
コード例 #10
0
def train_model(file_path, data, logger):
    """Train one AutoML model on `data` as a child of the current step run.

    Relies on the module-level `automl_settings` dict and `current_step_run`.
    Returns (fitted_model, child_run).
    """
    stem = file_path.rsplit('/', 1)[-1][:-4]
    print(stem)
    logger.info("in train_model")
    print('data')
    print(data.head(5))
    config = AutoMLConfig(training_data=data, **automl_settings)

    logger.info("submit_child")
    child_run = current_step_run.submit_child(config, show_output=False)
    logger.info(child_run)
    print(child_run)
    child_run.wait_for_completion(show_output=True)

    fitted_model = child_run.get_output()

    return fitted_model, child_run
コード例 #11
0
 def train(self):
     """Submit a remote AutoML regression run configured from this object.

     Writes self.data_script, regenerates the data script, stores the config
     on self.automl_config, and submits it to the 'automl_remote' experiment.
     """
     trial_settings = dict(
         iteration_timeout_minutes=self.iteration_timeout_minutes,
         iterations=self.max_n_trials,
         primary_metric=self.metric,
         verbosity=logging.DEBUG,
         n_cross_validations=self.cross_validation_folds,
         enable_stack_ensemble=self.use_ensemble,
     )
     self.data_script = "get_data.py"
     self.generate_data_script()
     self.automl_config = AutoMLConfig(task='regression',
                                       debug_log='automl_errors.log',
                                       compute_target=self.compute_cluster,
                                       data_script="get_data.py",
                                       **trial_settings)
     experiment = Experiment(self.ws, 'automl_remote')
     print("Submitting training run: {}:".format(self.ws))
     remote_run = experiment.submit(self.automl_config, show_output=True)
     print("Results of training run: {}:".format(remote_run))
コード例 #12
0
def train_model(file_path, data, automl_settings, current_step_run):
    """Train one AutoML child run for a many-models partition.

    Tags the child run with the module-level `many_model_run_properties`
    and returns (fitted_model, local_run, best_child_run).
    """
    stem = file_path.rsplit('/', 1)[-1][:-4]
    print(stem)
    print("in train_model")
    print('data')
    print(data.head(5))
    print(automl_settings)
    config = AutoMLConfig(training_data=data, **automl_settings)

    print("submit_child")
    child = current_step_run.submit_child(config, show_output=True)

    # Propagate the many-models bookkeeping onto the child run (stringified).
    child.add_properties(
        {key: str(val) for key, val in many_model_run_properties.items()})

    print(child)

    best_child_run, fitted_model = child.get_output()

    return fitted_model, child, best_child_run
コード例 #13
0
    def setup_training_step(self):
        """Build the AutoMLStep for the pipeline.

        Parses the prepped parquet data, wires an AutoML classification
        config over it (merging the module-level `automl_settings`), and
        returns an AutoMLStep whose outputs are the metrics and best-model
        PipelineData objects. The model output is kept on self.model_data
        so downstream steps can consume it.
        """
        prepped_data = self.prepped_data_path.parse_parquet_files(
            file_extension=None)
        project_folder = './automl'

        config = AutoMLConfig(compute_target=self.aml_compute,
                              task="classification",
                              training_data=prepped_data,
                              label_column_name="test_result",
                              path=project_folder,
                              enable_early_stopping=True,
                              featurization='auto',
                              debug_log="automl_errors.log",
                              n_cross_validations=10,
                              **automl_settings)

        datastore = self.ws.get_default_datastore()

        metrics_data = PipelineData(
            name='metrics_data',
            datastore=datastore,
            pipeline_output_name='metrics_output',
            training_output=TrainingOutput(type='Metrics'))
        model_data = PipelineData(
            name='best_model_data',
            datastore=datastore,
            pipeline_output_name='model_output',
            training_output=TrainingOutput(type='Model'))

        # Expose the model output for later pipeline steps.
        self.model_data = model_data

        return AutoMLStep(name='automl_module',
                          automl_config=config,
                          passthru_automl_config=False,
                          outputs=[metrics_data, model_data],
                          allow_reuse=True)
コード例 #14
0
ファイル: train.py プロジェクト: tiagoh/mlops-demo
def main():
    """Run AutoML classification on a registered dataset inside an AML run.

    The dataset name comes from the command line, the workspace from the
    ambient run context. The best run's details are written to
    ./outputs/best_run.json alongside a copy of automl.log.
    """
    print(azureml.core.VERSION)

    dataset_name = getRuntimeArgs()

    run = Run.get_context()
    ws = run.experiment.workspace

    ds = Dataset.get_by_name(workspace=ws, name=dataset_name)

    automl_settings = {
        "task": 'classification',
        "verbosity": logging.INFO,
        "primary_metric": 'accuracy',
        "experiment_timeout_hours": 0.05,
        "n_cross_validations": 3,
        "enable_stack_ensemble": False,
        "enable_voting_ensemble": False,
        "model_explainability": True,
        "preprocess": True,
        "max_cores_per_iteration": -1,
        "max_concurrent_iterations": 4,
        "training_data": ds,
        "drop_column_names": ['Sno'],
        "label_column_name": 'Risk'
    }

    automl_config = AutoMLConfig(**automl_settings)
    run = run.submit_child(automl_config, show_output=True)

    best_run, fitted_model = run.get_output()

    output_dir = './outputs/'
    os.makedirs(output_dir, exist_ok=True)
    shutil.copy2('automl.log', output_dir)

    with open(output_dir + 'best_run.json', 'w') as f:
        # BUG FIX: json.dump(best_run, f) raised TypeError — a Run object is
        # not JSON-serializable. Persist its details dict instead,
        # stringifying any non-JSON values.
        json.dump(best_run.get_details(), f, default=str)
コード例 #15
0
# Clean the raw dataset (clean_data / ds1 are defined in earlier cells)
# into a feature frame x and label series y.
x, y = clean_data(ds1)


# In[5]:


# Split data into train and test sets
from sklearn.model_selection import train_test_split
import pandas as pd
# Default split: 75% train / 25% test, unseeded (non-deterministic split).
x_train, x_test, y_train, y_test = train_test_split(x, y)
# AutoML takes one frame containing the label column ("y").
df_train = pd.concat([x_train, y_train], axis=1)
from azureml.train.automl import AutoMLConfig
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    task="classification",
    primary_metric="accuracy",
    training_data=df_train,
    label_column_name="y",
    n_cross_validations=5)


# In[7]:


# Submit the AutoML run to the experiment `exp` (defined in an earlier cell)
# and stream progress to the notebook output.
automl_run= exp.submit(automl_config, show_output=True)


# In[8]:
コード例 #16
0
    time_series_id_column_names=time_series_id_column_names,
    target_lags=target_lag,
    feature_lags=feature_lag,
    target_rolling_window_size=window_size,
    seasonality=seasonality  #, 
)

# Forecasting AutoML configuration: NRMSE as the selection metric, 4-hour
# budget, DNN models enabled, up to 9 concurrent iterations on the ML
# cluster. `forecasting_parameters` is built just above this block.
automl_config = AutoMLConfig(  # featurization_config,
    task="forecasting",
    debug_log="rev_region_forecast_errors.log",
    primary_metric="normalized_root_mean_squared_error",
    experiment_timeout_hours=4,
    training_data=train_dataset,
    label_column_name=target_column_name,
    enable_early_stopping=False,
    #spark_context=sc, #enable this for databricks cluster
    compute_target=compute_target,  # enable this for ml cluster
    enable_dnn=True,  # enable this for ml cluster
    featurization="auto",
    n_cross_validations=5,
    verbosity=logging.INFO,
    max_concurrent_iterations=9,
    max_cores_per_iteration=-1,
    forecasting_parameters=forecasting_parameters,
)

# COMMAND ----------

# DBTITLE 1,Train
# submit a new training run
from azureml.train.automl.run import AutoMLRun
コード例 #17
0
from azureml.train.automl import AutoMLConfig

# NOTE(review): `autu_run_config` looks misspelled (auto_run_config?) and is
# never referenced in this snippet; `RunConfiguration` is also not imported
# here — confirm both against the rest of the file before relying on it.
autu_run_config = RunConfiguration(framework='python')

# Classification AutoML run on a compute cluster with an explicit
# validation dataset; 12 iterations, 4 at a time.
automl_config = AutoMLConfig(name="Automated ML Experiment",
                             task='classification',
                             primary_metric='AUC_weighted',
                             compute_target=aml_compute,
                             training_data=train_dataset,
                             validation_data=test_dataset,
                             label_column_name='Label',
                             featurization='auto',
                             iterations=12,
                             max_concurrent_iterations=4)
コード例 #18
0
ファイル: automl_step.py プロジェクト: jmservera/ACE_Azure_ML
# Time-based split: rows before 2015-10-01 train the model, rows after
# 2015-10-15 are held out for testing (the two-week gap is left out).
X_train = df_all.loc[df_all['datetime'] < '2015-10-01'].drop(X_drop, axis=1)
y_train = df_all.loc[df_all['datetime'] < '2015-10-01', Y_keep]

X_test = df_all.loc[df_all['datetime'] > '2015-10-15'].drop(X_drop, axis=1)
y_test = df_all.loc[df_all['datetime'] > '2015-10-15', Y_keep]

primary_metric = 'AUC_weighted'

automl_config = AutoMLConfig(
    task='classification',
    preprocess=False,
    name=experiment_name,
    debug_log='automl_errors.log',
    primary_metric=primary_metric,
    max_time_sec=1200,
    iterations=2,
    n_cross_validations=2,
    verbosity=logging.INFO,
    # pandas -> numpy via .values; the label frame is reduced to a 1-D array.
    X=X_train.values,
    y=y_train.values[:, 0],
    path=project_folder,
)

local_run = experiment.submit(automl_config, show_output=True)

# Block until the run finishes.
local_run.wait_for_completion(show_output=True)

# Re-wrap as an AutoMLRun to get the AutoML-specific API surface.
ml_run = AutoMLRun(experiment=experiment, run_id=local_run.id)
コード例 #19
0
def main(
    workspace=None,
    dataset_trainandvalidate_name=config.get_default_dataset_name(
        "trainandvalidate"),
):
    """
    Return AutoMLConfig
    """
    if not workspace:
        workspace = package_utils.get_workspace()

    # Provision (or reuse) the training cluster.
    args = aml_compute.parse_args()
    cluster_max_nodes = 5
    args.cluster_max_nodes = cluster_max_nodes
    args.cluster_sku = "Standard_D12_v2"
    compute_target = aml_compute.main(args)
    logger.info(msg="main",
                extra={"compute_target": compute_target.serialize()})

    trainandvalidate = Dataset.get_by_name(
        workspace=workspace,
        name=dataset_trainandvalidate_name,
    )

    settings = {
        # model
        "task": "classification",
        "primary_metric": "norm_macro_recall",
        # ensembling
        "iterations": 15,
        "allowed_models": ["LightGBM", "LogisticRegression", "SGD",
                           "XGBoostClassifier"],
        "enable_voting_ensemble": True,
        "enable_stack_ensemble": False,
        # data
        "validation_size": 0.3,
        "featurization": "auto",
        "training_data": trainandvalidate,
        "label_column_name": "Label",
        # compute
        "compute_target": compute_target,
        "max_cores_per_iteration": -1,
        "max_concurrent_iterations": cluster_max_nodes,
        "experiment_timeout_hours": 1.5,
    }

    return AutoMLConfig(**settings)
コード例 #20
0
def RunAutoML():
    """Endpoint-style entry: run AutoML locally and return per-child metrics.

    Workspace coordinates, dataset name and AutoML knobs come from the
    incoming JSON request; the label column is hard-coded to 'ActionTaken'.
    On success, returns a JSON string of float metrics keyed by iteration;
    on failure, returns 'error'.
    """
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    file_name = request.json['file_name']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)

    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')

    dataset_name = file_name

    # Materialize the registered dataset as a pandas frame.
    df = Dataset.get_by_name(workspace=ws, name=dataset_name)
    stock_dataset_df = df.to_pandas_dataframe()
    print('file successfully recieved.')
    y_df = stock_dataset_df['ActionTaken'].values
    x_df = stock_dataset_df.drop(['ActionTaken'], axis=1)
    print(y_df)
    ExperimentName = request.json['ExperimentName']
    tasks = request.json['tasks']
    iterations = request.json['iterations']
    n_cross_validations = request.json['n_cross_validations']
    iteration_timeout_minutes = request.json['iteration_timeout_minutes']
    primary_metric = request.json['primary_metric']
    max_concurrent_iterations = request.json['max_concurrent_iterations']

    try:
        automl_settings = {
            "name": ExperimentName,
            "iteration_timeout_minutes": iteration_timeout_minutes,
            "iterations": iterations,
            "n_cross_validations": n_cross_validations,
            "primary_metric": primary_metric,
            "preprocess": True,
            "max_concurrent_iterations": max_concurrent_iterations,
            "verbosity": logging.INFO
        }

        automl_config = AutoMLConfig(
            task=tasks,
            debug_log='automl_errors.log',
            path=os.getcwd(),
            X=x_df,
            y=y_df,
            **automl_settings,
        )

        experiment = Experiment(ws, 'automl_local_v2')
        remote_run = experiment.submit(automl_config, show_output=True)
        # Collect every float metric from each child iteration.
        children = list(remote_run.get_children())
        metricslist = {}
        for run in children:
            properties = run.get_properties()
            metrics = {
                k: v
                for k, v in run.get_metrics().items() if isinstance(v, float)
            }
            metricslist[int(properties['iteration'])] = metrics

        # BUG FIX: sort_index(1) passed the axis positionally — deprecated
        # since pandas 1.0 and a TypeError in pandas 2.x; use axis=1.
        rundata = pd.DataFrame(metricslist).sort_index(axis=1)
        rundata_toJson = rundata.to_json(orient='columns')

        return rundata_toJson
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; the 'error' return contract is preserved.
        return 'error'
コード例 #21
0
ファイル: automl_step.py プロジェクト: wmpauli/aml_pipeline
# `run` is the ambient pipeline/script run defined earlier in the file.
ws = run.experiment.workspace
def_data_store = ws.get_default_datastore()

# Choose a name for the experiment and specify the project folder.
experiment_name = 'automl-local-classification'
project_folder = './sample_projects/automl-local-classification'

experiment = Experiment(ws, experiment_name)

primary_metric = 'accuracy'

# Local classification sweep: 2 iterations, 3-fold CV, X_train/y_train are
# provided elsewhere in the file.
automl_config = AutoMLConfig(task='classification',
                             debug_log='automl_errors.log',
                             primary_metric=primary_metric,
                             iteration_timeout_minutes=60,
                             iterations=2,
                             n_cross_validations=3,
                             verbosity=logging.INFO,
                             X=X_train,
                             y=y_train,
                             path=project_folder)

local_run = experiment.submit(automl_config, show_output=True)

# Wait until the run finishes.
local_run.wait_for_completion(show_output=True)

# create new AutoMLRun object to ensure everything is in order
ml_run = AutoMLRun(experiment=experiment, run_id=local_run.id)


# aux function for comparing performance of runs (quick workaround for automl's _get_max_min_comparator)
コード例 #22
0
    # Connect with a service principal and get (or create) the BatchAI
    # compute target this run will use.
    workspace = Workspace.from_config(auth=servicePrincipalAuth)
    compute_manager = BatchAIManager(workspace)
    compute_target = compute_manager.get_or_create(compute_target_name)

    print('Prepare environment and code')
    script_folder = './training'
    shutil.copy('get_data.py', script_folder)

    automl_settings = {
        "max_time_sec": 120,
        "iterations": 20,
        "n_cross_validations": 5,
        "primary_metric": 'AUC_weighted',
        # BUG FIX: this entry used '=' instead of ':' inside the dict
        # literal, which is a SyntaxError and made the whole module
        # unimportable.
        "blacklist_algos": ['kNN', 'LinearSVM'],
        "preprocess": False,
        "concurrent_iterations": 5,
        "verbosity": logging.INFO
    }

    automl_config = AutoMLConfig(task='classification',
                                 debug_log='automl_errors.log',
                                 path='.',
                                 compute_target=compute_target,
                                 data_script=script_folder + '/get_data.py',
                                 **automl_settings)

    experiment = Experiment(workspace=workspace, name='fashionMNIST_autoML')
    remote_run = experiment.submit(automl_config, show_output=False)
    
コード例 #23
0
# Notebook shell escape: install/upgrade the Azure ML SDK in the kernel env.
get_ipython().system('pip3 install --upgrade azureml-sdk azureml-contrib-run')


# In[1]:


from azureml.core.workspace import Workspace
from azureml.core.experiment import Experiment
from azureml.train.automl import AutoMLConfig
import logging

# Forecasting sweep over X_train/y_train with the time-series knobs spread
# from `time_series_settings`; X_train, y_train and time_series_settings
# are defined in earlier cells.
automl_config = AutoMLConfig(task='forecasting',
                             primary_metric='normalized_root_mean_squared_error',
                             iterations=10,
                             X=X_train,
                             y=y_train,
                             n_cross_validations=5,
                             enable_ensembling=False,
                             verbosity=logging.INFO,
                             **time_series_settings)

ws = Workspace.from_config()
experiment = Experiment(ws, "forecasting_example")
local_run = experiment.submit(automl_config, show_output=True)
best_run, fitted_model = local_run.get_output()


# In[ ]:


コード例 #24
0
ファイル: azure_a2ml.py プロジェクト: raymanchester/a2ml
# Summarize the workspace/project configuration as a one-column DataFrame.
output['Subscription ID'] = ws.subscription_id
output['Workspace'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Project Directory'] = project_folder
# FIX: -1 for display.max_colwidth was deprecated in pandas 1.0 and removed
# later; None is the supported way to disable column-width truncation.
pd.set_option('display.max_colwidth', None)
pd.DataFrame(data=output, index=['']).T

# get_data script does this now
csv_file = "../data/" + experiment_name + ".csv"

# AutoML sweep settings for the remote regression run.
automl_settings = {
    "iteration_timeout_minutes": 10,
    "iterations": 30,
    "primary_metric": 'spearman_correlation',
    "preprocess": True,
    "verbosity": logging.DEBUG,
    "n_cross_validations": 5
}
# NOTE(review): this binds the read_csv *function* without calling it, so
# `dflow` never holds a dataflow; presumably dprep.read_csv(csv_file) was
# intended — confirm before relying on dflow downstream.
dflow = dprep.read_csv

automl_config = AutoMLConfig(task='regression',
                             debug_log='automl_errors.log',
                             path=project_folder,
                             compute_target=compute_target,
                             data_script="get_data.py",
                             **automl_settings)

experiment = Experiment(ws, 'automl_remote')
remote_run = experiment.submit(automl_config, show_output=True)
コード例 #25
0
    # Remote run environment: azureml-sdk[automl] via pip plus numpy via conda.
    cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'],
                                  conda_packages=['numpy'])
    conda_run_config.environment.python.conda_dependencies = cd

    # Sweep settings: 100 iterations, 1 hour per iteration, optimizing
    # AUC_weighted with preprocessing enabled.
    automl_settings = {
        "iteration_timeout_minutes": 60,
        "iterations": 100,
        "n_cross_validations": 5,
        "primary_metric": 'AUC_weighted',
        "preprocess": True,
        "max_cores_per_iteration": 2
    }

    automl_config = AutoMLConfig(task='classification',
                                 path=project_folder,
                                 run_configuration=conda_run_config,
                                 data_script=project_folder + "/get_data.py",
                                 **automl_settings)

    remote_run = experiment.submit(automl_config)

    # Canceling runs
    #
    # You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions

    print(remote_run.id)

    # Let the experiment run for a while before cancelling (demo of cancel()).
    time.sleep(180)

    # Cancel the ongoing experiment and stop scheduling new iterations
    remote_run.cancel()
# Sweep settings for the classification demo; the experiment name is
# timestamped so repeated submissions remain distinguishable.
automl_settings = dict(
    name="AutoML_Demo_Experiment_{0}".format(time.time()),
    iteration_timeout_minutes=10,
    iterations=20,
    n_cross_validations=5,
    primary_metric='AUC_weighted',
    preprocess=False,
    max_concurrent_iterations=10,
    verbosity=logging.INFO,
)

# Combine the fixed arguments (task, project path, remote compute, run
# configuration and the uploaded X/y data) with the sweep settings above.
automl_config = AutoMLConfig(task='classification',
                             debug_log='automl_errors.log',
                             path=project_folder,
                             compute_target=compute_target,
                             run_configuration=run_config,
                             X=X,  # the remotely uploaded data
                             y=y,
                             **automl_settings)

# ## Train
#
# Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.
#
# |Property|Description|
# |-|-|
# |**task**|classification or regression|
# |**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|
# |**X**|(sparse) array-like, shape = [n_samples, n_features]|
# |**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|
コード例 #27
0
# Load the self-configuring ESML project and authenticate via the Azure CLI.
p = ESMLProject.get_project_from_env_command_line() # self-aware about its config sources
p.describe()

cli_auth = AzureCliAuthentication()
ws = p.get_workspace_from_config(cli_auth) # Reads the current environment (dev,test, prod)config.json | Use CLI auth if MLOps
p.init(ws) # Automapping from datalake to Azure ML datasets, prints status

# TRAIN MODEL
automl_performance_config = p.get_automl_performance_config() # 1)Get config
aml_compute = p.get_training_aml_compute(ws) # 2)Get compute, for active environment

label = "Y"
train_6, validate_set_2, test_set_2 = p.split_gold_3(0.6,label) # 3) Auto register in Azure (M03_GOLD_TRAIN | M03_GOLD_VALIDATE | M03_GOLD_TEST)
# FIX: experiment_exit_score must be numeric, not a string — AutoML compares
# it against the primary-metric value to decide when to stop early.
automl_config = AutoMLConfig(task = 'regression', # 4)
                            primary_metric = azure_metric_regression.MAE, #Note: !MAPE
                            experiment_exit_score = 0.208, # DEMO purpose
                            compute_target = aml_compute,
                            training_data = p.GoldTrain,
                            label_column_name = label,
                            **automl_performance_config
                        )
train_as_pipeline = False
# Consistent return values from both AutoML alternatives below.
best_run, fitted_model, experiment = None, None, None

if train_as_pipeline:
    print("train_as_pipeline")
    best_run, fitted_model, experiment = AutoMLFactory(p).train_pipeline(automl_config) # 5) Train model
else:
    print("train_as_run")
    best_run, fitted_model, experiment = AutoMLFactory(p).train_as_run(automl_config)
コード例 #28
0
ファイル: TimeSeries.py プロジェクト: haupadhy/sp20-516-255
# Hold out a fixed two-day window (2017-08-08 06:00 to 2017-08-10 05:00)
# as the test set and preview the first rows.
test = dataset.time_between(datetime(2017, 8, 8, 6), datetime(2017, 8, 10, 5))
test.to_pandas_dataframe().reset_index(drop=True).head(5)

# Forecast up to 12 periods ahead.
max_horizon = 12

automl_settings = {
    'time_column_name': time_column_name,
    'max_horizon': max_horizon,
}

# Forecasting sweep with early stopping; three slower model families are
# excluded and the whole experiment is capped at ~18 minutes.
automl_config = AutoMLConfig(
    task='forecasting',
    primary_metric='normalized_root_mean_squared_error',
    blacklist_models=['ExtremeRandomTrees', 'AutoArima', 'Prophet'],
    experiment_timeout_hours=0.3,
    training_data=train,
    label_column_name=target_column_name,
    compute_target=compute_target,
    enable_early_stopping=True,
    n_cross_validations=3,
    verbosity=logging.INFO,
    **automl_settings)

# Submit to the remote compute and block until the sweep finishes.
remote_run = experiment.submit(automl_config, show_output=False)
remote_run
remote_run.wait_for_completion()

best_run, fitted_model = remote_run.get_output()
fitted_model.steps

# List the features engineered by the fitted time-series transformer step.
fitted_model.named_steps['timeseriestransformer'].get_engineered_feature_names(
)
コード例 #29
0
# Load features and labels from parquet; the label frame is unpivoted into a
# single long column so it can serve as the y vector.
x = dprep.read_parquet_file(ds.path('model_data_x.parquet'))
y = dprep.read_parquet_file(ds.path('model_data_y.parquet')).to_long(
    dprep.ColumnSelector(term='.*', use_regex=True))

project_folder = './automl'
# Regression sweep executed through a Spark context (sc), restricted to the
# whitelisted tree/boosting models; ensemble iterations are excluded.
automl_config = AutoMLConfig(
    task="regression",
    iteration_timeout_minutes=10,
    iterations=10,
    primary_metric="r2_score",
    n_cross_validations=5,
    debug_log="automl.log",
    verbosity=logging.INFO,
    spark_context=sc,  # noqa
    whitelist_models=[
        "GradientBoosting",
        "DecisionTree",
        "RandomForest",
        "ExtremeRandomTrees",
        "LightGBM",
    ],
    blacklist_models=["ensemble"],
    X=x,
    y=y,
    path=project_folder,
)

experiment = Experiment(ws, "host-ml-nt-ai-meetup")

db_run = experiment.submit(automl_config, show_output=True)
コード例 #30
0
def RunAutoML():
    """Flask endpoint: run an AutoML sweep on a registered dataset.

    Reads the workspace coordinates, dataset name, target column and AutoML
    sweep parameters from the JSON request body, submits the experiment,
    registers the best model, and returns a JSON document of per-iteration
    float metrics.  Returns the string 'error' on any failure.
    """
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    file_name = request.json['file_name']
    location = request.json['location']
    target_var = request.json['target_var']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)

    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')

    dataset_name = file_name

    # Fetch the registered dataset and split it into target / features.
    df = Dataset.get_by_name(workspace=ws, name=dataset_name)
    stock_dataset_df = df.to_pandas_dataframe()
    print('file successfully recieved.')
    stock_dataset_df.head()
    y_df = stock_dataset_df[target_var].values
    x_df = stock_dataset_df.drop([target_var], axis=1)
    print(y_df)

    # AutoML sweep parameters supplied by the caller.
    ExperimentName = request.json['ExperimentName']
    tasks = request.json['tasks']
    iterations = request.json['iterations']
    n_cross_validations = request.json['n_cross_validations']
    iteration_timeout_minutes = request.json['iteration_timeout_minutes']
    primary_metric = request.json['primary_metric']
    max_concurrent_iterations = request.json['max_concurrent_iterations']
    best_model = request.json['best_model']

    try:
        automl_settings = {
            "name": ExperimentName,
            "iteration_timeout_minutes": iteration_timeout_minutes,
            "iterations": iterations,
            "n_cross_validations": n_cross_validations,
            "primary_metric": primary_metric,
            "preprocess": True,
            "max_concurrent_iterations": max_concurrent_iterations,
            "verbosity": logging.INFO
        }

        automl_config = AutoMLConfig(
            task=tasks,
            debug_log='automl_errors.log',
            path='D:\\Stock_Prediction\\AutoML_Azure\\python\\Flask_API_Azure\\log',
            X=x_df,
            y=y_df,
            **automl_settings,
        )

        experiment = Experiment(ws, ExperimentName)
        remote_run = experiment.submit(automl_config, show_output=True)
        best_run, fitted_model = remote_run.get_output()
        print(best_run.get_file_names())

        # Register the best model, suffixing its name with today's date.
        from datetime import date
        model = best_run.register_model(model_name=best_model +
                                        str(date.today()),
                                        model_path='outputs/model.pkl')
        print(model.name, model.id, model.version, sep='\t')

        # Collect the float-valued metrics of every child iteration, keyed
        # by iteration number (DataFrame columns below).
        children = list(remote_run.get_children())
        metricslist = {}
        for run in children:
            properties = run.get_properties()
            metrics = {
                k: v
                for k, v in run.get_metrics().items() if isinstance(v, float)
            }
            metricslist[int(properties['iteration'])] = metrics

        # FIX: sort_index(1) used a deprecated positional axis argument
        # (removed in pandas 2.0); be explicit about sorting the columns.
        rundata = pd.DataFrame(metricslist).sort_index(axis=1)
        # FIX: DataFrame.rename takes 'columns', not 'column' — the original
        # raised a TypeError here and the endpoint always returned 'error'.
        # Also fixed the label typo 7: "right" -> "eight".
        rundata.rename(columns={
            0: "one",
            1: "two",
            2: "three",
            3: "four",
            4: "five",
            5: "six",
            6: "seven",
            7: "eight",
            8: "nine",
            9: "ten",
        },
                       inplace=True)
        rundata_toJson = rundata.to_json(orient='columns')
        print(rundata_toJson)
        return rundata_toJson
    except Exception:
        # FIX: narrowed the bare except; keep the original best-effort
        # contract of reporting failure as the string 'error'.
        return 'error'