def init(): global g_tf_sess, probabilities, label_dict, input_images parser = argparse.ArgumentParser(description="Start a tensorflow model serving") parser.add_argument('--model_name', dest="model_name", required=True) parser.add_argument('--labels_name', dest="labels_name", required=True) args, _ = parser.parse_known_args() workspace = Run.get_context(allow_offline=False).experiment.workspace label_ds = Dataset.get_by_name(workspace=workspace, name=args.labels_name) label_ds.download(target_path='.', overwrite=True) label_dict = get_class_label_dict() classes_num = len(label_dict) with slim.arg_scope(inception_v3.inception_v3_arg_scope()): input_images = tf.placeholder(tf.float32, [1, image_size, image_size, num_channel]) logits, _ = inception_v3.inception_v3(input_images, num_classes=classes_num, is_training=False) probabilities = tf.argmax(logits, 1) config = tf.ConfigProto() config.gpu_options.allow_growth = True g_tf_sess = tf.Session(config=config) g_tf_sess.run(tf.global_variables_initializer()) g_tf_sess.run(tf.local_variables_initializer()) model_path = Model.get_model_path(args.model_name) saver = tf.train.Saver() saver.restore(g_tf_sess, model_path)
def download_file(self, dataset_name, dataset_type='pandas', *args, **kwargs): """ Downloads a file from file storage :param dataset_name: name of the dataset :param dataset_type: name of the dataset type :param args: other arguments containing additional information :param kwargs: other keyword arguments containing additional information """ try: run = Run.get_context(allow_offline=False) except RunEnvironmentException as e: raise SkipServiceException('Skip AmlModelStorageHandler handler') ws = run.experiment.workspace ds = Dataset.get_by_name(workspace=ws, name=dataset_name) # Get a Dataset by name if dataset_type == 'spark': df = ds.to_spark_dataframe( ) # Load a Tabular Dataset into a Spark DataFrame else: df = ds.to_pandas_dataframe( ) # Load a Tabular Dataset into a pandas DataFrame return df
def main(): parser = argparse.ArgumentParser() #params = {"objective": "binary:logistic", "max_depth": 3} run = Run.get_context() parser.add_argument('--num_boost_round', type=int, default=5, help="Number of boosting rounds") parser.add_argument('--max_depth', type=int, default=3, help="Maximum depth of the trees to be boosted") parser.add_argument('--learning_rate', type=float, default=0.001, help="Learning rate, xgb's eta") parser.add_argument('--gamma', type=float, default=0.1, help="Minimum loss reduction") parser.add_argument('--reg_lambda', type=float, default=0.1, help="L2 regularization term on weights") parser.add_argument('--scale_pos_weight', type=float, default=1.0, help="Balancing of positive and negative weights") args = parser.parse_args() workspace = run.experiment.workspace key = "heart-failure" #"Heart failure" description_text = "Heart failure dataset for udacity capstone" if key in workspace.datasets.keys(): found = True dataset = Dataset.get_by_name(workspace, name='heart-failure') df = dataset.to_pandas_dataframe() X, y = df.loc[:, ~df.columns.isin(["DEATH_EVENT"])], df.loc[:, "DEATH_EVENT"] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123) params = {"scale_pos_weight": np.float(args.scale_pos_weight), "reg_lambda": np.float(args.reg_lambda), "gamma": np.float(args.gamma), "learning_rate": np.float(args.learning_rate), "max_depth": np.int(args.max_depth), "num_boost_round": np.int(args.num_boost_round)} run.log("scale_pos_weight:", np.float(args.scale_pos_weight)) run.log("Max depth:", np.int(args.max_depth)) run.log("Learning rate:", np.float(args.learning_rate)) run.log("Boosting rounds:", np.int(args.num_boost_round)) run.log("Gamma (minimum loss reduction):", np.float(args.gamma)) run.log("Lambda (L2 regularization):", np.float(args.reg_lambda)) xgb_model = xgb.XGBClassifier(objective="binary:logistic", n_estimators = np.int(args.num_boost_round), max_depth = np.int(args.max_depth), learning_rate=np.float(args.learning_rate), gamma= np.float(args.gamma), reg_lambda=np.float(args.reg_lambda), scale_pos_weight=np.float(args.scale_pos_weight), random_state=123) xgb_model.fit(X_train, y_train) y_pred = xgb_model.predict(X_test) fpr, tpr, thresholds = roc_curve(y_test, y_pred, pos_label=1) xgb_auc = auc(fpr, tpr) run.log("AUC", np.float(xgb_auc)) os.makedirs('outputs', exist_ok=True) joblib.dump(value=xgb_model, filename='outputs/model.pkl')
def unregister_dataset(workspace=None, dataset_name=None, logger=None): try: dataset = Dataset.get_by_name(workspace, dataset_name) except: print('There is no dataset registered with name "{}".'.format( dataset_name)) return dataset.unregister_all_versions() print('Successfully unregistered datasets with name "{}".'.format( dataset_name))
def RunAutoML(): subscription_id = request.json['subscription_id'] resource_group = request.json['resource_group'] workspace_name = request.json['workspace_name'] file_name = request.json['file_name'] #location = request.json['location'] ws = Workspace(subscription_id=subscription_id, resource_group=resource_group, workspace_name=workspace_name) print("Found workspace {} at location {}".format(ws.name, ws.location)) print('Found existing Workspace.') dataset_name = file_name # Get a dataset by name df = Dataset.get_by_name(workspace=ws, name=dataset_name) stock_dataset_df = df.to_pandas_dataframe() print('file successfully received.') stock_dataset_df.head() #stock_dataset_json = stock_dataset_df.to_json(orient='split') #print(stock_dataset_json) y_df = stock_dataset_df['ActionTaken'].values x_df = stock_dataset_df.drop(['ActionTaken'], axis=1) ExperimentName = request.json['ExperimentName'] tasks = request.json['tasks'] iterations = request.json['iterations'] iteration_timeout_minutes = request.json['iteration_timeout_minutes'] primary_metric = request.json['primary_metric'] #n_cross_validations = request.json['n_cross_validations'] try: automl_config = AutoMLConfig( task=tasks, X=x_df, y=y_df, iterations=iterations, iteration_timeout_minutes=iteration_timeout_minutes, primary_metric=primary_metric, #n_cross_validations=n_cross_validations, preprocess=True, ) experiment = Experiment(ws, ExperimentName) run = experiment.submit(config=automl_config, show_output=True) best_model, fitted_model = run.get_output() return 'ok' except: return 'error'
def get_df_from_dataset(dataset_path, dataset_name, dataset_is_remote=False): """ Return a DataFrame by reading the dataset from either a directory containing csv files or Azure Dataset """ if dataset_is_remote: workspace = package_utils.get_workspace() df = Dataset.get_by_name(workspace=workspace, name=dataset_name).to_pandas_dataframe() else: df = get_df_from_directory(pathlib.Path(dataset_path, dataset_name)) return df
def main(): parser = argparse.ArgumentParser() parser.add_argument('--n_estimators', type=int, default=50, help="Number of trees") parser.add_argument('--max_depth', type=int, default=3, help="Maximum depth of the trees to be used") parser.add_argument('--min_samples_split', type=int, default=2, help="Minimum number of samples required to split a node") run = Run.get_context() workspace = run.experiment.workspace key = "heart-failure" #"Heart failure" description_text = "Heart failure dataset for udacity capstone" if key in workspace.datasets.keys(): found = True dataset = Dataset.get_by_name(workspace, name='heart-failure') df = dataset.to_pandas_dataframe() X, y = df.loc[:, ~df.columns.isin(["DEATH_EVENT"])], df.loc[:, "DEATH_EVENT"] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123) args = parser.parse_args() run.log("n_estimators:", np.int(args.n_estimators)) run.log("max_depth:", np.int(args.max_depth)) run.log("min_samples_split:", np.int(args.min_samples_split)) rf_model = RandomForestClassifier(n_estimators=np.int(args.n_estimators), max_depth=np.int(args.max_depth), min_samples_split=np.int( args.min_samples_split), random_state=123) rf_model.fit(X_train, y_train) y_pred = rf_model.predict(X_test) fpr, tpr, thresholds = roc_curve(y_test, y_pred, pos_label=1) rf_auc = auc(fpr, tpr) run.log("AUC", np.float(rf_auc)) os.makedirs('outputs', exist_ok=True) joblib.dump(value=rf_model, filename='outputs/model.pkl')
def main(): # Add arguments to script parser = argparse.ArgumentParser() parser.add_argument( '--C', type=float, default=1.0, help= "Inverse of regularization strength. Smaller values cause stronger regularization" ) parser.add_argument('--max_iter', type=int, default=100, help="Maximum number of iterations to converge") args = parser.parse_args() run = Run.get_context() workspace = run.experiment.workspace run.log("Regularization Strength:", np.float(args.C)) run.log("Max iterations:", np.int(args.max_iter)) #The dataset is registered using Python SDK in the notebook dataset_name = 'Framingham-Prepared' # Get a dataset by name ds = Dataset.get_by_name(workspace=workspace, name=dataset_name) x, y = clean_data(ds) # TODO: Split data into train and test sets. x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=223) model = LogisticRegression(C=args.C, max_iter=args.max_iter).fit(x_train, y_train) accuracy = model.score(x_test, y_test) run.log("Accuracy", np.float(accuracy)) #save the best model os.makedirs('outputs', exist_ok=True) joblib.dump(value=model, filename='outputs/model.joblib')
def main(): parser = argparse.ArgumentParser() parser.add_argument('--kernel', type=str, default='linear', help='Kernel type to be used in the algorithm') parser.add_argument('--penalty', type=float, default=1.0, help='Penalty parameter of the error term') args = parser.parse_args() run = Run.get_context() ws = run.experiment.workspace run.log('Kernel type', np.str(args.kernel)) run.log('Penalty', np.float(args.penalty)) heart_dataset = Dataset.get_by_name(workspace=ws, name='Heart-Failure') df = heart_dataset.to_pandas_dataframe() y = df[df.columns[-1]] X = df.drop(df.columns[-1],axis=1) scaler = StandardScaler() scaled_X = scaler.fit_transform(X) # dividing X, y into train and test data X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, random_state=0) # training a linear SVM classifier from sklearn.svm import SVC svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train) svm_predictions = svm_model_linear.predict(X_test) # model accuracy for X_test accuracy = accuracy_score(y_test, svm_predictions) #print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy)) run.log('Accuracy', np.float(accuracy)) os.makedirs('outputs', exist_ok=True) # files saved in the "outputs" folder are automatically uploaded into run history joblib.dump(svm_model_linear, 'outputs/model.joblib')
def DataBlob(): subscription_id = request.json['subscription_id'] resource_group = request.json['resource_group'] workspace_name = request.json['workspace_name'] file_name = request.json['file_name'] #location = request.json['location'] ws = Workspace(subscription_id=subscription_id, resource_group=resource_group, workspace_name=workspace_name) print("Found workspace {} at location {}".format(ws.name, ws.location)) print('Found existing Workspace.') ds = ws.get_default_datastore() print(ds.datastore_type, ds.account_name, ds.container_name) try: stock_ds = Dataset.Tabular.from_delimited_files( path=ds.path(file_name)) stock_ds = stock_ds.register(workspace=ws, name=file_name, description='stock training data') print('Found existing file name') return "This file name exists. Please rename or upload a new file" except: print('Uploading new file, please wait') stock_dataset = Dataset.Tabular.from_delimited_files( path=ds.path(file_name)) stock_dataset = stock_dataset.register(workspace=ws, name=file_name, description='stock training data') #file_name = json.loads(file_name) print(type(file_name)) new_data = Dataset.get_by_name(ws, file_name, version='latest') print(new_data.name) print(type(new_data.name)) stock_dataset_df = new_data.to_pandas_dataframe() print('file successfully received.') stock_dataset_json = stock_dataset_df.to_json(orient='split') return stock_dataset_json
def get_dataset(workspace=None, dataset_name=None, dataset_version=None, dataset_id=None, logger=None): if dataset_name is None and dataset_id is None: raise UserErrorException('Argument {} or {} must be specified'.format( DATASET_NAME.long_form, DATASET_ID.long_form)) if dataset_name is not None and dataset_id is not None: raise UserErrorException( 'Arguments {} and {} cannot be specified at the same time'.format( DATASET_NAME.long_form, DATASET_ID.long_form)) if dataset_version != DATASET_VERSION.default and dataset_name is None: raise UserErrorException( 'Argument {} must be specified with {}'.format( DATASET_VERSION.long_form, DATASET_NAME.long_form)) dataset_version = dataset_version or DATASET_VERSION.default if dataset_name is not None: dataset = Dataset.get_by_name(workspace, dataset_name, dataset_version) else: dataset = Dataset.get_by_id(workspace, dataset_id) return _dataset_to_printable(dataset)
def main(): print(azureml.core.VERSION) dataset_name = getRuntimeArgs() run = Run.get_context() ws = run.experiment.workspace ds = Dataset.get_by_name(workspace=ws, name=dataset_name) automl_settings = { "task": 'classification', "verbosity": logging.INFO, "primary_metric": 'accuracy', "experiment_timeout_hours": 0.05, "n_cross_validations": 3, "enable_stack_ensemble": False, "enable_voting_ensemble": False, "model_explainability": True, "preprocess": True, "max_cores_per_iteration": -1, "max_concurrent_iterations": 4, "training_data": ds, "drop_column_names": ['Sno'], "label_column_name": 'Risk' } automl_config = AutoMLConfig(**automl_settings) run = run.submit_child(automl_config, show_output=True) best_run, fitted_model = run.get_output() output_dir = './outputs/' os.makedirs(output_dir, exist_ok=True) shutil.copy2('automl.log', output_dir) with open(output_dir + 'best_run.json', 'w') as f: json.dump(best_run.get_details(), f, default=str)
def _setup_dataset(self, ds_name, data_paths): """ registers datasets with azureml workspace :param str ds_name: [required] name to give the dataset in azureml. :param str data_paths: [required] list of paths to your data on the datastore. """ self.named_ds = [] count = 1 for data_path in data_paths: curr_name = ds_name + str(count) path_on_datastore = self.blob_ds.path(data_path) input_ds = Dataset.File.from_files(path=path_on_datastore, validate=False) try: registered_ds = input_ds.register(workspace=self.ws, name=curr_name, create_new_version=True) except Exception as e: n, v = self._parse_exception(e) registered_ds = Dataset.get_by_name(self.ws, name=n, version=v) self.named_ds.append(registered_ds.as_named_input(curr_name)) count = count + 1
ds.upload(src_dir=file_path, target_path=None, overwrite=True, show_progress=True) stock_ds = Dataset.Tabular.from_delimited_files(path=datastore.path(file_name)) stock_ds = stock_ds.register(workspace=ws, name=file_name, description='Introact Owner Data') compute_target = AmlCompute(ws, cluster_name) print('Found existing AML compute context.') dataset_name = file_name # Get a dataset by name df = Dataset.get_by_name(workspace=ws, name=dataset_name) X = df.drop_columns(columns=[target_var]) y = df.keep_columns(columns=[target_var], validate=True) print(y) #y = diabetes.pop('Y') #X_train, X_test, y_train, y_test = train_test_split(diabetes, y, test_size=0.2, random_state=0) #data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}} conda_run_config = RunConfiguration(framework="python") conda_run_config.environment.docker.enabled = True conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy', 'py-xgboost<=0.80']) conda_run_config.environment.python.conda_dependencies = cd print('run config is ready')
def main(req: func.HttpRequest) -> func.HttpResponse: logging.info('Python HTTP trigger function processed a request.') # interactive_auth = InteractiveLoginAuthentication(tenant_id="b88f1ff4-e3ab-4adb-83e6-4ea99d41c665") sp = ServicePrincipalAuthentication(tenant_id='b88f1ff4-e3ab-4adb-83e6-4ea99d41c665', service_principal_id='2e90efa1-d53f-45d4-96d8-7adde8a02cdc', service_principal_password='******' ) query = req.params.get('query') if not query: try: req_body = req.get_json() except ValueError: pass else: query = req_body.get('query') if query == 'run': try: ws = Workspace.get(name="vrd-ml", subscription_id="b9301f45-7da5-41f6-9125-1331de94f262", resource_group="vrd-dev-asia", auth=sp ) compute_name = 'automl-compute' if compute_name in ws.compute_targets: compute_target = ws.compute_targets[compute_name] if compute_target and type(compute_target) is AmlCompute: print('found compute target. just use it. ' + compute_name) else: print('creating a new compute target...') provisioning_config = AmlCompute.provisioning_configuration(vm_size = 'STANDARD_D2_V2', min_nodes = 0, max_nodes = 4) compute_target = ComputeTarget.create(ws, compute_name, provisioning_config) compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) dataset = Dataset.get_by_name(ws, name='datasetfunc') train_data, test_data = dataset.random_split(percentage=0.8, seed=223) label = "ERP" automl_config = AutoMLConfig(task = 'regression', compute_target = compute_name, training_data = train_data, label_column_name = label, validation_data = test_data, # n_cross_validations= 3, primary_metric= 'r2_score', enable_early_stopping= True, experiment_timeout_hours= 0.3, max_concurrent_iterations= 4, max_cores_per_iteration= -1, verbosity= logging.INFO ) experiment_name = 'expfunc' experiment = Experiment(workspace = ws, name = experiment_name) run = experiment.submit(automl_config, show_output = True) run run.wait_for_completion() except ValueError: pass return func.HttpResponse("AutoML Run Completed") else: return func.HttpResponse( "This HTTP triggered function executed successfully. Pass a name in the query string or in the request body for a personalized response.", status_code=200 )
return sum(edge_lengths) else: raise ValueError('Path not found') GRAPH_FILE_PATH = "https://grab5033896937.blob.core.windows.net/azureml/Dataset/grab/singapore.graphml" try: # load workspace configuration from the config.json file in the current folder. #ws = Workspace.from_config() ws = Workspace.get(name="<<Insert Name>>", subscription_id="<<Insert Subscription Id>>", resource_group="<<Insert Resource Group>>") dataset = Dataset.get_by_name(ws, 'sg_graphml') # list the files referenced by sg_graphml dataset GRAPH_FILE_PATH = dataset.to_path() G = ox.load_graphml(GRAPH_FILE_PATH) except: G = ox.graph_from_place('Singapore', network_type='drive') ox.save_graphml(G, filepath=GRAPH_FILE_PATH) def init(): global model # Get the path where the deployed model can be found. model_path = Model.get_model_path('grab-model-reg') model = joblib.load(model_path)
def main(): e = Env() # Get Azure machine learning workspace aml_workspace = Workspace.get(name=e.workspace_name, subscription_id=e.subscription_id, resource_group=e.resource_group) print(f"get_workspace: {aml_workspace}") # Get Azure machine learning cluster aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size) if aml_compute is not None: print(f"aml_compute: {aml_compute}") # Prepare the dataset input data_store = aml_workspace.get_default_datastore() print("data_store: %s" % data_store.name) train_ds_name = e.dataset_name train_data_path = e.datafile_path sources_directory_train = e.sources_directory_train pipeline_name = e.pipeline_name build_id = e.build_id # Register the train dataset if (train_ds_name not in aml_workspace.datasets): train_path_on_datastore = train_data_path # +'/*.csv' train_ds_data_path = [(data_store, train_path_on_datastore)] train_ds = Dataset.File.from_files(path=train_ds_data_path, validate=False) train_ds = train_ds.register(workspace=aml_workspace, name=train_ds_name, description='train data', tags={'format': 'CSV'}, create_new_version=True) else: train_ds = Dataset.get_by_name(aml_workspace, train_ds_name) train_input = train_ds.as_named_input('train_input') # Conda environment environment = Environment.from_conda_specification( "myenv", os.path.join(sources_directory_train, "conda_dependencies.yml")) # Logging into Azure Application Insights env = { "APPLICATIONINSIGHTS_CONNECTION_STRING": e.applicationinsights_connection_string } env['AZUREML_FLUSH_INGEST_WAIT'] = '' env['DISABLE_ENV_MISMATCH'] = True environment.environment_variables = env from ff.util.helper import build_parallel_run_config # PLEASE MODIFY the following three settings based on your compute and # experiment timeout. process_count_per_node = 6 node_count = 3 # this timeout(in seconds) is inline with AutoML experiment timeout or (no # of iterations * iteration timeout) run_invocation_timeout = 3700 parallel_run_config = build_parallel_run_config(sources_directory_train, environment, aml_compute, node_count, process_count_per_node, run_invocation_timeout) from azureml.pipeline.core import PipelineData output_dir = PipelineData(name="training_output", datastore=data_store) #from azureml.contrib.pipeline.steps import ParallelRunStep from azureml.pipeline.steps import ParallelRunStep parallel_run_step = ParallelRunStep( name="many-models-training", parallel_run_config=parallel_run_config, allow_reuse=False, inputs=[train_input], output=output_dir # models=[], # arguments=[] ) pipeline = Pipeline(workspace=aml_workspace, steps=parallel_run_step) pipeline._set_experiment_name pipeline.validate() published_pipeline = pipeline.publish(name=pipeline_name, description="FF AutomML pipeline", version=build_id) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}')
def main(): run = Run.get_context() ws = run.experiment.workspace # Add arguments to script parser = argparse.ArgumentParser() parser.add_argument("--learning_rate", type=float, default=0.3, help="Boosting learning rate (xgb's 'eta')") parser.add_argument("--n_estimators", type=int, default=100, help="Number of boosting rounds") parser.add_argument("--max_depth", type=int, default=6, help="Maximum tree depth for base learners") parser.add_argument( "--min_child_weight", type=int, default=1, help="Minimum sum of instance weight(hessian) needed in a child") parser.add_argument( "--gamma", type=float, default=0, help= "Minimum loss reduction required to make a further partition on a leaf node of the tree" ) parser.add_argument("--subsample", type=float, default=1.0, help="Subsample ratio of the training instance") parser.add_argument( "--colsample_bytree", type=float, default=1.0, help="Subsample ratio of columns when constructing each tree") parser.add_argument("--reg_lambda", type=float, default=1.0, help="L2 regularization term on weights") parser.add_argument("--reg_alpha", type=float, default=0, help="L1 regularization term on weights") args = parser.parse_args() run.log("learning_rate:", np.float(args.learning_rate)) run.log("n_estimators:", int(args.n_estimators)) run.log("max_depth:", int(args.max_depth)) run.log("min_child_weight:", int(args.min_child_weight)) run.log("gamma:", np.float(args.gamma)) run.log("subsample:", np.float(args.subsample)) run.log("colsample_bytree:", np.float(args.colsample_bytree)) run.log("reg_lambda:", np.float(args.reg_lambda)) run.log("reg_alpha:", np.float(args.reg_alpha)) dataset = Dataset.get_by_name(ws, name="attrition_train") df = dataset.to_pandas_dataframe() X_train, X_val, y_train, y_val = data_prep(df) clf = XGBClassifier(learning_rate=args.learning_rate, n_estimators=args.n_estimators, max_depth=args.max_depth, min_child_weight=args.min_child_weight, gamma=args.gamma, subsample=args.subsample, colsample_bytree=args.colsample_bytree, reg_lambda=args.reg_lambda, reg_alpha=args.reg_alpha) clf.fit(X_train, y_train) accuracy = np.round(clf.score(X_val, y_val), 3) run.log("accuracy", np.float(accuracy)) auc_weighted = np.round( roc_auc_score(y_val, clf.predict(X_val), average='weighted'), 3) run.log("AUC_weighted", np.float(auc_weighted)) os.makedirs("outputs", exist_ok=True) # files saved in the "outputs" folder are automatically uploaded into run history joblib.dump(clf, "outputs/hyperdrive_model.pkl")
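The script above is written to be driven by a hyperparameter sweep: it logs each sampled value and saves outputs/hyperdrive_model.pkl. The following is a minimal sketch of how such a sweep could submit it; the compute target name, environment name, experiment name, and search ranges are illustrative assumptions, not part of the original script.

# Hedged sketch: driving the training script above with a HyperDrive sweep.
# 'cpu-cluster', 'attrition-env', 'attrition-hyperdrive', and the ranges are assumptions.
from azureml.core import Environment, Experiment, ScriptRunConfig, Workspace
from azureml.train.hyperdrive import (HyperDriveConfig, PrimaryMetricGoal,
                                      RandomParameterSampling, choice, uniform)

ws = Workspace.from_config()
src = ScriptRunConfig(source_directory=".",
                      script="train.py",
                      compute_target="cpu-cluster",
                      environment=Environment.get(ws, "attrition-env"))

# Sample a subset of the arguments the script exposes
sampling = RandomParameterSampling({
    "--learning_rate": uniform(0.01, 0.3),
    "--max_depth": choice(3, 4, 5, 6),
    "--n_estimators": choice(50, 100, 200),
})

hd_config = HyperDriveConfig(run_config=src,
                             hyperparameter_sampling=sampling,
                             primary_metric_name="AUC_weighted",
                             primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                             max_total_runs=20,
                             max_concurrent_runs=4)

run = Experiment(ws, "attrition-hyperdrive").submit(hd_config)
run.wait_for_completion(show_output=True)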
else: print( 'Current model does NOT perform better and thus will NOT be deployed!' ) eval_info = {} eval_info["model_acc"] = latest_model_accuracy eval_info["deployed_model_acc"] = current_model_accuracy eval_info["deploy_model"] = deploy_model eval_info['train_run_id'] = latest_model_run_id eval_info['eval_run_id'] = run.id if deploy_model: os.chdir(args.input) surge_ds_name = 'Surge Dataset' surge_ds = Dataset.get_by_name(workspace=ws, name=surge_ds_name) model_description = 'Machine learning model to classify the surge price category' # Create model datasheet from datetime import datetime from pytz import timezone etz = 'US/Eastern' time_stamp = datetime.now(timezone(etz)) time_stamp_str = time_stamp.strftime('%A %m/%d/%Y %I:%M:%S%p') model_tags = {} model_tags['title'] = 'Surge category classifier' model_tags[ 'datasheet_description'] = 'Data sheet last updated: ' + time_stamp_str model_tags[
def RunAutoML(): subscription_id = request.json['subscription_id'] resource_group = request.json['resource_group'] workspace_name = request.json['workspace_name'] file_name = request.json['file_name'] location = request.json['location'] target_var = request.json['target_var'] ws = Workspace(subscription_id=subscription_id, resource_group=resource_group, workspace_name=workspace_name) print("Found workspace {} at location {}".format(ws.name, ws.location)) print('Found existing Workspace.') dataset_name = file_name # Get a dataset by name df = Dataset.get_by_name(workspace=ws, name=dataset_name) stock_dataset_df = df.to_pandas_dataframe() print('file successfully received.') stock_dataset_df.head() #stock_dataset_json = stock_dataset_df.to_json(orient='split') #print(stock_dataset_json) y_df = stock_dataset_df[target_var].values x_df = stock_dataset_df.drop([target_var], axis=1) print(y_df) ExperimentName = request.json['ExperimentName'] tasks = request.json['tasks'] iterations = request.json['iterations'] n_cross_validations = request.json['n_cross_validations'] iteration_timeout_minutes = request.json['iteration_timeout_minutes'] primary_metric = request.json['primary_metric'] max_concurrent_iterations = request.json['max_concurrent_iterations'] best_model = request.json['best_model'] #n_cross_validations = request.json['n_cross_validations'] try: automl_settings = { "name": ExperimentName, "iteration_timeout_minutes": iteration_timeout_minutes, "iterations": iterations, "n_cross_validations": n_cross_validations, "primary_metric": primary_metric, "preprocess": True, "max_concurrent_iterations": max_concurrent_iterations, "verbosity": logging.INFO } automl_config = AutoMLConfig( task=tasks, debug_log='automl_errors.log', path= 'D:\\Stock_Prediction\\AutoML_Azure\\python\\Flask_API_Azure\\log', #compute_target = 'Automlvm', X=x_df, y=y_df, **automl_settings, ) experiment = Experiment(ws, ExperimentName) remote_run = experiment.submit(automl_config, show_output=True) best_run, fitted_model = remote_run.get_output() #print(best_run) print(best_run.get_file_names()) #Register the model from datetime import date model = best_run.register_model(model_name=best_model + str(date.today()), model_path='outputs/model.pkl') print(model.name, model.id, model.version, sep='\t') children = list(remote_run.get_children()) metricslist = {} for run in children: properties = run.get_properties() metrics = { k: v for k, v in run.get_metrics().items() if isinstance(v, float) } metricslist[int(properties['iteration'])] = metrics rundata = pd.DataFrame(metricslist).sort_index(1) rundata.rename(columns={ 0: "one", 1: "two", 2: "three", 3: "four", 4: "five", 5: "six", 6: "seven", 7: "eight", 8: "nine", 9: "ten", }, inplace=True) rundata_toJson = rundata.to_json(orient='columns') print(rundata_toJson) return rundata_toJson except: return 'error'
from azureml.core.experiment import Experiment from azureml.core import Run experiment = Experiment(ws, 'Myexp2_v1_test21') best_run = Run(experiment=experiment, run_id='AutoML_74e9d9dc-f347-4392-b8bb-3edeb4a6afad_8') fitted_model = Run(experiment=experiment, run_id='AutoML_74e9d9dc-f347-4392-b8bb-3edeb4a6afad_8') #print(best_run.register_model() print(fitted_model) # Get a dataset by name from azureml.core.dataset import Dataset file_name = '2018Q4PredictionTrainedSet101.csv' stock_dataset = Dataset.get_by_name(ws, '2018Q4PredictionTrainedSet101.csv') #stock_dataset #dataset = Dataset.Tabular.from_delimited_files(stock_dataset) stock_dataset.to_pandas_dataframe().describe() stock_dataset.take(3).to_pandas_dataframe() X = stock_dataset.drop_columns(columns=['ActionTaken']) y = stock_dataset.keep_columns(columns=['ActionTaken'], validate=True) print(y) #print('X and y are ready!') stock_dataset_df = stock_dataset.to_pandas_dataframe() y_df = stock_dataset_df['ActionTaken'].values x_df = stock_dataset_df.drop(['ActionTaken'], axis=1) y_predict = fitted_model.predict(x_df) print(y_predict)
def main( workspace=None, dataset_trainandvalidate_name=config.get_default_dataset_name( "trainandvalidate"), ): """ Return AutoMLConfig """ if not workspace: workspace = package_utils.get_workspace() args = aml_compute.parse_args() cluster_max_nodes = 5 args.cluster_max_nodes = cluster_max_nodes args.cluster_sku = "Standard_D12_v2" compute_target = aml_compute.main(args) logger.info(msg="main", extra={"compute_target": compute_target.serialize()}) trainandvalidate = Dataset.get_by_name( workspace=workspace, name=dataset_trainandvalidate_name, ) model_settings = { "task": "classification", "primary_metric": "norm_macro_recall", } ensemble_settings = { "iterations": 15, "allowed_models": ["LightGBM", "LogisticRegression", "SGD", "XGBoostClassifier"], "enable_voting_ensemble": True, "enable_stack_ensemble": False, } dataset_settings = { "validation_size": 0.3, "featurization": "auto", "training_data": trainandvalidate, "label_column_name": "Label", } compute_settings = { "compute_target": compute_target, "max_cores_per_iteration": -1, "max_concurrent_iterations": cluster_max_nodes, "experiment_timeout_hours": 1.5, } automl_config = AutoMLConfig( **model_settings, **ensemble_settings, **dataset_settings, **compute_settings, ) return automl_config
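A minimal usage sketch for the config builder above, assuming it lives in a module that can be imported alongside package_utils (which the snippet already uses); the experiment name is an assumption.

# Hedged usage sketch: submit the AutoMLConfig returned by main().
# 'automl-classification' is an assumed experiment name.
from azureml.core import Experiment

workspace = package_utils.get_workspace()
automl_config = main(workspace=workspace)
experiment = Experiment(workspace, "automl-classification")
remote_run = experiment.submit(automl_config, show_output=True)
best_run, fitted_model = remote_run.get_output()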
def RunAutoML(): subscription_id = request.json['subscription_id'] resource_group = request.json['resource_group'] workspace_name = request.json['workspace_name'] file_name = request.json['file_name'] #location = request.json['location'] ws = Workspace(subscription_id=subscription_id, resource_group=resource_group, workspace_name=workspace_name) print("Found workspace {} at location {}".format(ws.name, ws.location)) print('Found existing Workspace.') dataset_name = file_name # Get a dataset by name df = Dataset.get_by_name(workspace=ws, name=dataset_name) stock_dataset_df = df.to_pandas_dataframe() print('file successfully recieved.') stock_dataset_df.head() #stock_dataset_json = stock_dataset_df.to_json(orient='split') #print(stock_dataset_json) y_df = stock_dataset_df['ActionTaken'].values x_df = stock_dataset_df.drop(['ActionTaken'], axis=1) print(y_df) ExperimentName = request.json['ExperimentName'] tasks = request.json['tasks'] iterations = request.json['iterations'] n_cross_validations = request.json['n_cross_validations'] iteration_timeout_minutes = request.json['iteration_timeout_minutes'] primary_metric = request.json['primary_metric'] max_concurrent_iterations = request.json['max_concurrent_iterations'] #n_cross_validations = request.json['n_cross_validations'] try: automl_settings = { "name": ExperimentName, "iteration_timeout_minutes": iteration_timeout_minutes, "iterations": iterations, "n_cross_validations": n_cross_validations, "primary_metric": primary_metric, "preprocess": True, "max_concurrent_iterations": max_concurrent_iterations, "verbosity": logging.INFO } automl_config = AutoMLConfig( task=tasks, debug_log='automl_errors.log', path=os.getcwd(), #compute_target = 'Automlvm', X=x_df, y=y_df, **automl_settings, ) experiment = Experiment(ws, 'automl_local_v2') remote_run = experiment.submit(automl_config, show_output=True) children = list(remote_run.get_children()) metricslist = {} for run in children: properties = run.get_properties() metrics = { k: v for k, v in run.get_metrics().items() if isinstance(v, float) } metricslist[int(properties['iteration'])] = metrics rundata = pd.DataFrame(metricslist).sort_index(1) rundata_toJson = rundata.to_json(orient='columns') return rundata_toJson except: return 'error'
def Prediction(): subscription_id = request.json['subscription_id'] resource_group = request.json['resource_group'] workspace_name = request.json['workspace_name'] location = request.json['location'] file_name = request.json['file_name'] target_var = request.json['target_var'] best_model = request.json['best_model'] Model_path = request.json['Model_path'] ws = Workspace(subscription_id=subscription_id, resource_group=resource_group, workspace_name=workspace_name) print("Found workspace {} at location {}".format(ws.name, ws.location)) print('Found existing Workspace.') dataset_name = file_name # Get a dataset by name df = Dataset.get_by_name(workspace=ws, name=dataset_name) stock_dataset_df = df.to_pandas_dataframe() print('file successfully recieved.') X = df.drop_columns(columns=[target_var]) y = df.keep_columns(columns=[target_var], validate=True) y_df = stock_dataset_df[target_var].values x_df = stock_dataset_df.drop([target_var], axis=1) print(y) #from azureml.core import Run #experiment=Experiment(ws, workspace_name) #from azureml.core.model import Model #model = Model(ws, name=Model_path) #model.download(exist_ok=True) from sklearn.externals import joblib cwd = 'D:\DCSAIAUTOML\BestModels\Azure' model_path = os.path.join(cwd, Model_path, best_model, "outputs") #model_path1 = os.path.join(model_path, "outputs", "model.pkl") print(model_path) os.chdir(model_path) model = joblib.load('model.pkl') #best_run = Run(experiment=experiment, run_id='AutoML_74e9d9dc-f347-4392-b8bb-3edeb4a6afad_8') #fitted_model = Run(experiment=experiment, run_id='AutoML_74e9d9dc-f347-4392-b8bb-3edeb4a6afad_8') print(model) try: y_predict = model.predict(x_df) print(y_predict) #prediction_toJson = y_predict.to_json(orient='columns') #print(prediction_toJson) df = pd.DataFrame(y_predict) df.rename(columns={0: "Prediction"}, inplace=True) #stock_df = stock_dataset_df[['SepalLengthCm','SepalWidthCm','Species']] result = pd.concat([stock_dataset_df, df], axis=1) result.to_csv( 'D:\\PredictionResult\\Azure\\prediction_azure_health.csv', index=False, date_format='%Y%m%d') result.head() prediction_toJson = result.to_json(orient='records') return prediction_toJson except Exception as e: error_statement = str(e) print("Error statement: ", error_statement) return error_statement
deploy_model = True print('Current model performs better and will be deployed!') else: print('Current model does NOT perform better and thus will NOT be deployed!') eval_info = {} eval_info["model_acc"] = latest_model_accuracy eval_info["deployed_model_acc"] = current_model_accuracy eval_info["deploy_model"] = deploy_model eval_info['train_run_id'] = latest_model_run_id eval_info['eval_run_id'] = run.id if deploy_model: os.chdir(args.input) cardata_ds_name = 'connected_car_components' cardata_ds = Dataset.get_by_name(workspace=ws, name=cardata_ds_name) glove_ds_name = 'glove_6B_100d' glove_ds = Dataset.get_by_name(workspace=ws, name=glove_ds_name) model_description = 'Deep learning model to classify the descriptions of car components as compliant or non-compliant.' # Create model datasheet from datetime import datetime from pytz import timezone etz = 'US/Eastern' time_stamp = datetime.now(timezone(etz)) time_stamp_str = time_stamp.strftime('%A %m/%d/%Y %I:%M:%S%p') model_tags = {} model_tags['title'] = 'Connected car components classifier' model_tags['datasheet_description'] = 'Data sheet last updated: ' + time_stamp_str model_tags['details'] = 'This model was developed for automatically classifying car components as compliant or not compliant. The model leverages deep learning technologies with Natural Language Processing techniques to scan through vehicle specification documents to find compliance issues with new regulations.'
def RunAutoMLReg(): subscription_id = request.json['subscription_id'] resource_group = request.json['resource_group'] workspace_name = request.json['workspace_name'] file_name = request.json['file_name'] location = request.json['location'] target_var = request.json['target_var'] cluster_name = request.json['cluster_name'] best_model = request.json['best_model'] #best_model = request.json['best_model'] ws = Workspace(subscription_id=subscription_id, resource_group=resource_group, workspace_name=workspace_name) print("Found workspace {} at location {}".format(ws.name, ws.location)) print('Found existing Workspace.') #compute_target = AmlCompute(ws, cluster_name) compute_target = ws.compute_targets[cluster_name] print('Found existing AML compute context.') dataset_name = file_name # Get a dataset by name df = Dataset.get_by_name(workspace=ws, name=dataset_name) #stock_dataset_df = df.to_pandas_dataframe() print('file successfully recieved.') #stock_dataset_df.head() #stock_dataset_json = stock_dataset_df.to_json(orient='split') #print(stock_dataset_json) X = df.drop_columns(columns=[target_var]) y = df.keep_columns(columns=[target_var], validate=True) #y_df = stock_dataset_df[target_var].values #x_df = stock_dataset_df.drop([target_var], axis=1) print(y) # create a new RunConfig object conda_run_config = RunConfiguration(framework="python") conda_run_config.environment.docker.enabled = True conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy', 'py-xgboost<=0.90']) conda_run_config.environment.python.conda_dependencies = cd print('run config is ready') ExperimentName = request.json['ExperimentName'] tasks = request.json['tasks'] iterations = request.json['iterations'] n_cross_validations = request.json['n_cross_validations'] iteration_timeout_minutes = request.json['iteration_timeout_minutes'] primary_metric = request.json['primary_metric'] max_concurrent_iterations = request.json['max_concurrent_iterations'] try: automl_settings = { "name": ExperimentName, "iteration_timeout_minutes": iteration_timeout_minutes, "featurization": 'auto', "iterations": iterations, "n_cross_validations": n_cross_validations, "primary_metric": primary_metric, "preprocess": True, "max_concurrent_iterations": max_concurrent_iterations #"verbosity": logging.INFO } automl_config = AutoMLConfig( task=tasks, debug_log='automl_errors.log', blacklist_models=['XGBoost'], #path=os.getcwd(), compute_target=compute_target, #run_configuration=conda_run_config, X=X, y=y, **automl_settings, ) experiment = Experiment(ws, ExperimentName) remote_run = experiment.submit(automl_config, show_output=True) remote_run.flush(timeout_seconds=400) children = list(remote_run.get_children()) metricslist = {} for run in children: properties = run.get_properties() metrics = { k: v for k, v in run.get_metrics().items() if isinstance(v, float) } metricslist[int(properties['iteration'])] = metrics rundata = pd.DataFrame(metricslist).sort_index(axis=1, by=primary_metric) rundata = rundata.drop([ 'mean_absolute_percentage_error', 'normalized_median_absolute_error', 'normalized_root_mean_squared_log_error', 'root_mean_squared_log_error' ]) rundata.rename(columns={ 0: "one", 1: "two", 2: "three", 3: "four", 4: "five", 5: "six", 6: "seven", 7: "eight", 8: "nine", 9: "ten", }, inplace=True) iterations_toJson = rundata.to_json(orient='columns') print(iterations_toJson) best_run, fitted_model = remote_run.get_output() 
best_run_toJson = best_run.get_metrics() cwd = 'D:/DCSAIAUTOML/BestModels/Azure' best_model_name = best_run.name model = remote_run.register_model(description=best_model) print(model.name, model.id, model.version, sep='\t') model_path = os.path.join(cwd, best_model, best_model_name) print(model_path) #print("Model DownLoad Complete") #model = Model(workspace=ws, name=model.name) #model.download_files(target_dir=model_path) #dict = {} #dict['iterations_toJson'] = iterations_toJson #dict['best_run_toJson'] = best_run_toJson #print(best_run.get_file_names()) #Register the model #from datetime import date best_model_id = best_run.name var1 = "@" var2 = var1 + best_model_id Reg_model_name = model.name var4 = var1 + Reg_model_name best_run.flush(timeout_seconds=3600) best_run.download_files(output_directory=model_path) # importing required modules #import shutil #output_path = os.path.join(model_path, best_model_id) #dir_name1 = "D:\\DCSAIAUTOML\\BestModels\\Azure\\my_azure_best" #dir_name1 = "D:\\DCSAIAUTOML\\BestModels\\Azure\\my_azure_best\\my_azure_best" #shutil.make_archive(model_path,'zip',model_path) #zipf = zipfile.ZipFile(best_model_id+'.zip', 'w', zipfile.ZIP_DEFLATED) #for root, dirs, files in os.walk(model_path): #for file in files: #zipf.write(os.path.join(root, file)) #def zipdir(path, ziph): # ziph is zipfile handle #import os #for root, dirs, files in os.walk(path): #for file in files: #ziph.write(os.path.join(root, file)) #zipdir(model_path, zipf) #remote_run.clean_preprocessor_cache() print("ready to return") var5 = "no exception" return '{} {} {} {} {}'.format(iterations_toJson, var2, var4, var1, var5) #return iterations_toJson except Exception as e: error_statement = str(e) print("Error statement: ", error_statement) model_path1 = os.path.join(model_path, 'outputs') file_name = 'model.pkl' print("in exception: ", model_path1) src = 'D:\\Final Script_dev' full_file_name = os.path.join(src, file_name) import shutil #remote_run.download_file('model.pkl', output_file_path=model_path1) if os.path.isfile(full_file_name): shutil.copy(full_file_name, model_path1) return '{} {} {} {} {}'.format(iterations_toJson, var2, var4, var1, error_statement)
workspace = Workspace.from_config(auth=AzureCliAuthentication()) # Define the conda dependencies cd = CondaDependencies(conda_dependencies_file_path=os.path.join( os.path.dirname(os.path.realpath(__file__)), 'conda_dependencies_sklearn.yml')) # define compute compute_target = '20cpucluster' # define data set names input_name_train = 'newsgroups_train' input_name_test = 'newsgroups_test' # Retrieve datasets dataset_train = Dataset.get_by_name(workspace, name=input_name_train) dataset_test = Dataset.get_by_name(workspace, name=input_name_test) # Runconfig amlcompute_run_config = RunConfiguration( script="train.py", conda_dependencies=cd, framework='Python', ) amlcompute_run_config.environment.docker.enabled = True amlcompute_run_config.environment.spark.precache_packages = False amlcompute_run_config.target = compute_target amlcompute_run_config.data = { input_name_train: load_data(dataset_train, input_name_train), input_name_test: load_data(dataset_test, input_name_test)
def RunAutoMLForecast(): subscription_id = request.json['subscription_id'] resource_group = request.json['resource_group'] workspace_name = request.json['workspace_name'] file_name = request.json['file_name'] location = request.json['location'] target_var = request.json['target_var'] cluster_name = request.json['cluster_name'] best_model = request.json['best_model'] time_column_name = request.json['time_column_name'] max_horizon = request.json['max_horizon'] ws = Workspace(subscription_id=subscription_id, resource_group=resource_group, workspace_name=workspace_name) print("Found workspace {} at location {}".format(ws.name, ws.location)) print('Found existing Workspace.') compute_target = AmlCompute(ws, cluster_name) print('Found existing AML compute context.') dataset_name = file_name time_column_name = time_column_name # Get a dataset by name dataset = Dataset.get_by_name(workspace=ws, name=dataset_name).with_timestamp_columns( fine_grain_timestamp=time_column_name) print(dataset) #df_ts = Dataset.Tabular.from_delimited_files(df_ts) dataset.to_pandas_dataframe().describe() dataset.take(3).to_pandas_dataframe() print(dataset) #y_df = df_ts[target_var].values #x_df = df_ts.drop([target_var], axis=1) print('file successfully recieved.') #stock_dataset_df.head() # create a new RunConfig object conda_run_config = RunConfiguration(framework="python") conda_run_config.environment.docker.enabled = True conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy', 'py-xgboost<=0.80']) conda_run_config.environment.python.conda_dependencies = cd print('run config is ready') ExperimentName = request.json['ExperimentName'] tasks = request.json['tasks'] iterations = request.json['iterations'] n_cross_validations = request.json['n_cross_validations'] iteration_timeout_minutes = request.json['iteration_timeout_minutes'] primary_metric = request.json['primary_metric'] #max_concurrent_iterations = request.json['max_concurrent_iterations'] automl_settings = { 'time_column_name': time_column_name, 'max_horizon': max_horizon, "iterations": iterations, } automl_config = AutoMLConfig( task=tasks, primary_metric=primary_metric, #blacklist_models = ['ExtremeRandomTrees', 'AutoArima', 'Prophet'], experiment_timeout_minutes=iteration_timeout_minutes, training_data=dataset, label_column_name=target_var, compute_target=compute_target, enable_early_stopping=True, n_cross_validations=n_cross_validations, #verbosity=logging.INFO, **automl_settings) print("AutoML config created.") experiment = Experiment(ws, ExperimentName) remote_run = experiment.submit(automl_config, show_output=True) children = list(remote_run.get_children()) metricslist = {} for run in children: properties = run.get_properties() metrics = { k: v for k, v in run.get_metrics().items() if isinstance(v, float) } metricslist[int(properties['iteration'])] = metrics rundata = pd.DataFrame(metricslist).sort_index(axis=1, by=primary_metric) rundata.rename(columns={ 0: "one", 1: "two", 2: "three", 3: "four", 4: "five", 5: "six", 6: "seven", 7: "eight", 8: "nine", 9: "ten", }, inplace=True) iterations_toJson = rundata.to_json(orient='columns') print(iterations_toJson) best_run, fitted_model = remote_run.get_output() #best_run_toJson = best_run.get_metrics() #dict = {} #dict['iterations_toJson'] = iterations_toJson #dict['best_run_toJson'] = best_run_toJson #print(best_run.get_file_names()) #Register the model #from datetime import date model 
= remote_run.register_model(model_name=best_model, description='AutoML Model') print(model.name, model.id, model.version, sep='\t') best_model = model.name best_model var1 = "@" var2 = var1 + best_model return '{} {}'.format(iterations_toJson, var2)
experiment = Experiment(ws, '<<experiment_name>>') automl_run = Run(experiment=experiment, run_id='<<run_id>>') # Check if this AutoML model is explainable if not automl_check_model_if_explainable(automl_run): raise Exception("Model explanations are currently not supported for " + automl_run.get_properties().get('run_algorithm')) # Download the best model from the artifact store automl_run.download_file(name=MODEL_PATH, output_file_path='model.pkl') # Load the AutoML model into memory fitted_model = joblib.load('model.pkl') # Get the train dataset from the workspace train_dataset = Dataset.get_by_name(workspace=ws, name='<<train_dataset_name>>') # Drop the labelled column to get the training set. X_train = train_dataset.drop_columns(columns=['<<target_column_name>>']) y_train = train_dataset.keep_columns(columns=['<<target_column_name>>'], validate=True) # Get the test dataset from the workspace test_dataset = Dataset.get_by_name(workspace=ws, name='<<test_dataset_name>>') # Drop the labelled column to get the testing set. X_test = test_dataset.drop_columns(columns=['<<target_column_name>>']) # Setup the class for explaining the AutoML models automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, '<<task>>', X=X_train, X_test=X_test,
from sklearn.linear_model import LogisticRegression import argparse import os import numpy as np from sklearn.metrics import mean_squared_error import joblib from sklearn.model_selection import train_test_split import pandas as pd from azureml.core.run import Run from azureml.core.dataset import Dataset from azureml.core import Experiment, Workspace from azureml.data.dataset_factory import TabularDatasetFactory # Load dataset into data variable: ws = Workspace.get("quick-starts-ws-126078") ds = Dataset.get_by_name(ws, name='Heart-Failure') data = ds.to_pandas_dataframe() #Split Target and Features in y and x respectively in the clean_data function def clean_data(data): y = data['DEATH_EVENT'] x = data.drop(['DEATH_EVENT'], axis=1) return x, y x, y = clean_data(data) # Split data into train and test sets.
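The snippet stops at the comment announcing the split. A minimal continuation sketch is below; the test size, regularization strength, max_iter, and output path are assumptions that mirror the other training scripts in this collection.

# Hedged continuation sketch: split, fit, score, and save (hyperparameters are assumptions).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

model = LogisticRegression(C=1.0, max_iter=100).fit(x_train, y_train)
accuracy = model.score(x_test, y_test)
print("Accuracy:", accuracy)

# Files written to "outputs" are automatically uploaded into run history.
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/model.joblib')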