"wasbs://feature_engineereddata@stmyappdataopstrstage.blob.core.windows.net/"
)
dbutils.widgets.text("feature_engineered_blob_config",
                     "fs.azure.account.key.MYACCOUNT.blob.core.windows.net")
dbutils.widgets.text("feature_engineered_blob_secretname",
                     "MYCONTAINER@MYACCOUNT")

# COMMAND ----------

# Connect to Azure ML
dbutils.library.installPyPI("azureml-sdk",
                            version="1.0.85",
                            extras="databricks")
from azureml.core import Run
# In an Azure ML run, settings are picked up from the --AZUREML_* parameters passed to the script
run = Run.get_context(allow_offline=True)
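# A hedged sketch (not in the original notebook): outside an Azure ML
# submission, get_context() returns an offline run whose id starts with
# "OfflineRun", which makes a handy guard before calling service-only APIs.
if run.id.startswith("OfflineRun"):
    print("Running outside Azure ML; metrics will not reach the service.")
else:
    run.log("execution_context", "azureml")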

# COMMAND ----------

# Set up storage credentials

spark.conf.set(
    dbutils.widgets.get("training_blob_config"),
    dbutils.secrets.get(scope=dbutils.widgets.get("secretscope"),
                        key=dbutils.widgets.get("training_blob_secretname")),
)

spark.conf.set(
    dbutils.widgets.get("feature_engineered_blob_config"),
    dbutils.secrets.get(
        scope=dbutils.widgets.get("secretscope"),
        key=dbutils.widgets.get("feature_engineered_blob_secretname"),
    ),
)

Example #2
import os
import argparse
from azureml.core import Run

parser = argparse.ArgumentParser()
parser.add_argument('--model_name',
                    type=str,
                    help='Name under which model will be registered')
parser.add_argument('--model_path', type=str, help='Model directory')
args, _ = parser.parse_known_args()

print(f'Arguments: {args}')
model_name = args.model_name
model_path = args.model_path

# current run is the registration step
current_run = Run.get_context()

# parent run is the overall pipeline
parent_run = current_run.parent
print(f'Parent run id: {parent_run.id}')

# Upload the model to the pipeline run's artifacts and register a model from it
parent_run.upload_folder(name='models', path=model_path)
parent_run.register_model(model_path='models', model_name=model_name)
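A hedged follow-up sketch (not in the original): a later pipeline step can fetch the model registered above by name.
from azureml.core.model import Model

model = Model(parent_run.experiment.workspace, name=model_name)
print(f'Registered {model.name}, version {model.version}')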
Example #3
def init():
    # Set Arguments.  These should be all of the hyperparameters you will tune.
    global args
    parser = argparse.ArgumentParser()
    # Hyperparameters
    parser.add_argument('--eta',
                        type=float,
                        default=0.1,
                        help='Learning rate (eta is the native XGBoost name)')
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.1,
                        help='Learning rate')
    parser.add_argument(
        '--scale_pos_weight',
        type=float,
        default=0.6,
        help=
        'Helps with Unbalanced Classes.  Should be Sum(Negative)/Sum(Positive)'
    )
    parser.add_argument('--booster',
                        type=str,
                        default='gbtree',
                        help='The type of boosting algorithm')
    parser.add_argument('--min_child_weight',
                        type=float,
                        default=1,
                        help='Controls Overfitting')
    parser.add_argument('--max_depth',
                        type=int,
                        default=6,
                        help='Controls Overfitting')
    parser.add_argument('--gamma',
                        type=float,
                        default=0,
                        help='Make Algorithm Conservative')
    parser.add_argument('--subsample',
                        type=float,
                        default=1,
                        help='Controls Overfitting')
    parser.add_argument('--colsample_bytree',
                        type=float,
                        default=1,
                        help='Defines Sampling')
    parser.add_argument('--reg_lambda',
                        type=float,
                        default=1,
                        help='Controls Overfitting')
    parser.add_argument('--alpha',
                        type=float,
                        default=0,
                        help='L1 regularization term (encourages sparsity)')
    # Other objective options include 'binary:logistic' and 'reg:logistic'.
    parser.add_argument('--objective',
                        type=str,
                        default='multi:softmax',
                        help='Defines Training Objective Metric')
    # Other Parameters
    parser.add_argument('--train_dataset_name',
                        type=str,
                        help='Name of Training Dataset')
    parser.add_argument('--val_dataset_name',
                        type=str,
                        help='Name of Validation Dataset')
    parser.add_argument('--target_column_name',
                        type=str,
                        help='Name of variable to score')
    parser.add_argument(
        '--k_folds',
        type=int,
        default=10,
        help='Number of folds to split your data into for cross validation')
    parser.add_argument(
        '--shuffle_split_size',
        type=float,
        help=
        'Percentage of data to hold out for testing during cross validation')
    parser.add_argument(
        '--confidence_level',
        type=float,
        default=0.95,
        help='Confidence level for the confidence interval (e.g. 0.95)')
    args = parser.parse_args()
    print(args)

    # Set the Run context for logging
    global run
    run = Run.get_context()

    # log your hyperparameters (np.float/np.str were removed in NumPy 1.24,
    # so the plain builtins are used here)
    run.log('eta', float(args.eta))
    run.log('learning_rate', float(args.learning_rate))
    run.log('scale_pos_weight', float(args.scale_pos_weight))
    run.log('booster', str(args.booster))
    run.log('min_child_weight', float(args.min_child_weight))
    run.log('max_depth', int(args.max_depth))
    run.log('gamma', float(args.gamma))
    run.log('subsample', float(args.subsample))
    run.log('colsample_bytree', float(args.colsample_bytree))
    run.log('reg_lambda', float(args.reg_lambda))
    run.log('alpha', float(args.alpha))
    run.log('objective', str(args.objective))
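Arguments like these are typically injected by a HyperDrive sweep. A hedged sketch (an assumption, not part of the original script) wiring a few of the same names into a sampling config; script_run_config stands for a pre-built ScriptRunConfig and the primary metric name is illustrative:
from azureml.train.hyperdrive import (BayesianParameterSampling,
                                      HyperDriveConfig, PrimaryMetricGoal,
                                      choice, uniform)

param_sampling = BayesianParameterSampling({
    '--eta': uniform(0.01, 0.3),
    '--max_depth': choice(3, 6, 9),
    '--subsample': uniform(0.5, 1.0),
})
hd_config = HyperDriveConfig(run_config=script_run_config,
                             hyperparameter_sampling=param_sampling,
                             primary_metric_name='accuracy',
                             primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                             max_total_runs=20)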
Example #4
from azureml.core import Run
run = Run.get_context()  # get hold of the current run

import argparse
import os

import numpy as np

# Let the user feed in four parameters: the regularization rate of the
# logistic regression algorithm, the data-preparation and training output
# paths, and an is_directory flag.
parser = argparse.ArgumentParser()
parser.add_argument('--reg', type=float, help='regularization rate')
parser.add_argument('--datapreparation_output',
                    type=str,
                    help='datapreparation_output')
parser.add_argument('--datatrain_output', type=str, help='datatrain_output')
# argparse's type=bool treats any non-empty string (even "False") as True,
# so parse the flag text explicitly.
parser.add_argument('--is_directory',
                    type=lambda s: str(s).lower() in ('true', '1'),
                    help='is_directory')
args = parser.parse_args()

reg = args.reg
print('Regularization Rate:', reg)
run.log('Regularization Rate', reg)

datapreparation_output = args.datapreparation_output
print('datapreparation_output:', datapreparation_output)
run.log('datapreparation_output', datapreparation_output)

datatrain_output = args.datatrain_output
print('datatrain_output:', datatrain_output)
run.log('datatrain_output', datatrain_output)

is_directory = args.is_directory
print('is_directory:', is_directory)
run.log('is_directory', is_directory)
Example #5
def train_model(df, target):
    # Creating dummy columns for each categorical feature
    categorical = []
    for col, value in df.items():  # iteritems() was removed in pandas 2.0
        if value.dtype == 'object':
            categorical.append(col)
    # Store the numerical columns in a list numerical
    numerical = df.columns.difference(categorical)
    numeric_transformations = [
        ([f],
         Pipeline(steps=[('imputer', SimpleImputer(
             strategy='median')), ('scaler', StandardScaler())]))
        for f in numerical
    ]
    categorical_transformations = [([f],
                                    OneHotEncoder(handle_unknown='ignore',
                                                  sparse=False))
                                   for f in categorical]
    transformations = numeric_transformations + categorical_transformations
    # Append classifier to preprocessing pipeline
    clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)
                           ), ('classifier',
                               LogisticRegression(solver='lbfgs'))])
    # Split data into train and test
    x_train, x_test, y_train, y_test = train_test_split(df,
                                                        target,
                                                        test_size=0.35,
                                                        random_state=0,
                                                        stratify=target)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(classification_report(y_test, y_pred))
    accu = accuracy_score(y_test, y_pred)
    model_file_name = 'classifier.pkl'
    # Save the model in the outputs folder so it is uploaded automatically;
    # joblib.dump writes to the target path directly.
    os.makedirs('./outputs', exist_ok=True)
    joblib.dump(value=clf,
                filename=os.path.join('./outputs/', model_file_name))
    run = Run.get_context()
    run.log("accuracy", accu)
    # Upload the model to the experiment artifact store, but do not register
    # it as a model until unit tests pass successfully in the next ML step.
    run.upload_file(model_file_name, os.path.join('./outputs/',
                                                  model_file_name))
    #Interpret steps
    client = ExplanationClient.from_run(run)
    # Using SHAP TabularExplainer
    explainer = TabularExplainer(clf.steps[-1][1],
                                 initialization_examples=x_train,
                                 features=df.columns,
                                 classes=["Not leaving", "leaving"],
                                 transformations=transformations)
    # explain overall model predictions (global explanation)
    global_explanation = explainer.explain_global(x_test)
    # Sorted SHAP values
    print('ranked global importance values: {}'.format(
        global_explanation.get_ranked_global_values()))
    # Corresponding feature names
    print('ranked global importance names: {}'.format(
        global_explanation.get_ranked_global_names()))
    # Feature ranks (based on original order of features)
    print('global importance rank: {}'.format(
        global_explanation.global_importance_rank))
    # uploading global model explanation data for storage or visualization in webUX
    # the explanation can then be downloaded on any compute
    # multiple explanations can be uploaded
    client.upload_model_explanation(global_explanation,
                                    comment='global explanation: all features')
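# Hedged follow-up sketch (not in the original): the uploaded explanation can
# later be downloaded on any compute from the same run; azureml-interpret is
# assumed to be installed.
def download_explanation(run, top_k=10):
    from azureml.interpret import ExplanationClient
    client = ExplanationClient.from_run(run)
    explanation = client.download_model_explanation(top_k=top_k)
    print(explanation.get_feature_importance_dict())
    return explanation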
Example #6
def model_train(df):
    run = Run.get_context()

    df.drop("step", axis=1, inplace=True)
    df.drop("isFlaggedFraud", axis=1, inplace=True)

    # Dropping for demo reasons
    df.drop("nameOrig", axis=1, inplace=True)
    df.drop("nameDest", axis=1, inplace=True)

    y_raw = df['isFraud']
    X_raw = df.drop('isFraud', axis=1)

    categorical_features = X_raw.select_dtypes(include=['object']).columns
    numeric_features = X_raw.select_dtypes(include=['int64', 'float']).columns

    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value="missing")),
        ('onehotencoder', OneHotEncoder(categories='auto', sparse=False))])

    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler())])

    feature_engineering_pipeline = ColumnTransformer(
        transformers=[
            ('numeric', numeric_transformer, numeric_features),
            ('categorical', categorical_transformer, categorical_features)
        ], remainder="drop")

    # Encode Labels
    le = LabelEncoder()
    encoded_y = le.fit_transform(y_raw)

    # Train test split
    X_train, X_test, y_train, y_test = train_test_split(X_raw, encoded_y, test_size=0.20, stratify=encoded_y, random_state=42)

    # Create sklearn pipeline
    clf = Pipeline(steps=[('preprocessor', feature_engineering_pipeline),
                             ('classifier', LogisticRegression(solver="saga", max_iter=250))])
    # Train the model
    clf.fit(X_train, y_train)

    # Capture metrics
    train_acc = clf.score(X_train, y_train)
    test_acc = clf.score(X_test, y_test)
    print("Training accuracy: %.3f" % train_acc)
    print("Testing accuracy: %.3f" % test_acc)

    # Log to Azure ML
    run.log('Train accuracy', train_acc)
    run.log('Test accuracy', test_acc)
    
    # Explain model
    explainer = TabularExplainer(clf.steps[-1][1],
                                 initialization_examples=X_train, 
                                 features=X_raw.columns, 
                                 classes=["NotFraud", "Fraud"], 
                                 transformations=feature_engineering_pipeline)

    # explain overall model predictions (global explanation)
    global_explanation = explainer.explain_global(X_test)

    # Sorted SHAP values
    print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))
    # Corresponding feature names
    print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))
    # Feature ranks (based on original order of features)
    print('global importance rank: {}'.format(global_explanation.global_importance_rank))
      
    client = ExplanationClient.from_run(run)
    client.upload_model_explanation(global_explanation, comment='Global Explanation: All Features')

    return clf
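A hedged usage sketch (not from the source): persist the returned pipeline under ./outputs so Azure ML uploads it with the run; df is assumed to be the loaded transactions DataFrame.
import os
import joblib

model = model_train(df)
os.makedirs('./outputs', exist_ok=True)
joblib.dump(model, './outputs/fraud_classifier.pkl')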
Example #7
def main():

    run = Run.get_context()
    if (run.id.startswith('OfflineRun')):
        from dotenv import load_dotenv
        sys.path.append(os.path.abspath("./code/util"))  # NOQA: E402
        from model_helper import get_model_by_build_id
        # For local development, set values in this section
        load_dotenv()
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        tenant_id = os.environ.get("TENANT_ID")
        model_name = os.environ.get("MODEL_NAME")
        app_id = os.environ.get('SP_APP_ID')
        app_secret = os.environ.get('SP_APP_SECRET')
        build_id = os.environ.get('BUILD_BUILDID')
        service_principal = ServicePrincipalAuthentication(
            tenant_id=tenant_id,
            service_principal_id=app_id,
            service_principal_password=app_secret)

        aml_workspace = Workspace.get(name=workspace_name,
                                      subscription_id=subscription_id,
                                      resource_group=resource_group,
                                      auth=service_principal)
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
        run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
    else:
        sys.path.append(os.path.abspath("./util"))  # NOQA: E402
        from model_helper import get_model_by_build_id
        ws = run.experiment.workspace
        exp = run.experiment
        run_id = 'amlcompute'

    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--build_id",
        type=str,
        help="The Build ID of the build triggering this pipeline run",
    )
    parser.add_argument(
        "--run_id",
        type=str,
        help="Training run ID",
    )
    parser.add_argument(
        "--model_name",
        type=str,
        help="Name of the Model",
        default="sklearn_regression_model.pkl",
    )
    parser.add_argument(
        "--validate",
        type=str,
        help="Set to true to only validate if model is registered for run",
        default=False,
    )

    args = parser.parse_args()
    if (args.build_id is not None):
        build_id = args.build_id
    if (args.run_id is not None):
        run_id = args.run_id
    if (run_id == 'amlcompute'):
        run_id = run.parent.id
    # --validate arrives as a string; any non-empty string (even "false") is
    # truthy, so compare the text explicitly.
    validate = str(args.validate).lower() == 'true'
    model_name = args.model_name

    if validate:
        try:
            get_model_by_build_id(model_name, build_id, exp.workspace)
            print("Model was registered for this build.")
        except Exception as e:
            print(e)
            print("Model was not registered for this run.")
            sys.exit(1)
    else:
        if (build_id is None):
            register_aml_model(model_name, exp, run_id)
        else:
            run.tag("BuildId", value=build_id)
            register_aml_model(model_name, exp, run_id, build_id)
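The get_model_by_build_id helper lives in the project's model_helper module, which is not shown here; a plausible sketch (an assumption, not the real source) filters registered models by a BuildId tag:
from azureml.core.model import Model

def get_model_by_build_id(model_name, build_id, workspace):
    models = Model.list(workspace, name=model_name,
                        tags=[['BuildId', build_id]])
    if not models:
        raise Exception(
            f'No model named {model_name} tagged with BuildId={build_id}')
    return models[0]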
Example #8
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.

from azureml.core import Run

submitted_run = Run.get_context()
submitted_run.log(name="message", value="Hello from run!")
Example #9
def main():

    run = Run.get_context()
    if (run.id.startswith('OfflineRun')):
        from dotenv import load_dotenv
        sys.path.append(os.path.abspath("./code/util"))  # NOQA: E402
        from model_helper import get_model_by_tag
        # For local development, set values in this section
        load_dotenv()
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        build_id = os.environ.get('BUILD_BUILDID')
        aml_workspace = Workspace.get(
            name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group
        )
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
    else:
        sys.path.append(os.path.abspath("./util"))  # NOQA: E402
        from model_helper import get_model_by_tag
        ws = run.experiment.workspace
        exp = run.experiment

    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--build_id",
        type=str,
        help="The Build ID of the build triggering this pipeline run",
    )
    parser.add_argument(
        "--model_name",
        type=str,
        help="Name of the Model"
    )
    parser.add_argument(
        "--output_model_version_file",
        type=str,
        help="Name of a file to write model version to"
    )

    args = parser.parse_args()
    if (args.build_id is not None):
        build_id = args.build_id
    model_name = args.model_name

    try:
        tag_name = 'BuildId'
        model = get_model_by_tag(
            model_name, tag_name, build_id, exp.workspace)
        if model is not None:
            print("Model was registered for this build.")
        else:
            print("Model was not registered for this run.")
            sys.exit(1)
    except Exception as e:
        print(e)
        print("Model was not registered for this run.")
        sys.exit(1)

    # Save the Model Version for other AzDO jobs after script is complete
    if args.output_model_version_file is not None:
        with open(args.output_model_version_file, "w") as out_file:
            out_file.write(str(model.version))
Example #10
def init():
    global ws
    current_run = Run.get_context()
    ws = current_run.experiment.workspace

    print("Init complete")
parser.add_argument("--date_column", type=str, help="date_column")
parser.add_argument("--hour_column", type=str, help="hour_column")
parser.add_argument("--datetime_column_name", type=str, help="datetime_column_name")
parser.add_argument("--pivot_columns", type=str, help="pivot_columns")
parser.add_argument("--value_column", type=str, help="value_column")
parser.add_argument("--output", type=str, help="output")
args = parser.parse_args()
print("Date Column: %s" % args.date_column)
print("Hour Column: %s" % args.hour_column)
print("Datetime Column Name: %s" % args.datetime_column_name)
print("Pivot Columns: %s" % args.pivot_columns)
print("Value Column: %s" % args.value_column)
print("Output: %s" % args.output)

# Retrieve Input Dataset
input_ds = Run.get_context().input_datasets["time_series"]

# Read dataset as a DataFrame
input_df = input_ds.to_pandas_dataframe()
# NOTE: for development you can work on a sample of the original dataset (0.01 = 1% of the full dataset)
# input_df = input_ds.take_sample(0.01).to_pandas_dataframe()

# Generate the timestamp column from the date and hour columns
input_df[args.datetime_column_name] = input_df.apply(lambda x: gen_date(x[args.date_column], x[args.hour_column]), axis=1)
# Drop the original date and hour columns
input_df = input_df.drop(columns=[args.date_column, args.hour_column])
# Pivot Data
if args.pivot_columns:
    # pivot and set index to datetime
    output_df = pd.pivot_table(input_df, values=args.value_column, index=args.datetime_column_name, columns=args.pivot_columns, aggfunc=np.max)
else:
Example #12
import argparse
import os

from azureml.core import Run
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# The dataset is specified at the pipeline definition level.

RANDOM_STATE = 42

parser = argparse.ArgumentParser()

parser.add_argument('--X_train_dir', dest='X_train_dir', required=True)
parser.add_argument('--X_test_dir', dest='X_test_dir', required=True)
parser.add_argument('--y_train_dir', dest='y_train_dir', required=True)
parser.add_argument('--y_test_dir', dest='y_test_dir', required=True)

args = parser.parse_args()

ds = Run.get_context().input_datasets['iris_baseline']

# Now the actual data prep (trivial)
df = ds.to_pandas_dataframe()
le = LabelEncoder()
le.fit(df['species'])
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, 1:4],
                                                    le.transform(
                                                        df['species']),
                                                    test_size=0.2,
                                                    random_state=RANDOM_STATE)

# Write outputs as `OutputFileDatasetConfig`
x_train_fname = os.path.join(args.X_train_dir, "data.txt")
x_test_fname = os.path.join(args.X_test_dir, "data.txt")
y_train_fname = os.path.join(args.y_train_dir, "data.txt")
Example #13
def handle_arguments(arg_parser) -> argparse.Namespace:
    # data args
    arg_parser.add_argument("--data-folder", type=str)
    args = arg_parser.parse_args()
    return args


def handle_configurations() -> Tuple[dict, dict, dict]:
    conf = load_training_conf("train_conf.yml")
    conf_train, conf_data = conf["training"], conf["data"]
    azure_conf = load_azure_conf("azure_conf.yml")
    return conf_train, conf_data, azure_conf
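
# Hedged sketch of loaders like load_training_conf / load_azure_conf (the real
# helpers are project code and are not shown); assumes PyYAML:
import yaml

def load_yaml_conf(path):
    with open(path) as f:
        return yaml.safe_load(f)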


if __name__ == "__main__":
    azure_run_context = Run.get_context()
    args = handle_arguments(argparse.ArgumentParser())
    conf_train, conf_data, azure_conf = handle_configurations()
    csv_dataset_name = azure_conf["LOCAL_DATASET_PATH"].split(os.sep)[-1]

    (x_train, x_test, y_train, y_test), tokenizer = training_data(
        tickets_data_path=os.path.join(args.data_folder, csv_dataset_name),
        text_column=conf_data["text_column"],
        label_column=conf_data["label_column"],
        test_size=conf_train.get("test_set_size", 0.25),
        subset_size=-1,
        max_length=conf_data["max_words_per_message"],
        pad_to_max_length=conf_data.get("pad_to_max_length", True),
    )
    model = DistilBertClassifier(
        num_labels=y_train.shape[1],
Example #14
def init():
    global current_run
    current_run = Run.get_context()
Example #15
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = model.config.eos_token_id

    encoded_dataset_train, encoded_dataset_eval = load_encoded_glue_dataset(
        task=task, tokenizer=tokenizer)

    compute_metrics = construct_compute_metrics_function(args.task)

    trainer = Trainer(
        model,
        training_args,
        train_dataset=encoded_dataset_train,
        eval_dataset=encoded_dataset_eval,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )

    trainer.pop_callback(MLflowCallback)

    print("Training...")

    run = Run.get_context()  # get handle on Azure ML run
    start = time.time()
    trainer.train()
    run.log("time/epoch",
            (time.time() - start) / 60 / training_args.num_train_epochs)

    print("Evaluation...")

    trainer.evaluate()
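    # Hedged follow-up sketch (not in the original): evaluate() returns a
    # metrics dict that can be forwarded to the Azure ML run; metric names
    # depend on the GLUE task.
    eval_metrics = trainer.evaluate()
    for name, value in eval_metrics.items():
        run.log(name, value)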
Example #16
parser.add_argument('--regularization', type=float, dest='reg', default=0.01, help='regularization rate')
args = parser.parse_args()

data_folder = os.path.join(args.data_folder, 'mnist')
print('Data folder:', data_folder)

# load the train and test sets into numpy arrays
# note: we scale the pixel intensities to 0-1 (by dividing by 255.0) so the model converges faster
X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0
X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0
y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1)
y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep = '\n')

# get hold of the current run
run = Run.get_context()

print('Train a logistic regression model with regularization rate of', args.reg)
clf = LogisticRegression(C=1.0/args.reg, random_state=42)
clf.fit(X_train, y_train)

print('Predict the test set')
y_hat = clf.predict(X_test)

# calculate accuracy on the prediction
acc = np.average(y_hat == y_test)
print('Accuracy is', acc)

run.log('regularization rate', float(args.reg))
run.log('accuracy', float(acc))
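A hedged follow-up sketch (not from the source): persisting the fitted classifier under ./outputs keeps it with the run as an artifact; the file name is illustrative.
import os
import joblib

os.makedirs('outputs', exist_ok=True)
joblib.dump(clf, 'outputs/sklearn_mnist_model.pkl')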
Example #17
def main():

    # ------------
    # args
    # ------------
    torch.manual_seed(0)
    pl.seed_everything(0)

    parser = argparse.ArgumentParser()
    parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')
    parser.add_argument('--batch-size', type=int, dest='batch_size', default=50, help='mini batch size for training')
    parser.add_argument('--epoch', type=int, dest='epoch', default=10, help='epoch size for training')
    parser.add_argument('--learning-rate', type=float, dest='learning_rate', default=0.001, help='learning rate')
    parser.add_argument('--momentum', type=float, dest='momentum', default=0.9, help='momentum')
    parser.add_argument('--model-name', type=str, dest='model_name', default='resnet', help='Fine-tuning model name')
    parser.add_argument('--optimizer', type=str, dest='optimizer', default='SGD', help='Optimizer to use for training.')
    parser.add_argument('--criterion', type=str, dest='criterion', default='cross_entropy', help='Loss function to use for training.')
    # argparse's type=bool treats any non-empty string as True, so parse the flag text explicitly.
    parser.add_argument('--feature_extract', type=lambda s: str(s).lower() in ('true', '1'), dest='feature_extract', default=True, help='Flag for feature extracting. When False, we fine-tune the whole model; when True, we only update the reshaped layer params')

    args = parser.parse_args()

    args.num_workers = 8

    data_folder = args.data_folder
    print('training dataset is stored here:', data_folder)

    input_size = 224
    if args.model_name == "inception":
        input_size = 299
    # ---------------------------
    # Azure Machine Learning
    # 1) get Azure ML run context and log hyperparameters
    # ---------------------------
    run = Run.get_context()
    run.log('model_name', args.model_name)
    run.log('optimizer', args.optimizer)
    run.log('criterion', args.criterion)

    run.log('lr', float(args.learning_rate))
    run.log('momentum', float(args.momentum))

    # For your tagging
#    run.tag('description', 'xxx')

    # ------------
    # data
    # ------------

    transform = transforms.Compose([
                    # Augmentation
    #                transforms.RandomHorizontalFlip(),
    #                transforms.RandomVerticalFlip(),
                    transforms.RandomAffine(degrees=[-10, 10], translate=(0.1, 0.1), scale=(0.5, 1.5)),
                    transforms.RandomRotation(degrees=10),
                    # Resize
                    transforms.Resize(int(input_size * 1.3)),
                    transforms.CenterCrop(input_size),
                    # Tensor
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
    ]
    )

    dataset = torchvision.datasets.ImageFolder(args.data_folder, transform)
    args.num_classes = len(dataset.classes)

    n_train = int(len(dataset) * 0.7)
    n_val = int(len(dataset) * 0.15)
    n_test = len(dataset) - n_train - n_val

    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset, [n_train, n_val, n_test])

    train_loader = torch.utils.data.DataLoader(train_dataset, args.batch_size, shuffle=True, drop_last=True, num_workers=args.num_workers)
    val_loader = torch.utils.data.DataLoader(val_dataset, args.batch_size, num_workers=args.num_workers)
    test_loader = torch.utils.data.DataLoader(test_dataset, args.batch_size)

    # Initialize the model for this run
    model_ft, input_size = initialize_model(args.model_name, args.num_classes, feature_extract=args.feature_extract, use_pretrained=True)
    model = FineTurningModel(args, model_ft)

    # GPU Configuration
    num_gpu = torch.cuda.device_count()
    print('num_gpu:', num_gpu)

    accelerator = None
    if num_gpu > 1:
        accelerator='ddp' # only for Single Machine

    # ------------
    # training
    # ------------
    trainer = pl.Trainer(max_epochs=args.epoch, gpus=num_gpu, accelerator=accelerator)
    trainer.fit(model, train_loader, val_loader)

    # ------------
    # Test (Not Validation)
    # ------------
    test_result = trainer.test(test_dataloaders=test_loader)

    run.log('test_acc', [res["test_acc"] for res in test_result][0])
    run.log('test_loss', [res["test_loss"] for res in test_result][0])
    run.log('test_acc_epoch', [res["test_acc_epoch"] for res in test_result][0])
    run.log('test_loss_epoch', [res["test_loss_epoch"] for res in test_result][0])

    # ------------
    # save model
    # ------------
    outputdir = './outputs/model'
    os.makedirs(outputdir, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(outputdir, 'model.dict'))
    torch.save(model, os.path.join(outputdir, 'model.pt'))
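    # Hedged follow-up sketch (not in the original): register the saved model
    # directly from this run; the model name is illustrative.
    run.register_model(model_name='finetuned-image-classifier',
                       model_path='outputs/model')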