def step_one(pickle_data_url: str, extra: int = 43):
    print('step_one')
    # make sure we have scikit-learn for this step, we need it to use to unpickle the object
    import sklearn  # noqa
    import pickle
    import pandas as pd
    from clearml import StorageManager
    local_iris_pkl = StorageManager.get_local_copy(remote_url=pickle_data_url)
    with open(local_iris_pkl, 'rb') as f:
        iris = pickle.load(f)
    data_frame = pd.DataFrame(iris['data'], columns=iris['feature_names'])
    data_frame['target'] = iris['target']
    return data_frame
def step_one(pickle_data_url=None):
    # make sure we have scikit-learn for this step, we need it to use to unpickle the object
    import sklearn  # noqa
    import pickle
    import pandas as pd
    from clearml import StorageManager
    pickle_data_url = (
        pickle_data_url or
        'https://github.com/allegroai/events/raw/master/odsc20-east/generic/iris_dataset.pkl'
    )
    local_iris_pkl = StorageManager.get_local_copy(remote_url=pickle_data_url)
    with open(local_iris_pkl, 'rb') as f:
        iris = pickle.load(f)
    data_frame = pd.DataFrame(iris['data'], columns=iris['feature_names'])
    data_frame['target'] = iris['target']
    return data_frame
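# The step_one functions above are written as self-contained pipeline steps. A minimal,
# hedged sketch of registering such a function with ClearML's PipelineController follows;
# the pipeline name, project, version and parameter wiring are illustrative assumptions,
# not part of the original example.
from clearml import PipelineController

pipe = PipelineController(name='pipeline demo', project='examples', version='1.0')
pipe.add_parameter(
    name='url',
    default='https://github.com/allegroai/events/raw/master/odsc20-east/generic/iris_dataset.pkl',
    description='url of the pickled iris dataset',
)
pipe.add_function_step(
    name='step_one',
    function=step_one,
    function_kwargs=dict(pickle_data_url='${pipeline.url}'),
    function_return=['data_frame'],
    cache_executed_step=True,
)
pipe.start_locally(run_pipeline_steps_locally=True)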
from clearml import Task, StorageManager

# create a dataset experiment
task = Task.init(project_name="examples",
                 task_name="Pipeline step 1 dataset artifact")

# only create the task, we will actually execute it later
task.execute_remotely()

# simulate local dataset, download one, so we have something local
local_iris_pkl = StorageManager.get_local_copy(
    remote_url='https://github.com/allegroai/events/raw/master/odsc20-east/generic/iris_dataset.pkl'
)

# add and upload local file containing our toy dataset
task.upload_artifact('dataset', artifact_object=local_iris_pkl)

print('uploading artifacts in the background')

# we are done
print('Done')
Example #4
# imports needed by this snippet
from pathlib import Path

import torch
from torchvision import datasets, transforms

from clearml import OutputModel, StorageManager, Task


def main():
    # Connecting ClearML with the current process,
    # from here on everything is logged automatically
    task = Task.init(
        project_name="examples",
        task_name="Model update PyTorch",
        auto_connect_frameworks={"pytorch": False},
    )
    params = {
        "number_of_epochs": 1,
        "batch_size": 64,
        "dropout": 0.25,
        "base_lr": 0.001,
        "momentum": 0.9,
        "loss_report": 100,
    }

    params = task.connect(params)  # enabling configuration override by clearml
    print(params)  # printing actual configuration (after override in remote mode)

    model = OutputModel(task=task, framework="pytorch")
    model_config_dict = {
        "list_of_ints": [1, 2, 3, 4],
        "dict": {
            "sub_value": "string",
            "sub_integer": 11
        },
        "value": 13.37
    }
    model.update_design(config_dict=model_config_dict)

    manager = StorageManager()

    dataset_path = Path(
        manager.get_local_copy(
            remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
        ))

    # Dataset and Dataloader initializations
    transform = transforms.Compose([transforms.ToTensor()])

    trainset = datasets.CIFAR10(root=dataset_path,
                                train=True,
                                download=False,
                                transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=params.get("batch_size", 4),
                                              shuffle=True,
                                              num_workers=10)

    testset = datasets.CIFAR10(root=dataset_path,
                               train=False,
                               download=False,
                               transform=transform)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=params.get("batch_size", 4),
                                             shuffle=False,
                                             num_workers=10)

    run(
        params["number_of_epochs"],
        params["base_lr"],
        params["momentum"],
        10,
        params,
        trainloader,
        testloader,
        model,
    )
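# run() above (the train/test loop) is defined elsewhere in the full example. Because the
# hyperparameters are registered with task.connect(params), a clone of this task can be
# re-run with different values from the ClearML UI or programmatically. A minimal, hedged
# sketch of the programmatic route (separate script; the queue name 'default' and the new
# learning-rate value are assumptions, not part of the original example):
from clearml import Task

template = Task.get_task(project_name="examples", task_name="Model update PyTorch")
cloned = Task.clone(source_task=template, name="Model update PyTorch - lr override")
cloned.set_parameters({"General/base_lr": 0.01})  # plain-dict params are connected under the "General" section
Task.enqueue(cloned, queue_name="default")        # a clearml-agent listening on this queue will execute it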
Example #5
# imports needed by this snippet
import pickle

from sklearn.model_selection import train_test_split

from clearml import StorageManager, Task

# `task` (created with Task.init) and the `args` dict (dataset_task_id, dataset_url,
# test_size, random_state) are defined earlier in the full example script
task.connect(args)
print('Arguments: {}'.format(args))

# only create the task, we will actually execute it later
task.execute_remotely()

# get dataset from task's artifact
if args['dataset_task_id']:
    dataset_upload_task = Task.get_task(task_id=args['dataset_task_id'])
    print('Input task id={} artifacts {}'.format(
        args['dataset_task_id'], list(dataset_upload_task.artifacts.keys())))
    # download the artifact
    iris_pickle = dataset_upload_task.artifacts['dataset'].get_local_copy()
# get the dataset from a direct url
elif args['dataset_url']:
    iris_pickle = StorageManager.get_local_copy(remote_url=args['dataset_url'])
else:
    raise ValueError("Missing dataset link")

# open the local copy
with open(iris_pickle, 'rb') as f:
    iris = pickle.load(f)

# "process" data
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=args['test_size'], random_state=args['random_state'])

# upload processed data
print('Uploading processed dataset')
task.upload_artifact('X_train', X_train)
Example #6
# imports needed by this snippet
from pathlib import Path

import torch
from torchvision import datasets, transforms

from clearml import StorageManager, Task

# Connecting ClearML with the current process,
# from here on everything is logged automatically
task = Task.init(project_name='examples',
                 task_name='Image classification CIFAR10')
params = {
    'number_of_epochs': 20,
    'batch_size': 64,
    'dropout': 0.25,
    'base_lr': 0.001,
    'momentum': 0.9,
    'loss_report': 100
}
params = task.connect(params)  # enabling configuration override by clearml
print(params)  # printing actual configuration (after override in remote mode)

manager = StorageManager()

dataset_path = Path(
    manager.get_local_copy(
        remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"))

# Dataset and Dataloader initializations
transform = transforms.Compose([transforms.ToTensor()])

trainset = datasets.CIFAR10(root=dataset_path,
                            train=True,
                            download=False,
                            transform=transform)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=params.get('batch_size', 4),
                                          shuffle=True,
                                          num_workers=10)
Example #7
from clearml import Task, StorageManager


task = Task.init(project_name="mushrooms",
                 task_name="mushrooms step 1 dataset artifact",
                 task_type=Task.TaskTypes.data_processing)
task.execute_remotely()

local_mushrooms_dataset = StorageManager.get_local_copy(
    remote_url="https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/mushrooms.csv")
task.upload_artifact("dataset", artifact_object=local_mushrooms_dataset)

print('uploading csv dataset in the background')
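# A hedged sketch of how a follow-up step could consume the artifact uploaded above, using
# the same Task.get_task / artifacts pattern as the iris pipeline example; reading the CSV
# with pandas is an assumption, not part of the original snippet.
import pandas as pd
from clearml import Task

dataset_task = Task.get_task(project_name="mushrooms",
                             task_name="mushrooms step 1 dataset artifact")
local_csv = dataset_task.artifacts["dataset"].get_local_copy()
mushrooms_df = pd.read_csv(local_csv)
print(mushrooms_df.head())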
Example #8
    opt.data = args['data']
    opt.evolve = args['evolve']
    opt.hyp = args['hyp']
    # workaround: resuming has an issue here, so force it to False
    opt.resume = False

    print(f'args\t{args}')
    print(f'opt\t{opt}')
    print('Reading in data from ClearML')
    from clearml import Dataset, StorageManager
    print(f"CHECK dataset url\t{args['dataset_url']}")
    if args['dataset_url'] == '':
        args['dataset_url'] = 'http://192.168.180.150:30081/pipe_it_up/grayscale.4030799c8a0d493983f287b454a549b3/artifacts/dataset/ds_ece1c9373b924f4ca3719ee53afd4647.zip'

    data_dir = StorageManager.get_local_copy(remote_url=args['dataset_url'])
    print(f"{args['dataset_url']}\tcheck the path data_dir\n{data_dir}")
    clearml_path = {
        'train': data_dir,
        'val': data_dir,
        'freeze_backbone': args['freeze_backbone'],
    }
    print(f'In Main: clearml_path:\t{clearml_path}')

    # Set DDP variables
    opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
    opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
    set_logging(opt.global_rank)
    if opt.global_rank in [-1, 0]:
        check_git_status()
        check_requirements(exclude=('pycocotools', 'thop'))