def dump(image_list, schema, output_image_dir):
    """Write the image list, schema, meta and samples files of a directory.

    The image list and the schema are each written only when truthy; the
    meta file is always written. When both are present, the directory that
    was just written is loaded back to generate samples, and the meta file
    is rewritten with a ``samples`` entry.

    Args:
        image_list: Records dumped as JSON lines; skipped when falsy.
        schema: Object exposing ``to_dict()``; skipped when falsy.
        output_image_dir: Target directory, joined to file names with ``/``
            (presumably a ``pathlib.Path`` -- confirm against callers).
    """
    dir_meta = ImageDirectory.create_meta()

    if image_list:
        list_path = output_image_dir / ImageDirectory.IMAGE_LIST_FILE
        dump_to_json_lines(image_list, list_path)

    if schema:
        schema_dict = schema.to_dict()
        schema_path = output_image_dir / ImageDirectory._SCHEMA_FILE_PATH
        dump_to_json_file(schema_dict, schema_path)
        dir_meta.update_field('schema',
                              ImageDirectory._SCHEMA_FILE_PATH,
                              override=True)

    # The meta file must exist on disk before the directory can be reloaded.
    meta_path = output_image_dir / _META_FILE_PATH
    dump_to_json_file(dir_meta.to_dict(), meta_path)

    if not (image_list and schema):
        return

    # Samples are generated from the directory as it now exists on disk.
    reloaded_dir = ImageDirectory.load(output_image_dir)
    sample_records = reloaded_dir.get_samples()
    samples_path = output_image_dir / ImageDirectory._SAMPLES_FILE_PATH
    dump_to_json_file(sample_records, samples_path)

    # Record the samples file in the meta and persist the meta again.
    reloaded_dir.meta.update_field('samples',
                                   ImageDirectory._SAMPLES_FILE_PATH,
                                   override=True)
    dump_to_json_file(reloaded_dir.meta.to_dict(), meta_path)
Ejemplo n.º 2
0
def entrance(train_data_path='/mnt/chjinche/data/out_transform_train/',
             valid_data_path='/mnt/chjinche/data/out_transform_test/',
             save_model_path='/mnt/chjinche/projects/saved_model',
             model_type='densenet201',
             pretrained=True,
             memory_efficient=False,
             epochs=1,
             batch_size=16,
             learning_rate=0.001,
             random_seed=231,
             patience=2):
    """Train a DenseNet classifier on image directories and save it.

    Both image directories are loaded and converted to torchvision
    datasets, a ``DenseNet`` is built for the number of training classes,
    trained through ``train()``, and saved as a PyTorch state-dict model
    together with an explicit conda/pip specification.

    Args:
        train_data_path: Image directory used for training.
        valid_data_path: Image directory used for validation.
        save_model_path: Destination passed to the model saver.
        model_type: Forwarded to ``DenseNet`` as ``model_type``.
        pretrained: Forwarded to ``DenseNet`` as ``pretrained``.
        memory_efficient: Forwarded to ``DenseNet`` as ``memory_efficient``.
        epochs: Forwarded to ``train()``.
        batch_size: Forwarded to ``train()``.
        learning_rate: Forwarded to ``train()`` as ``lr``.
        random_seed: Forwarded to ``train()``.
        patience: Forwarded to ``train()``.
    """
    logger.info("Start training.")
    logger.info(f"data path: {train_data_path}")
    logger.info(f"data path: {valid_data_path}")

    train_dir = ImageDirectory.load(train_data_path)
    train_set = train_dir.to_torchvision_dataset()
    logger.info(f"Training classes: {train_set.classes}")
    valid_dir = ImageDirectory.load(valid_data_path)
    valid_set = valid_dir.to_torchvision_dataset()
    # TODO: assert the same classes between train_set and valid_set.
    logger.info("Made dataset")

    class_names = train_set.classes
    class_count = len(class_names)
    # TODO: use image directory api to get id-to-class mapping.
    label_map = dict(enumerate(class_names))

    logger.info("Start constructing model")
    model_config = dict(model_type=model_type,
                        pretrained=pretrained,
                        memory_efficient=memory_efficient,
                        num_classes=class_count)
    trained_model = train(model=DenseNet(**model_config),
                          train_set=train_set,
                          valid_set=valid_set,
                          epochs=epochs,
                          batch_size=batch_size,
                          lr=learning_rate,
                          random_seed=random_seed,
                          patience=patience)

    # Save model file, configs and install dependencies
    # TODO: designer.model could support pathlib.Path
    # NOTE(review): the extra index URL below embeds what looks like an
    # access token; consider supplying it through configuration instead.
    pip_requirements = [
        "azureml-defaults",
        "azureml-designer-core[image]==0.0.25.post7829218",
        "fire==0.1.3",
        "git+https://github.com/StudioCommunity/CustomModules-1.git@master#subdirectory=azureml-custom-module-examples/image-classification",
        "--extra-index-url=https://azureml-modules:3nvdtawseij7o2oenxojj35c43i5lu2ucf77pugohh4g5eqn6xnq@msdata.pkgs.visualstudio.com/_packaging/azureml-modules%40Local/pypi/simple/",
    ]
    conda = {"dependencies": [{"pip": pip_requirements}]}
    save_pytorch_state_dict_model(trained_model,
                                  init_params=model_config,
                                  path=save_model_path,
                                  task_type=TaskType.MultiClassification,
                                  label_map=label_map,
                                  conda=conda)
    logger.info('This experiment has been completed.')
Ejemplo n.º 3
0
def entrance(train_data_path='/mnt/chjinche/data/out_transform_train/',
             valid_data_path='/mnt/chjinche/data/out_transform_test/',
             save_model_path='/mnt/chjinche/projects/saved_model',
             model_type='densenet201',
             pretrained=True,
             memory_efficient=False,
             epochs=1,
             batch_size=16,
             learning_rate=0.001,
             random_seed=231,
             patience=2):
    """Train a DenseNet classifier and save it with local dependencies.

    Both image directories are loaded and converted to torchvision
    datasets, a ``DenseNet`` is built for the number of training classes,
    trained through ``train()``, and saved as a PyTorch state-dict model.
    The directory two levels above this file is passed to the saver as the
    model's only local dependency.

    Args:
        train_data_path: Image directory used for training.
        valid_data_path: Image directory used for validation.
        save_model_path: Destination passed to the model saver.
        model_type: Forwarded to ``DenseNet`` as ``model_type``.
        pretrained: Forwarded to ``DenseNet`` as ``pretrained``.
        memory_efficient: Forwarded to ``DenseNet`` as ``memory_efficient``.
        epochs: Forwarded to ``train()``.
        batch_size: Forwarded to ``train()``.
        learning_rate: Forwarded to ``train()`` as ``lr``.
        random_seed: Forwarded to ``train()``.
        patience: Forwarded to ``train()``.
    """
    logger.info("Start training.")
    logger.info(f"data path: {train_data_path}")
    logger.info(f"data path: {valid_data_path}")

    train_dir = ImageDirectory.load(train_data_path)
    train_set = train_dir.to_torchvision_dataset()
    logger.info(f"Training classes: {train_set.classes}")
    valid_dir = ImageDirectory.load(valid_data_path)
    valid_set = valid_dir.to_torchvision_dataset()
    # TODO: assert the same classes between train_set and valid_set.
    logger.info("Made dataset")

    class_names = train_set.classes
    class_count = len(class_names)
    # TODO: use image directory api to get id-to-class mapping.
    label_map = dict(enumerate(class_names))

    logger.info("Start constructing model")
    model_config = dict(model_type=model_type,
                        pretrained=pretrained,
                        memory_efficient=memory_efficient,
                        num_classes=class_count)
    trained_model = train(model=DenseNet(**model_config),
                          train_set=train_set,
                          valid_set=valid_set,
                          epochs=epochs,
                          batch_size=batch_size,
                          lr=learning_rate,
                          random_seed=random_seed,
                          patience=patience)

    # Save model file, configs and install dependencies
    # TODO: designer.model could support pathlib.Path
    package_root = Path(__file__).parent.parent
    local_dependencies = [str(package_root)]
    logger.info(f'Ouput local dependencies {local_dependencies}')
    save_pytorch_state_dict_model(trained_model,
                                  init_params=model_config,
                                  path=save_model_path,
                                  task_type=TaskType.MultiClassification,
                                  label_map=label_map,
                                  local_dependencies=local_dependencies)
    logger.info('This experiment has been completed.')
Ejemplo n.º 4
0
def split_images(src_path, tgt_train_path, tgt_test_path, fraction):
    """Split an image directory into a train directory and a test directory.

    The image list of the directory at ``src_path`` is partitioned by
    ``get_stratified_split_list``; each part is wrapped in a
    ``FolderBasedImageDirectory`` rooted at ``src_path`` and dumped.

    Args:
        src_path: Image directory to split.
        tgt_train_path: Dump destination of the train part.
        tgt_test_path: Dump destination of the test part.
        fraction: Split ratio forwarded to ``get_stratified_split_list``
            (which side it sizes is not visible here -- confirm).
    """
    source_dir = ImageDirectory.load(src_path)
    image_records = source_dir.image_lst
    logger.info('Start splitting.')
    train_records, test_records = get_stratified_split_list(
        image_records, fraction)
    logger.info(
        f'Got stratified split list. train {len(train_records)}, test {len(test_records)}.'
    )

    # Build both directories first, then dump them in train/test order.
    train_dir = FolderBasedImageDirectory.create_with_lst(
        src_path, train_records)
    test_dir = FolderBasedImageDirectory.create_with_lst(
        src_path, test_records)
    dump_plan = (
        ('Dump train set.', train_dir, tgt_train_path),
        ('Dump test set.', test_dir, tgt_test_path),
    )
    for message, image_dir, target_path in dump_plan:
        logger.info(message)
        image_dir.dump(target_path)
Ejemplo n.º 5
0
def split_images(src_path, tgt_train_path, tgt_test_path, fraction):
    """Split an image directory into a train directory and a test directory.

    ``get_stratified_split_list`` partitions the image list of the
    directory at ``src_path``; each part is turned into a sub-directory of
    the loaded directory via ``get_sub_dir`` and dumped.

    Args:
        src_path: Image directory to split.
        tgt_train_path: Dump destination of the train part.
        tgt_test_path: Dump destination of the test part.
        fraction: Split ratio forwarded to ``get_stratified_split_list``
            (which side it sizes is not visible here -- confirm).
    """
    source_dir = ImageDirectory.load(src_path)
    image_records = source_dir.image_lst
    logger.info('Start splitting.')
    train_idx, test_idx = get_stratified_split_list(image_records, fraction)
    logger.info(
        f'Got stratified split list. train {len(train_idx)}, test {len(test_idx)}.'
    )

    # Build both sub-directories first, then dump them in train/test order.
    train_dir = source_dir.get_sub_dir(train_idx)
    test_dir = source_dir.get_sub_dir(test_idx)
    dump_plan = (
        ('Dump train set.', train_dir, tgt_train_path),
        ('Dump test set.', test_dir, tgt_test_path),
    )
    for message, image_dir, target_path in dump_plan:
        logger.info(message)
        image_dir.dump(target_path)
Ejemplo n.º 6
0
def entrance(mode,
             input_transform_path='/mnt/chjinche/test_data/init_transform/',
             input_image_path='/mnt/chjinche/test_data/image_dir_test/',
             output_path='/mnt/chjinche/test_data/transform_test/'):
    """Apply a stored image transformation to an image directory and dump it.

    Args:
        mode: Passed to the task as the ``'Mode'`` parameter.
        input_transform_path: Location of the image transformation directory.
        input_image_path: Location of the image directory to transform.
        output_path: Where the transformed directory is dumped.
    """
    params = {'Mode': mode}
    transform_dir = ImageTransformationDirectory.load(input_transform_path)
    image_dir = ImageDirectory.load(input_image_path)
    ports = {
        'Input image transformation': transform_dir,
        'Input image directory': image_dir,
    }

    task = ApplyImageTransformation()
    task.onstart(ports=ports, params=params)
    transformed_dir = task.apply(ports=ports, params=params)
    transformed_dir.dump(output_path)
    logger.info("Transformed dir dumped")
def entrance(input_path='/mnt/chjinche/test_data/tar_file/',
             output_path='/mnt/chjinche/test_data/image_dir/'):
    """Load a compressed image archive found under ``input_path`` and dump it.

    ``input_path`` is searched recursively for entries whose suffix is
    ``.tar`` or ``.zip``. When several entries match, the last one yielded
    by the directory walk is used.

    Args:
        input_path: Directory searched for the archive.
        output_path: Where the loaded image directory is dumped.

    Raises:
        FileNotFoundError: If no ``.tar`` or ``.zip`` entry exists under
            ``input_path``.
    """
    logger.info('Start!')
    # Case 1: input path is torchvision ImageFolder
    # loader_dir = FolderBasedImageDirectory.load_organized(input_path)
    # TODO: Case 2: input path is custom image format
    compressed_extensions = {'.tar', '.zip'}
    compressed_path = None
    for path in Path(input_path).glob('**/*'):
        if path.suffix in compressed_extensions:
            compressed_path = path

    # Fail with a clear message instead of handing None to the loader.
    if compressed_path is None:
        raise FileNotFoundError(
            f'No compressed file with extension in '
            f'{sorted(compressed_extensions)} found under {input_path}')

    logger.info(f'compressed file path {compressed_path}')
    loader_dir = ImageDirectory.load_compressed(compressed_path)
    loader_dir.dump(output_path)
    logger.info('Finished.')
Ejemplo n.º 8
0
def entrance(trained_model: str,
             dataset: str,
             scored_dataset: str,
             append_score_columns_to_output: str = "true"):
    """Score a dataset with a trained model and save the scored result.

    The dataset is loaded according to its directory type, either
    ``DataFrameDirectory`` or ``ImageDirectory``, and run through a
    ``BuiltinScoreModule``. For multi-class classification models the
    result is saved with a schema that records the score columns and, when
    present in the output, the label column. Other task types are saved
    through ``ioutils.save_dfd``.

    Args:
        trained_model: Passed to ``BuiltinScoreModule`` as the model.
        dataset: Location of the input directory to score.
        scored_dataset: Location where the scored data is saved.
        append_score_columns_to_output: Passed through to the score module
            under ``constants.APPEND_SCORE_COLUMNS_TO_OUTPUT_KEY``.

    Raises:
        ValueError: If the dataset's directory type is not supported.
    """
    logger.info(
        f"append_score_columns_to_output = {append_score_columns_to_output}")
    params = {
        constants.APPEND_SCORE_COLUMNS_TO_OUTPUT_KEY:
        append_score_columns_to_output
    }
    score_module = BuiltinScoreModule(trained_model, params)
    any_directory = AnyDirectory.load(dataset)
    if any_directory.type == "DataFrameDirectory":
        input_dfd = DataFrameDirectory.load(dataset)
        logger.info(f"input_dfd =\n{input_dfd}")
        output_df = score_module.run(input_dfd)
    elif any_directory.type == "ImageDirectory":
        image_directory = ImageDirectory.load(dataset)
        output_df = score_module.run(image_directory)
    else:
        # Report the directory type that was tested, not the Python class.
        raise ValueError(
            f"Unsupported directory type: {any_directory.type}.")

    logger.info(f"output_df =\n{output_df}")
    logger.info(f"dumping to DFD {scored_dataset}")

    # TODO: Support other task types
    if score_module.model.task_type == TaskType.MultiClassification:
        score_columns = schema_utils.generate_score_column_meta(
            predict_df=output_df)
        # Only record the label column when it is present in the output.
        model_label_column = score_module.model.label_column_name
        if model_label_column in output_df.columns:
            label_column_name = model_label_column
        else:
            label_column_name = None
        meta_data = DataFrameSchema(
            column_attributes=DataFrameSchema.generate_column_attributes(
                df=output_df),
            score_column_names=score_columns,
            label_column_name=label_column_name)
        save_data_frame_to_directory(scored_dataset,
                                     data=output_df,
                                     schema=meta_data.to_dict())
    else:
        ioutils.save_dfd(output_df, scored_dataset)
def entrance(
        mode,
        input_transform_path='/mnt/chjinche/test_data/detection/init_transform/',
        input_image_path='/mnt/chjinche/test_data/detection/image_dir/',
        output_path='/mnt/chjinche/test_data/detection/transform/'):
    """Apply a stored image transformation to an image directory and dump it.

    Args:
        mode: Passed to the task as ``mode``.
        input_transform_path: Location of the image transformation directory.
        input_image_path: Location of the image directory to transform.
        output_path: Where the transformed directory is dumped.
    """
    transform_dir = ImageTransformationDirectory.load(input_transform_path)
    image_dir = ImageDirectory.load(input_image_path)
    task_kwargs = dict(mode=mode,
                       input_image_transformation=transform_dir,
                       input_image_directory=image_dir)

    task = ApplyImageTransformation()
    task.on_init(**task_kwargs)
    # run() must return exactly one output; the unpacking enforces that.
    (transformed_dir,) = task.run(**task_kwargs)
    transformed_dir.dump(output_path)
    logger.info("Transformed dir dumped")
def split_images(src_path, tgt_train_path, tgt_test_path, fraction):
    """Split an image directory into a train directory and a test directory.

    The annotation type of the loaded directory is passed, together with
    its image list and ``fraction``, to ``get_split_list``. Each returned
    part is turned into a sub-directory via ``get_sub_dir`` and dumped.

    Args:
        src_path: Image directory to split.
        tgt_train_path: Dump destination of the train part.
        tgt_test_path: Dump destination of the test part.
        fraction: Split ratio forwarded to ``get_split_list`` (which side
            it sizes is not visible here -- confirm).
    """
    # TODO: use multi-label stratified split
    # from skmultilearn.model_selection import iterative_train_test_split
    # X_train, y_train, X_test, y_test = iterative_train_test_split(X, y, test_size = 0.5)
    source_dir = ImageDirectory.load(src_path)
    ann_type = source_dir.get_annotation_type()
    logger.info(f'task: {ann_type}')
    image_records = source_dir.image_lst
    logger.info('Start splitting.')
    train_idx, test_idx = get_split_list(ann_type, image_records, fraction)
    logger.info(
        f'Got split list. train {len(train_idx)}, test {len(test_idx)}.')

    # Build both sub-directories first, then dump them in train/test order.
    train_dir = source_dir.get_sub_dir(train_idx)
    test_dir = source_dir.get_sub_dir(test_idx)
    dump_plan = (
        ('Dump train set.', train_dir, tgt_train_path),
        ('Dump test set.', test_dir, tgt_test_path),
    )
    for message, image_dir, target_path in dump_plan:
        logger.info(message)
        image_dir.dump(target_path)
def entrance(input_model_path='../init_model',
             train_data_path='../transform_train/',
             valid_data_path='../transform_test/',
             save_model_path='../saved_model',
             epochs=20,
             batch_size=16,
             learning_rate=0.001,
             random_seed=231,
             patience=2):
    """Build a model from a stored config, train it, and save it.

    The model config is loaded from ``input_model_path``. Its
    ``'model_class'`` entry names a class in ``modellib``, and the
    remaining entries plus ``num_classes`` become that class's constructor
    arguments. The model is trained through ``train()`` and saved as a
    PyTorch state-dict model with an explicit conda/pip specification.

    Args:
        input_model_path: Directory holding the pickled model config.
        train_data_path: Image directory used for training.
        valid_data_path: Image directory used for validation.
        save_model_path: Destination passed to the model saver.
        epochs: Forwarded to ``train()``.
        batch_size: Forwarded to ``train()``.
        learning_rate: Forwarded to ``train()`` as ``lr``.
        random_seed: Forwarded to ``train()``.
        patience: Forwarded to ``train()``.

    Raises:
        ValueError: If the config has no string ``'model_class'`` entry, or
            if ``modellib`` has no attribute with that name.
    """
    logger.info("Start training.")
    logger.info(f"data path: {train_data_path}")
    logger.info(f"data path: {valid_data_path}")
    train_set = ImageDirectory.load(train_data_path).to_torchvision_dataset()
    logger.info(f"Training classes: {train_set.classes}")
    valid_set = ImageDirectory.load(valid_data_path).to_torchvision_dataset()
    # TODO: assert the same classes between train_set and valid_set.
    logger.info("Made dataset")
    classes = train_set.classes
    num_classes = len(classes)
    # TODO: use image directory api to get id-to-class mapping.
    id_to_class_dict = dict(enumerate(classes))
    logger.info("Start building model.")
    # NOTE(review): the config is unpickled here; only load model
    # directories that come from a trusted source.
    model_config = load_model_from_directory(input_model_path,
                                             model_loader=pickle_loader).data
    # Take the class name out of the config so that the remaining entries
    # are exactly the constructor arguments.
    model_class_name = model_config.pop('model_class', None)
    if not isinstance(model_class_name, str):
        raise ValueError(
            f"Model config loaded from {input_model_path} has no valid "
            f"'model_class' entry: {model_class_name!r}.")
    model_class = getattr(modellib, model_class_name, None)
    logger.info(f'Model class: {model_class}.')
    if model_class is None:
        raise ValueError(
            f"Unknown model class {model_class_name!r}: not found in "
            f"modellib.")
    model_config['num_classes'] = num_classes
    logger.info(f'Model_config: {model_config}.')
    model = model_class(**model_config)
    logger.info("Built model. Start training.")
    model = train(model=model,
                  train_set=train_set,
                  valid_set=valid_set,
                  epochs=epochs,
                  batch_size=batch_size,
                  lr=learning_rate,
                  random_seed=random_seed,
                  patience=patience)
    # Save model file, configs and install dependencies
    # TODO: designer.model could support pathlib.Path
    # NOTE(review): the extra index URL below embeds what looks like an
    # access token; consider supplying it through configuration instead.
    conda = {
        "dependencies": [{
            "pip": [
                "azureml-defaults",
                "azureml-designer-core[image]==0.0.25.post7829218",
                "fire==0.1.3",
                "git+https://github.com/StudioCommunity/CustomModules-1.git@master#subdirectory=azureml-custom-module-examples/image-classification",
                "--extra-index-url=https://azureml-modules:3nvdtawseij7o2oenxojj35c43i5lu2ucf77pugohh4g5eqn6xnq@msdata.pkgs.visualstudio.com/_packaging/azureml-modules%40Local/pypi/simple/"
            ]
        }]
    }
    logger.info(f"Saving with conda: {conda}")
    save_pytorch_state_dict_model(model,
                                  init_params=model_config,
                                  path=save_model_path,
                                  task_type=TaskType.MultiClassification,
                                  label_map=id_to_class_dict,
                                  conda=conda)
    logger.info('This experiment has been completed.')
Ejemplo n.º 12
0
from azureml.studio.core.io.image_directory import ImageDirectory
import torch
import torch.nn as nn
from torchvision import transforms
# from .densenet import DenseNet
from .utils import logger

if __name__ == '__main__':
    # Manual smoke test: load the saved model and the test image directory.
    # Imported locally because the imports visible at the top of this
    # script do not bring load_generic_model into scope.
    from azureml.designer.model.io import load_generic_model

    # Test inference
    print("Testing inference.")
    loaded_generic_model = load_generic_model(
        path='/mnt/chjinche/projects/saved_model')
    model = loaded_generic_model.raw_model
    loader_dir = ImageDirectory.load('/mnt/chjinche/data/out_transform_test/')

    # Disabled experiment, kept for reference: rebuild a DenseNet from the
    # raw model's state dict and check its predictions before saving.
    # # check predict before save
    # state_dict = model.state_dict()
    # to_tensor_transform = transforms.Compose([transforms.ToTensor()])
    # model_config = {
    #     'model_type': 'densenet201',
    #     'pretrained': False,
    #     'memory_efficient': True,
    #     'num_classes': 3
    # }
    # new_model = DenseNet(**model_config)
    # new_model.load_state_dict(state_dict)
    # if torch.cuda.is_available():
    #     new_model = new_model.cuda()
    #     if torch.cuda.device_count() > 1:
    #         new_model = torch.nn.DataParallel(new_model).cuda()
    # new_model.eval()
    # for img, label, identifier in loader_dir.iter_images():
Ejemplo n.º 13
0
from azureml.designer.model.io import load_generic_model
from azureml.studio.core.io.image_directory import ImageDirectory

if __name__ == '__main__':
    # Manual smoke test: load the saved custom model, feed it every image
    # of the test directory, and print the prediction result.
    print("Testing inference.")
    loaded_generic_model = load_generic_model(
        path='/mnt/chjinche/projects/saved_custom_model')
    loader_dir = ImageDirectory.load('/mnt/chjinche/data/test_data/')
    image_iter = loader_dir.iter_images()
    result_dfd = loaded_generic_model.predict(image_iter)
    print(f'result_dfd: {result_dfd}')
 def apply_image_transformation(self, input_image_path, output_path):
     """Load an image directory, transform it via ``self.apply``, dump it.

     Args:
         input_image_path: Location of the image directory to load.
         output_path: Where the transformed directory is dumped.
     """
     source_dir = ImageDirectory.load(input_image_path)
     logger.info("Image dir loaded.")
     self.apply(source_dir).dump(output_path)
     logger.info("Transformed dir dumped")