def entrance(train_data_path='/mnt/chjinche/data/out_transform_train/',
             valid_data_path='/mnt/chjinche/data/out_transform_test/',
             save_model_path='/mnt/chjinche/projects/saved_model',
             model_type='densenet201',
             pretrained=True,
             memory_efficient=False,
             epochs=1,
             batch_size=16,
             learning_rate=0.001,
             random_seed=231,
             patience=2):
    """Train a DenseNet image classifier and save it as a state-dict model.

    Loads train/valid image directories, builds a DenseNet from
    ``model_config``, trains with early stopping (``patience``) and saves the
    model together with a conda environment spec and the id-to-class map.

    :param train_data_path: transformed training ImageDirectory path.
    :param valid_data_path: transformed validation ImageDirectory path.
    :param save_model_path: output directory for the saved model.
    :raises ValueError: if train and valid sets disagree on the class list.
    """
    logger.info("Start training.")
    # Fixed: both messages previously read "data path:", making logs ambiguous.
    logger.info(f"train data path: {train_data_path}")
    logger.info(f"valid data path: {valid_data_path}")
    train_set = ImageDirectory.load(train_data_path).to_torchvision_dataset()
    logger.info(f"Training classes: {train_set.classes}")
    valid_set = ImageDirectory.load(valid_data_path).to_torchvision_dataset()
    # Implements the old "assert the same classes" comment: class ids must map
    # to the same names in both sets or validation metrics are meaningless.
    if train_set.classes != valid_set.classes:
        raise ValueError(
            f"Train/valid class mismatch: {train_set.classes} vs {valid_set.classes}")
    logger.info("Made dataset")
    classes = train_set.classes
    num_classes = len(classes)
    # TODO: use image directory api to get id-to-class mapping.
    id_to_class_dict = dict(enumerate(classes))
    logger.info("Start constructing model")
    model_config = {
        'model_type': model_type,
        'pretrained': pretrained,
        'memory_efficient': memory_efficient,
        'num_classes': num_classes
    }
    model = DenseNet(**model_config)
    model = train(model=model,
                  train_set=train_set,
                  valid_set=valid_set,
                  epochs=epochs,
                  batch_size=batch_size,
                  lr=learning_rate,
                  random_seed=random_seed,
                  patience=patience)
    # Save model file, configs and install dependencies
    # TODO: designer.model could support pathlib.Path
    # SECURITY NOTE(review): the --extra-index-url below embeds a personal
    # access token in source control; it should be rotated and injected via
    # configuration instead. Left unchanged here to preserve behavior.
    conda = {
        "dependencies": [{
            "pip": [
                "azureml-defaults",
                "azureml-designer-core[image]==0.0.25.post7829218",
                "fire==0.1.3",
                "git+https://github.com/StudioCommunity/CustomModules-1.git@master#subdirectory=azureml-custom-module-examples/image-classification",
                "--extra-index-url=https://azureml-modules:3nvdtawseij7o2oenxojj35c43i5lu2ucf77pugohh4g5eqn6xnq@msdata.pkgs.visualstudio.com/_packaging/azureml-modules%40Local/pypi/simple/"
            ]
        }]
    }
    save_pytorch_state_dict_model(model,
                                  init_params=model_config,
                                  path=save_model_path,
                                  task_type=TaskType.MultiClassification,
                                  label_map=id_to_class_dict,
                                  conda=conda)
    logger.info('This experiment has been completed.')
def entrance(train_data_path='/mnt/chjinche/data/out_transform_train/',
             valid_data_path='/mnt/chjinche/data/out_transform_test/',
             save_model_path='/mnt/chjinche/projects/saved_model',
             model_type='densenet201',
             pretrained=True,
             memory_efficient=False,
             epochs=1,
             batch_size=16,
             learning_rate=0.001,
             random_seed=231,
             patience=2):
    """Train a DenseNet image classifier and save it with local dependencies.

    Loads train/valid image directories, builds a DenseNet from
    ``model_config``, trains with early stopping (``patience``) and saves the
    state-dict model along with the package that defines it (so scoring can
    re-import the model class).

    :param train_data_path: transformed training ImageDirectory path.
    :param valid_data_path: transformed validation ImageDirectory path.
    :param save_model_path: output directory for the saved model.
    :raises ValueError: if train and valid sets disagree on the class list.
    """
    logger.info("Start training.")
    # Fixed: both messages previously read "data path:", making logs ambiguous.
    logger.info(f"train data path: {train_data_path}")
    logger.info(f"valid data path: {valid_data_path}")
    train_set = ImageDirectory.load(train_data_path).to_torchvision_dataset()
    logger.info(f"Training classes: {train_set.classes}")
    valid_set = ImageDirectory.load(valid_data_path).to_torchvision_dataset()
    # Implements the old "assert the same classes" comment: class ids must map
    # to the same names in both sets or validation metrics are meaningless.
    if train_set.classes != valid_set.classes:
        raise ValueError(
            f"Train/valid class mismatch: {train_set.classes} vs {valid_set.classes}")
    logger.info("Made dataset")
    classes = train_set.classes
    num_classes = len(classes)
    # TODO: use image directory api to get id-to-class mapping.
    id_to_class_dict = dict(enumerate(classes))
    logger.info("Start constructing model")
    model_config = {
        'model_type': model_type,
        'pretrained': pretrained,
        'memory_efficient': memory_efficient,
        'num_classes': num_classes
    }
    model = DenseNet(**model_config)
    model = train(model=model,
                  train_set=train_set,
                  valid_set=valid_set,
                  epochs=epochs,
                  batch_size=batch_size,
                  lr=learning_rate,
                  random_seed=random_seed,
                  patience=patience)
    # Save model file, configs and install dependencies
    # TODO: designer.model could support pathlib.Path
    local_dependencies = [str(Path(__file__).parent.parent)]
    # Fixed log-message typo: "Ouput" -> "Output".
    logger.info(f'Output local dependencies {local_dependencies}')
    save_pytorch_state_dict_model(model,
                                  init_params=model_config,
                                  path=save_model_path,
                                  task_type=TaskType.MultiClassification,
                                  label_map=id_to_class_dict,
                                  local_dependencies=local_dependencies)
    logger.info('This experiment has been completed.')
def dump(image_list, schema, output_image_dir):
    """Persist an image list, its schema, directory meta and sample records.

    Writes whichever of ``image_list``/``schema`` are provided; when both are
    present the directory is reloaded to generate samples and the meta file is
    rewritten to reference them.
    """
    dir_meta = ImageDirectory.create_meta()
    if image_list:
        dump_to_json_lines(image_list,
                           output_image_dir / ImageDirectory.IMAGE_LIST_FILE)
    if schema:
        dump_to_json_file(schema.to_dict(),
                          output_image_dir / ImageDirectory._SCHEMA_FILE_PATH)
        dir_meta.update_field('schema', ImageDirectory._SCHEMA_FILE_PATH,
                              override=True)
    # NOTE(review): the meta is written before the reload below — keep this
    # ordering, ImageDirectory.load presumably requires the meta on disk.
    dump_to_json_file(dir_meta.to_dict(), output_image_dir / _META_FILE_PATH)
    if image_list and schema:
        # Reload the freshly written directory to generate samples ...
        reloaded = ImageDirectory.load(output_image_dir)
        dump_to_json_file(reloaded.get_samples(),
                          output_image_dir / ImageDirectory._SAMPLES_FILE_PATH)
        # ... and rewrite the meta so it points at the samples file.
        reloaded.meta.update_field('samples',
                                   ImageDirectory._SAMPLES_FILE_PATH,
                                   override=True)
        dump_to_json_file(reloaded.meta.to_dict(),
                          output_image_dir / _META_FILE_PATH)
def split_images(src_path, tgt_train_path, tgt_test_path, fraction):
    """Stratified split of an image directory into train and test directories.

    :param src_path: source ImageDirectory path.
    :param tgt_train_path: destination path for the train split.
    :param tgt_test_path: destination path for the test split.
    :param fraction: split fraction passed to ``get_stratified_split_list``.
    """
    loaded_dir = ImageDirectory.load(src_path)
    lst = loaded_dir.image_lst
    # Fixed idiom: dropped the f-prefix from a literal with no placeholders.
    logger.info('Start splitting.')
    train_set_lst, test_set_lst = get_stratified_split_list(lst, fraction)
    logger.info(f'Got stratified split list. train {len(train_set_lst)}, test {len(test_set_lst)}.')
    train_set_dir = FolderBasedImageDirectory.create_with_lst(src_path, train_set_lst)
    test_set_dir = FolderBasedImageDirectory.create_with_lst(src_path, test_set_lst)
    logger.info('Dump train set.')
    train_set_dir.dump(tgt_train_path)
    logger.info('Dump test set.')
    test_set_dir.dump(tgt_test_path)
def split_images(src_path, tgt_train_path, tgt_test_path, fraction):
    """Stratified split of an image directory into train and test sub-dirs.

    Uses index lists from ``get_stratified_split_list`` and
    ``ImageDirectory.get_sub_dir`` to build the two splits, then dumps each.

    :param src_path: source ImageDirectory path.
    :param tgt_train_path: destination path for the train split.
    :param tgt_test_path: destination path for the test split.
    :param fraction: split fraction passed to ``get_stratified_split_list``.
    """
    loaded_dir = ImageDirectory.load(src_path)
    lst = loaded_dir.image_lst
    # Fixed idiom: dropped the f-prefix from a literal with no placeholders.
    logger.info('Start splitting.')
    train_set_idx, test_set_idx = get_stratified_split_list(lst, fraction)
    logger.info(
        f'Got stratified split list. train {len(train_set_idx)}, test {len(test_set_idx)}.'
    )
    train_set_dir = loaded_dir.get_sub_dir(train_set_idx)
    test_set_dir = loaded_dir.get_sub_dir(test_set_idx)
    logger.info('Dump train set.')
    train_set_dir.dump(tgt_train_path)
    logger.info('Dump test set.')
    test_set_dir.dump(tgt_test_path)
def entrance(mode,
             input_transform_path='/mnt/chjinche/test_data/init_transform/',
             input_image_path='/mnt/chjinche/test_data/image_dir_test/',
             output_path='/mnt/chjinche/test_data/transform_test/'):
    """Apply a stored image transformation to an image directory and dump it."""
    transform_dir = ImageTransformationDirectory.load(input_transform_path)
    image_dir = ImageDirectory.load(input_image_path)
    params = {'Mode': mode}
    ports = {
        'Input image transformation': transform_dir,
        'Input image directory': image_dir,
    }
    task = ApplyImageTransformation()
    task.onstart(ports=ports, params=params)
    transformed_dir = task.apply(ports=ports, params=params)
    transformed_dir.dump(output_path)
    logger.info("Transformed dir dumped")
def entrance(trained_model: str,
             dataset: str,
             scored_dataset: str,
             append_score_columns_to_output: str = "true"):
    """Score ``dataset`` with ``trained_model`` and save to ``scored_dataset``.

    Accepts either a DataFrameDirectory or an ImageDirectory input. For
    multi-class models the output is saved with score-column metadata;
    otherwise it is saved as a plain DFD.

    :param trained_model: path of the trained model directory.
    :param dataset: path of the input directory (DFD or ImageDirectory).
    :param scored_dataset: output DFD path.
    :param append_score_columns_to_output: "true"/"false" flag, passed through.
    :raises ValueError: for unsupported input directory types.
    """
    logger.info(
        f"append_score_columns_to_output = {append_score_columns_to_output}")
    params = {
        constants.APPEND_SCORE_COLUMNS_TO_OUTPUT_KEY:
        append_score_columns_to_output
    }
    score_module = BuiltinScoreModule(trained_model, params)
    any_directory = AnyDirectory.load(dataset)
    if any_directory.type == "DataFrameDirectory":
        input_dfd = DataFrameDirectory.load(dataset)
        logger.info(f"input_dfd =\n{input_dfd}")
        output_df = score_module.run(input_dfd)
    elif any_directory.type == "ImageDirectory":
        image_directory = ImageDirectory.load(dataset)
        output_df = score_module.run(image_directory)
    else:
        # Fixed: report the directory's declared type (the value branched on
        # above) rather than the Python wrapper class, and raise ValueError
        # instead of the generic Exception.
        raise ValueError(f"Unsupported directory type: {any_directory.type}.")
    logger.info(f"output_df =\n{output_df}")
    logger.info(f"dumping to DFD {scored_dataset}")
    # TODO: Support other task types
    if score_module.model.task_type == TaskType.MultiClassification:
        predict_df = output_df
        score_columns = schema_utils.generate_score_column_meta(
            predict_df=predict_df)
        # Only record the label column if the predictions actually contain it.
        if score_module.model.label_column_name in predict_df.columns:
            label_column_name = score_module.model.label_column_name
        else:
            label_column_name = None
        meta_data = DataFrameSchema(
            column_attributes=DataFrameSchema.generate_column_attributes(
                df=predict_df),
            score_column_names=score_columns,
            label_column_name=label_column_name)
        save_data_frame_to_directory(scored_dataset,
                                     data=predict_df,
                                     schema=meta_data.to_dict())
    else:
        ioutils.save_dfd(output_df, scored_dataset)
def entrance(mode,
             input_transform_path='/mnt/chjinche/test_data/detection/init_transform/',
             input_image_path='/mnt/chjinche/test_data/detection/image_dir/',
             output_path='/mnt/chjinche/test_data/detection/transform/'):
    """Run ApplyImageTransformation on an image directory and dump the result."""
    transform_dir = ImageTransformationDirectory.load(input_transform_path)
    image_dir = ImageDirectory.load(input_image_path)
    kwargs = {
        'mode': mode,
        'input_image_transformation': transform_dir,
        'input_image_directory': image_dir,
    }
    task = ApplyImageTransformation()
    task.on_init(**kwargs)
    # run() returns a 1-tuple of output directories; unpack the single item.
    transformed_dir, = task.run(**kwargs)
    transformed_dir.dump(output_path)
    logger.info("Transformed dir dumped")
def split_images(src_path, tgt_train_path, tgt_test_path, fraction):
    """Split an image directory into train/test sub-dirs by annotation type.

    The split strategy is chosen by ``get_split_list`` from the directory's
    annotation type (e.g. classification vs. detection).

    :param src_path: source ImageDirectory path.
    :param tgt_train_path: destination path for the train split.
    :param tgt_test_path: destination path for the test split.
    :param fraction: split fraction passed to ``get_split_list``.
    """
    # TODO: use multi-label stratified split
    # from skmultilearn.model_selection import iterative_train_test_split
    # X_train, y_train, X_test, y_test = iterative_train_test_split(X, y, test_size = 0.5)
    loaded_dir = ImageDirectory.load(src_path)
    ann_type = loaded_dir.get_annotation_type()
    logger.info(f'task: {ann_type}')
    lst = loaded_dir.image_lst
    # Fixed idiom: dropped the f-prefix from a literal with no placeholders.
    logger.info('Start splitting.')
    train_set_idx, test_set_idx = get_split_list(ann_type, lst, fraction)
    logger.info(
        f'Got split list. train {len(train_set_idx)}, test {len(test_set_idx)}.'
    )
    train_set_dir = loaded_dir.get_sub_dir(train_set_idx)
    test_set_dir = loaded_dir.get_sub_dir(test_set_idx)
    logger.info('Dump train set.')
    train_set_dir.dump(tgt_train_path)
    logger.info('Dump test set.')
    test_set_dir.dump(tgt_test_path)
def entrance(input_model_path='../init_model',
             train_data_path='../transform_train/',
             valid_data_path='../transform_test/',
             save_model_path='../saved_model',
             epochs=20,
             batch_size=16,
             learning_rate=0.001,
             random_seed=231,
             patience=2):
    """Load an initialized model config, train it and save the result.

    The model config (including the class name under 'model_class') is loaded
    from ``input_model_path`` with ``pickle_loader``; the class is resolved
    against ``modellib`` and instantiated with the remaining config keys.

    :param input_model_path: directory holding the pickled model config.
    :param train_data_path: transformed training ImageDirectory path.
    :param valid_data_path: transformed validation ImageDirectory path.
    :param save_model_path: output directory for the saved model.
    :raises ValueError: if the config names no resolvable model class.
    """
    logger.info("Start training.")
    # Fixed: both messages previously read "data path:", making logs ambiguous.
    logger.info(f"train data path: {train_data_path}")
    logger.info(f"valid data path: {valid_data_path}")
    train_set = ImageDirectory.load(train_data_path).to_torchvision_dataset()
    logger.info(f"Training classes: {train_set.classes}")
    valid_set = ImageDirectory.load(valid_data_path).to_torchvision_dataset()
    # TODO: assert the same classes between train_set and valid_set.
    logger.info("Made dataset")
    classes = train_set.classes
    num_classes = len(classes)
    # TODO: use image directory api to get id-to-class mapping.
    id_to_class_dict = dict(enumerate(classes))
    logger.info("Start building model.")
    model_config = load_model_from_directory(input_model_path,
                                             model_loader=pickle_loader).data
    # Fixed: getattr(modellib, None, None) raised TypeError when the key was
    # absent, and a missing class crashed later as None(**model_config).
    # Resolve the name first and fail fast with a clear error.
    model_class_name = model_config.pop('model_class', None)
    model_class = getattr(modellib, model_class_name, None) if model_class_name else None
    logger.info(f'Model class: {model_class}.')
    if model_class is None:
        raise ValueError(
            f"Unknown or missing model class {model_class_name!r} in model config.")
    model_config['num_classes'] = num_classes
    logger.info(f'Model_config: {model_config}.')
    model = model_class(**model_config)
    logger.info("Built model. Start training.")
    model = train(model=model,
                  train_set=train_set,
                  valid_set=valid_set,
                  epochs=epochs,
                  batch_size=batch_size,
                  lr=learning_rate,
                  random_seed=random_seed,
                  patience=patience)
    # Save model file, configs and install dependencies
    # TODO: designer.model could support pathlib.Path
    # SECURITY NOTE(review): the --extra-index-url below embeds a personal
    # access token in source control; it should be rotated and injected via
    # configuration instead. Left unchanged here to preserve behavior.
    conda = {
        "dependencies": [{
            "pip": [
                "azureml-defaults",
                "azureml-designer-core[image]==0.0.25.post7829218",
                "fire==0.1.3",
                "git+https://github.com/StudioCommunity/CustomModules-1.git@master#subdirectory=azureml-custom-module-examples/image-classification",
                "--extra-index-url=https://azureml-modules:3nvdtawseij7o2oenxojj35c43i5lu2ucf77pugohh4g5eqn6xnq@msdata.pkgs.visualstudio.com/_packaging/azureml-modules%40Local/pypi/simple/"
            ]
        }]
    }
    logger.info(f"Saving with conda: {conda}")
    save_pytorch_state_dict_model(model,
                                  init_params=model_config,
                                  path=save_model_path,
                                  task_type=TaskType.MultiClassification,
                                  label_map=id_to_class_dict,
                                  conda=conda)
    logger.info('This experiment has been completed.')
# Fixed: load_generic_model was called below but never imported (NameError at
# runtime); import added, matching the sibling inference-test script.
from azureml.designer.model.io import load_generic_model
from azureml.studio.core.io.image_directory import ImageDirectory
import torch
import torch.nn as nn
from torchvision import transforms

# from .densenet import DenseNet
from .utils import logger

if __name__ == '__main__':
    # Manual smoke test: reload a saved model and an image directory.
    print("Testing inference.")
    loaded_generic_model = load_generic_model(
        path='/mnt/chjinche/projects/saved_model')
    model = loaded_generic_model.raw_model
    # # check predict before save
    # state_dict = model.state_dict()
    loader_dir = ImageDirectory.load('/mnt/chjinche/data/out_transform_test/')
    # NOTE(review): the scaffolding below rebuilds the network from its state
    # dict to compare predictions before/after saving; kept for reference.
    # to_tensor_transform = transforms.Compose([transforms.ToTensor()])
    # model_config = {
    #     'model_type': 'densenet201',
    #     'pretrained': False,
    #     'memory_efficient': True,
    #     'num_classes': 3
    # }
    # new_model = DenseNet(**model_config)
    # new_model.load_state_dict(state_dict)
    # if torch.cuda.is_available():
    #     new_model = new_model.cuda()
    #     if torch.cuda.device_count() > 1:
    #         new_model = torch.nn.DataParallel(new_model).cuda()
    # new_model.eval()
    # for img, label, identifier in loader_dir.iter_images():
from azureml.designer.model.io import load_generic_model
from azureml.studio.core.io.image_directory import ImageDirectory

if __name__ == '__main__':
    # Manual smoke test: reload the saved custom model and score a directory.
    print("Testing inference.")
    generic_model = load_generic_model(
        path='/mnt/chjinche/projects/saved_custom_model')
    image_dir = ImageDirectory.load('/mnt/chjinche/data/test_data/')
    result_dfd = generic_model.predict(image_dir.iter_images())
    print(f'result_dfd: {result_dfd}')
def apply_image_transformation(self, input_image_path, output_path):
    """Load an image directory, apply this transformation, dump the result.

    :param input_image_path: path of the source ImageDirectory.
    :param output_path: destination path for the transformed directory.
    """
    source_dir = ImageDirectory.load(input_image_path)
    logger.info("Image dir loaded.")
    result_dir = self.apply(source_dir)
    result_dir.dump(output_path)
    logger.info("Transformed dir dumped")