def preview_synthetic_training_data(data, target, backgrounds=None, verbose=True, **kwargs):
    """
    Build the synthetic, annotated training data for a one-shot object
    detector so that it can be inspected before training.

    Parameters
    ----------
    data : SFrame | tc.Image
        A single starter image or an SFrame that contains the starter images
        along with their corresponding labels. These image(s) can be in either
        RGB or RGBA format. They should not be padded.

    target : string
        Name of the target (when data is a single image) or the target column
        name (when data is an SFrame of images).

    backgrounds : optional SArray
        A list of backgrounds used for synthetic data generation. When set to
        None, a set of default backgrounds are downloaded and used.

    verbose : bool optional
        If True, print progress updates and details.

    seed : int, optional (keyword only, read from ``**kwargs``)
        Seed forwarded to the augmentation step. When it is not supplied, a
        random integer between 0 and 2**32 - 1 is drawn.

    Returns
    -------
    out : SFrame
        An SFrame of synthetically generated annotated training data.
    """
    checked_input = check_one_shot_input(data, target, backgrounds)
    dataset_to_augment, image_column_name, target_column_name = checked_input
    _tkutl._handle_missing_values(dataset_to_augment, image_column_name, "dataset")

    if backgrounds is None:
        default_backgrounds = (
            _data_zoo.OneShotObjectDetectorBackgroundData().get_backgrounds()
        )

        def _halve_dimensions(im):
            # Halving each axis of the default backgrounds reduces the disk
            # footprint during augmentation and the time taken to synthesize
            # data.
            return _tc.image_analysis.resize(
                im, int(im.width / 2), int(im.height / 2), im.channels
            )

        backgrounds = default_backgrounds.apply(_halve_dimensions)

    # Only touch the random number generator when the caller gave no seed.
    if "seed" in kwargs:
        seed = kwargs["seed"]
    else:
        seed = _random.randint(0, 2**32 - 1)

    # Option arguments to pass in to C++ Object Detector, if we use it:
    # {'mlmodel_path':'darknet.mlmodel', 'max_iterations' : 25}
    options_for_augmentation = {"seed": seed, "verbose": verbose}

    detector = _extensions.one_shot_object_detector()
    return detector.augment(
        dataset_to_augment,
        image_column_name,
        target_column_name,
        backgrounds,
        options_for_augmentation,
    )
def preview_synthetic_training_data(data, target, backgrounds=None, verbose=True, **kwargs):
    """
    A utility function to visualize the synthetically generated data.

    Parameters
    ----------
    data : SFrame | tc.Image
        A single starter image or an SFrame that contains the starter images
        along with their corresponding labels. These image(s) can be in either
        RGB or RGBA format. They should not be padded.

    target : string
        Name of the target (when data is a single image) or the target column
        name (when data is an SFrame of images).

    backgrounds : optional SArray
        A list of backgrounds used for synthetic data generation. When set to
        None, a set of default backgrounds are downloaded and used.

    verbose : bool optional
        Forwarded to the augmentation step as the ``verbose`` option.

    seed : int, optional (keyword only, read from ``**kwargs``)
        Seed forwarded to the augmentation step. When it is not supplied, a
        random integer between 0 and 2**32 - 1 is drawn.

    Returns
    -------
    out : SFrame
        An SFrame of synthetically generated annotated training data.
    """
    dataset_to_augment, image_column_name, target_column_name = check_one_shot_input(
        data, target, backgrounds)
    _tkutl._handle_missing_values(dataset_to_augment, image_column_name, 'dataset')

    one_shot_model = _extensions.one_shot_object_detector()
    seed = kwargs["seed"] if "seed" in kwargs else _random.randint(0, 2**32 - 1)

    if backgrounds is None:
        backgrounds_downloader = _data_zoo.OneShotObjectDetectorBackgroundData()
        backgrounds_tar_path = backgrounds_downloader.get_backgrounds_path()
        # Open the archive in a ``with`` block so the file handle is released
        # even if extraction fails.
        # NOTE(review): extractall() unpacks into the current working
        # directory and trusts the member paths stored in the archive; the
        # SArray below is then loaded from that relative location. Confirm the
        # archive source is trusted and that writing to the cwd is intended.
        with _tarfile.open(backgrounds_tar_path) as backgrounds_tar:
            backgrounds_tar.extractall()
        backgrounds = _tc.SArray("one_shot_backgrounds.sarray")

    # Option arguments to pass in to C++ Object Detector, if we use it:
    # {'mlmodel_path':'darknet.mlmodel', 'max_iterations' : 25}
    options_for_augmentation = {"seed": seed, "verbose": verbose}
    augmented_data = one_shot_model.augment(dataset_to_augment,
                                            image_column_name,
                                            target_column_name,
                                            backgrounds,
                                            options_for_augmentation)
    return augmented_data
def create(
        dataset,
        target,
        feature=None,
        model='resnet-50',
        l2_penalty=0.01,
        l1_penalty=0.0,
        solver='auto',
        feature_rescaling=True,
        convergence_threshold=_DEFAULT_SOLVER_OPTIONS['convergence_threshold'],
        step_size=_DEFAULT_SOLVER_OPTIONS['step_size'],
        lbfgs_memory_level=_DEFAULT_SOLVER_OPTIONS['lbfgs_memory_level'],
        max_iterations=_DEFAULT_SOLVER_OPTIONS['max_iterations'],
        class_weights=None,
        validation_set='auto',
        verbose=True,
        seed=None,
        batch_size=64):
    """
    Create a :class:`ImageClassifier` model.

    Image features are extracted with a pretrained model and a logistic
    classifier is trained on top of those features.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    target : string, or int
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. String target variables are
        automatically mapped to integers in the order in which they are
        provided. For example, a target variable with 'cat' and 'dog' as
        possible values is mapped to 0 and 1 respectively with 0 being the
        base class and 1 being the reference class. Use `model.classes` to
        retrieve the order in which the classes are mapped.

    feature : string, optional
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    l2_penalty : float, optional
        Weight on l2 regularization of the model. The larger this weight, the
        more the model coefficients shrink toward 0. This introduces bias into
        the model but decreases variance, potentially leading to better
        predictions. The default value is 0.01; setting this parameter to 0
        corresponds to unregularized logistic regression. See the ridge
        regression reference for more detail.

    l1_penalty : float, optional
        Weight on l1 regularization of the model. Like the l2 penalty, the
        higher the l1 penalty, the more the estimated coefficients shrink
        toward 0. The l1 penalty, however, completely zeros out sufficiently
        small coefficients, automatically indicating features that are not
        useful for the model. The default weight of 0 prevents any features
        from being discarded. See the LASSO regression reference for more
        detail.

    solver : string, optional
        Name of the solver to be used to solve the regression. See the
        references for more detail on each solver. Available solvers are:

        - *auto (default)*: automatically chooses the best solver for the data
          and model parameters.
        - *newton*: Newton-Raphson
        - *lbfgs*: limited memory BFGS
        - *fista*: accelerated gradient descent

        For this model, the Newton-Raphson method is equivalent to the
        iteratively re-weighted least squares algorithm. If the l1_penalty is
        greater than 0, use the 'fista' solver.

        The model is trained using a carefully engineered collection of
        methods that are automatically picked based on the input data. The
        ``newton`` method works best for datasets with plenty of examples and
        few features (long datasets). Limited memory BFGS (``lbfgs``) is a
        robust solver for wide datasets (i.e. datasets with many
        coefficients). ``fista`` is the default solver for l1-regularized
        linear regression. The solvers are all automatically tuned and the
        default options should function well. See the solver options guide for
        setting additional parameters for each of the solvers.

        See the user guide for additional details on how the solver is chosen.
        (see `here
        <https://apple.github.io/turicreate/docs/userguide/supervised-learning/linear-regression.html>`_)

    feature_rescaling : boolean, optional
        Feature rescaling is an important pre-processing step that ensures
        that all features are on the same scale. An l2-norm rescaling is
        performed to make sure that all features are of the same norm.
        Categorical features are also rescaled by rescaling the dummy
        variables that are used to represent them. The coefficients are
        returned in original scale of the problem. This process is
        particularly useful when features vary widely in their ranges.

    convergence_threshold : float, optional
        Convergence is tested using variation in the training objective. The
        variation in the training objective is calculated using the difference
        between the objective values between two steps. Consider reducing this
        below the default value (0.01) for a more accurately trained model.
        Beware of overfitting (i.e. a model that works well only on the
        training data) if this parameter is set to a very low value.

    lbfgs_memory_level : float, optional
        The L-BFGS algorithm keeps track of gradient information from the
        previous ``lbfgs_memory_level`` iterations. The storage requirement
        for each of these gradients is the ``num_coefficients`` in the
        problem. Increasing the ``lbfgs_memory_level`` can help improve the
        quality of the model trained. Setting this to more than
        ``max_iterations`` has the same effect as setting it to
        ``max_iterations``.

    model : string optional
        Uses a pretrained model to bootstrap an image classifier:

        - "resnet-50" : Uses a pretrained resnet model.
          Exported Core ML model will be ~90M.
        - "squeezenet_v1.1" : Uses a pretrained squeezenet model.
          Exported Core ML model will be ~4.7M.
        - "VisionFeaturePrint_Scene": Uses an OS internal feature extractor.
          Only available on iOS 12.0+, macOS 10.14+ and tvOS 12.0+.
          Exported Core ML model will be ~41K.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    step_size : float, optional
        The starting step size to use for the ``fista`` solver. The default is
        set to 1.0, this is an aggressive setting. If the first iteration
        takes a considerable amount of time, reducing this parameter may speed
        up model training.

    class_weights : {dict, `auto`}, optional
        Weights the examples in the training data according to the given class
        weights. If set to `None`, all classes are supposed to have weight
        one. The `auto` mode sets the class weight to be inversely
        proportional to number of examples in the training data with the given
        class.

    validation_set : SFrame, optional
        A dataset for monitoring the model's generalization performance. The
        format of this SFrame must be the same as the training set. By default
        this argument is set to 'auto' and a validation set is automatically
        sampled and used for progress printing. If validation_set is set to
        None, then no additional metrics are computed. The default value is
        'auto'.

    max_iterations : int, optional
        The maximum number of allowed passes through the data. More passes
        over the data can result in a more accurately trained model. Consider
        increasing this (the default value is 10) if the training accuracy is
        low and the *Grad-Norm* in the display is large.

    verbose : bool, optional
        If True, prints progress updates and model details.

    seed : int, optional
        Seed for random number generation. Set this value to ensure that the
        same model is created every time.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve
        performance.

    Returns
    -------
    out : ImageClassifier
        A trained :class:`ImageClassifier` model.

    Raises
    ------
    TypeError
        If `dataset` is not an SFrame, or `validation_set` is not an SFrame,
        'auto' or None.
    ValueError
        If `batch_size` is smaller than 1.
    ToolkitError
        If `dataset` is empty, or the `feature` / `target` column does not
        exist in `dataset`.

    Examples
    --------
    .. sourcecode:: python

        >>> model = turicreate.image_classifier.create(data, target='is_expensive')

        # Make predictions (in various forms)
        >>> predictions = model.predict(data)      # predictions
        >>> predictions = model.classify(data)     # predictions with confidence
        >>> predictions = model.predict_topk(data) # Top-5 predictions (multiclass)

        # Evaluate the model with ground truth data
        >>> results = model.evaluate(data)

    See Also
    --------
    ImageClassifier
    """
    start_time = _time.time()

    # Check dataset type first: everything below indexes into it.
    if not isinstance(dataset, _tc.SFrame):
        raise TypeError('"dataset" must be of type SFrame.')

    # Check model parameter. The OS-provided feature extractor is only
    # offered when the reported macOS version is at least 10.14.
    allowed_models = list(_pre_trained_models.IMAGE_MODELS.keys())
    if _mac_ver() >= (10, 14):
        allowed_models.append('VisionFeaturePrint_Scene')
    _tkutl._check_categorical_option_type('model', model, allowed_models)

    # Check dataset contents and the remaining parameters
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')
    if (feature is not None) and (feature not in dataset.column_names()):
        raise _ToolkitError("Image feature column '%s' does not exist" % feature)
    if target not in dataset.column_names():
        raise _ToolkitError("Target column '%s' does not exist" % target)

    if batch_size < 1:
        raise ValueError("'batch_size' must be greater than or equal to 1")

    if not (isinstance(validation_set, _tc.SFrame)
            or validation_set == 'auto'
            or validation_set is None):
        raise TypeError("Unrecognized value for 'validation_set'.")

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)

    _tkutl._handle_missing_values(dataset, feature, 'training_dataset')
    feature_extractor = _image_feature_extractor._create_feature_extractor(model)

    # Extract features
    extracted_features = _tc.SFrame({
        target: dataset[target],
        '__image_features__': feature_extractor.extract_features(
            dataset, feature, verbose=verbose, batch_size=batch_size),
    })
    if isinstance(validation_set, _tc.SFrame):
        # Run the missing-value check on the validation data itself (the
        # training data was already checked above).
        _tkutl._handle_missing_values(validation_set, feature, 'validation_set')
        extracted_features_validation = _tc.SFrame({
            target: validation_set[target],
            '__image_features__': feature_extractor.extract_features(
                validation_set, feature, verbose=verbose, batch_size=batch_size),
        })
    else:
        # 'auto' or None is passed through for the classifier to interpret.
        extracted_features_validation = validation_set

    # Train a classifier using the extracted features
    lr_model = _tc.logistic_classifier.create(
        extracted_features,
        features=['__image_features__'],
        target=target,
        max_iterations=max_iterations,
        validation_set=extracted_features_validation,
        seed=seed,
        verbose=verbose,
        l2_penalty=l2_penalty,
        l1_penalty=l1_penalty,
        solver=solver,
        feature_rescaling=feature_rescaling,
        convergence_threshold=convergence_threshold,
        step_size=step_size,
        lbfgs_memory_level=lbfgs_memory_level,
        class_weights=class_weights)

    # set input image shape
    if model in _pre_trained_models.IMAGE_MODELS:
        input_image_shape = _pre_trained_models.IMAGE_MODELS[model].input_image_shape
    else:  # model == VisionFeaturePrint_Scene
        input_image_shape = (3, 299, 299)

    # Save the model
    state = {
        'classifier': lr_model,
        'model': model,
        'max_iterations': max_iterations,
        'feature_extractor': feature_extractor,
        'input_image_shape': input_image_shape,
        'target': target,
        'feature': feature,
        'num_features': 1,
        'num_classes': lr_model.num_classes,
        'classes': lr_model.classes,
        'num_examples': lr_model.num_examples,
        'training_time': _time.time() - start_time,
        'training_loss': lr_model.training_loss,
    }
    return ImageClassifier(state)
def create(
    dataset, label=None, feature=None, model="resnet-50", verbose=True, batch_size=64
):
    """
    Create a :class:`ImageSimilarityModel` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    label : string
        Name of the SFrame column with row labels to be used as uuid's to
        identify the data. If 'label' is set to None, row numbers are used to
        identify reference dataset rows when the model is queried.

    feature : string
        Name of the column containing either the input images or extracted
        features. 'None' (the default) indicates that the only extracted
        feature column or the only image column in `dataset` should be used
        as the feature.

    model: string, optional
        Uses a pretrained model to bootstrap an image similarity model

        - "resnet-50" : Uses a pretrained resnet model.
        - "squeezenet_v1.1" : Uses a pretrained squeezenet model.
        - "VisionFeaturePrint_Scene": Uses an OS internal feature extractor.
          Only available on iOS 12.0+, macOS 10.14+ and tvOS 12.0+.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    verbose : bool, optional
        If True, print progress updates and model details.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve
        performance.

    Returns
    -------
    out : ImageSimilarityModel
        A trained :class:`ImageSimilarityModel` model.

    Raises
    ------
    TypeError
        If `dataset` is not an SFrame.
    ValueError
        If `batch_size` is smaller than 1.
    ToolkitError
        If `dataset` is empty, a named `label` / `feature` column does not
        exist, or no usable feature column can be determined.

    See Also
    --------
    ImageSimilarityModel

    Examples
    --------
    .. sourcecode:: python

        # Train an image similarity model
        >>> model = turicreate.image_similarity.create(data)

        # Query the model for similar images
        >>> similar_images = model.query(data)
        +-------------+-----------------+-------------------+------+
        | query_label | reference_label |      distance     | rank |
        +-------------+-----------------+-------------------+------+
        |      0      |        0        |        0.0        |  1   |
        |      0      |       519       |   12.5319706301   |  2   |
        |      0      |       1619      |   12.5563764596   |  3   |
        |      0      |       186       |   12.6132604915   |  4   |
        |      0      |       1809      |   12.9180964745   |  5   |
        |      1      |        1        | 2.02304872852e-06 |  1   |
        |      1      |       1579      |   11.4288186151   |  2   |
        |      1      |       1237      |   12.3764325949   |  3   |
        |      1      |        80       |   12.7264363676   |  4   |
        |      1      |        58       |   12.7675058558   |  5   |
        +-------------+-----------------+-------------------+------+
        [500 rows x 4 columns]
    """
    start_time = _time.time()
    if not isinstance(dataset, _tc.SFrame):
        raise TypeError("'dataset' must be of type SFrame.")

    # Check parameters
    allowed_models = list(_pre_trained_models.IMAGE_MODELS.keys())
    if _mac_ver() >= (10, 14):
        allowed_models.append("VisionFeaturePrint_Scene")

        # Also, to make sure existing code doesn't break, replace incorrect name
        # with the correct name version
        if model == "VisionFeaturePrint_Screen":
            print(
                "WARNING: Correct spelling of model name is VisionFeaturePrint_Scene. VisionFeaturePrint_Screen will be removed in future releases."
            )
            model = "VisionFeaturePrint_Scene"

    _tkutl._check_categorical_option_type("model", model, allowed_models)
    if len(dataset) == 0:
        raise _ToolkitError("Unable to train on empty dataset")

    if (label is not None) and (label not in dataset.column_names()):
        raise _ToolkitError("Row label column '%s' does not exist" % label)

    if (feature is not None) and (feature not in dataset.column_names()):
        raise _ToolkitError("Image feature column '%s' does not exist" % feature)

    if batch_size < 1:
        raise ValueError("'batch_size' must be greater than or equal to 1")

    # Set defaults
    if feature is None:
        # select feature column : either extracted features columns or image column itself
        try:
            feature = image_analysis._find_only_image_extracted_features_column(dataset, model)
            feature_type = "extracted_features_array"
        except Exception:
            # Best-effort lookup: fall back to searching for an image column.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            feature = None
        if feature is None:
            try:
                feature = _tkutl._find_only_image_column(dataset)
                feature_type = "image"
            except Exception:
                raise _ToolkitError(
                    'No feature column specified and no column with expected type image or array is found.'
                    + ' "datasets" consists of columns with types: '
                    + ", ".join([x.__name__ for x in dataset.column_types()])
                    + "."
                )
    else:
        if image_analysis._is_image_deep_feature_sarray(dataset[feature], model):
            feature_type = "extracted_features_array"
        elif dataset[feature].dtype is _tc.Image:
            feature_type = "image"
        else:
            raise _ToolkitError(
                'The "{feature}" column of the sFrame neither has the dataype image or array (for extracted features)'.format(feature=feature)
                + ' "datasets" consists of columns with types: '
                + ", ".join([x.__name__ for x in dataset.column_types()])
                + "."
            )

    _tkutl._handle_missing_values(dataset, feature)
    feature_extractor = _image_feature_extractor._create_feature_extractor(model)
    if feature_type == "image":
        # Extract features
        extracted_features = _tc.SFrame(
            {
                "__image_features__": feature_extractor.extract_features(
                    dataset, feature, verbose=verbose, batch_size=batch_size
                ),
            }
        )
    else:
        # The column already holds extracted features; use it directly.
        extracted_features = _tc.SFrame({"__image_features__": dataset[feature]})

    # Train a similarity model using the extracted features
    if label is not None:
        extracted_features[label] = dataset[label]
    nn_model = _tc.nearest_neighbors.create(
        extracted_features,
        label=label,
        features=["__image_features__"],
        verbose=verbose,
    )

    # set input image shape
    if model in _pre_trained_models.IMAGE_MODELS:
        input_image_shape = _pre_trained_models.IMAGE_MODELS[model].input_image_shape
    else:  # model == VisionFeaturePrint_Scene
        input_image_shape = (3, 299, 299)

    # Save the model
    state = {
        "similarity_model": nn_model,
        "model": model,
        "feature_extractor": feature_extractor,
        "input_image_shape": input_image_shape,
        "label": label,
        "feature": feature,
        "num_features": 1,
        "num_examples": nn_model.num_examples,
        "training_time": _time.time() - start_time,
    }
    return ImageSimilarityModel(state)
def create(
    dataset,
    session_id,
    target,
    features=None,
    prediction_window=100,
    validation_set="auto",
    max_iterations=10,
    batch_size=32,
    verbose=True,
):
    """
    Create an :class:`ActivityClassifier` model.

    Parameters
    ----------
    dataset : SFrame
        Input data which consists of `sessions` of data where each session is
        a sequence of data. The data must be in `stacked` format, grouped by
        session. Within each session, the data is assumed to be sorted
        temporally. Columns in `features` will be used to train a model that
        will make a prediction using labels in the `target` column.

    session_id : string
        Name of the column that contains a unique ID for each session.

    target : string
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. Use `model.classes` to
        retrieve the order in which the classes are mapped.

    features : list[string], optional
        Name of the columns containing the input features that will be used
        for classification. If set to `None`, all columns except `session_id`
        and `target` will be used. Other non-string iterables of column names
        (e.g. a tuple) are accepted and converted to a list.

    prediction_window : int, optional
        Number of time units between predictions. For example, if your input
        data is sampled at 100Hz, and the `prediction_window` is set to 100,
        then this model will make a prediction every 1 second.

    validation_set : SFrame, optional
        A dataset for monitoring the model's generalization performance to
        prevent the model from overfitting to the training data.

        For each row of the progress table, accuracy is measured over the
        provided training dataset and the `validation_set`. The format of this
        SFrame must be the same as the training set.

        When set to 'auto', a validation set is automatically sampled from the
        training data (if the training data has > 100 sessions). If
        validation_set is set to None, then all the data will be used for
        training.

    max_iterations : int , optional
        Maximum number of iterations/epochs made over the data during the
        training phase.

    batch_size : int, optional
        Number of sequence chunks used per training step. Must be greater than
        the number of GPUs in use.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : ActivityClassifier
        A trained :class:`ActivityClassifier` model.

    Raises
    ------
    ToolkitError
        If `target` or `session_id` is not a str, or `batch_size` is not an
        int.
    TypeError
        If `features` is not a non-string iterable, contains a name that is
        not a str, or is empty.

    Examples
    --------
    .. sourcecode:: python

        >>> import turicreate as tc

        # Training on dummy data
        >>> data = tc.SFrame({
        ...     'accelerometer_x': [0.1, 0.2, 0.3, 0.4, 0.5] * 10,
        ...     'accelerometer_y': [0.5, 0.4, 0.3, 0.2, 0.1] * 10,
        ...     'accelerometer_z': [0.01, 0.01, 0.02, 0.02, 0.01] * 10,
        ...     'session_id': [0, 0, 0] * 10 + [1, 1] * 10,
        ...     'activity': ['walk', 'run', 'run'] * 10 + ['swim', 'swim'] * 10
        ... })

        # Create an activity classifier
        >>> model = tc.activity_classifier.create(data,
        ...     session_id='session_id', target='activity',
        ...     features=['accelerometer_x', 'accelerometer_y', 'accelerometer_z'])

        # Make predictions (as probability vector, or class)
        >>> predictions = model.predict(data)
        >>> predictions = model.predict(data, output_type='probability_vector')

        # Get both predictions and classes together
        >>> predictions = model.classify(data)

        # Get topk predictions (instead of only top-1) if your labels have more
        # than 2 classes
        >>> predictions = model.predict_topk(data, k = 3)

        # Evaluate the model
        >>> results = model.evaluate(data)

    See Also
    --------
    ActivityClassifier, util.random_split_by_session
    """
    _tkutl._raise_error_if_not_sframe(dataset, "dataset")

    if not isinstance(target, str):
        raise _ToolkitError("target must be of type str")
    if not isinstance(session_id, str):
        raise _ToolkitError("session_id must be of type str")
    if not isinstance(batch_size, int):
        raise _ToolkitError("batch_size must be of type int")

    _tkutl._raise_error_if_sframe_empty(dataset, "dataset")
    _tkutl._numeric_param_check_range("prediction_window", prediction_window, 1, 400)
    _tkutl._numeric_param_check_range("max_iterations", max_iterations, 0, _six.MAXSIZE)

    if features is None:
        features = _fe_tkutl.get_column_names(
            dataset, interpret_as_excluded=True, column_names=[session_id, target]
        )
    # A bare string is iterable too, but it is a single column name rather
    # than a collection of names; reject it up front instead of failing later
    # on the list concatenation below.
    if isinstance(features, str) or not hasattr(features, "__iter__"):
        raise TypeError("Input 'features' must be a list.")
    # Materialize once so tuples/iterators work with len() and `+` below.
    features = list(features)
    invalid_names = [x for x in features if not isinstance(x, str)]
    if invalid_names:
        # Report the first offending entry. Wrapping it in a tuple keeps the
        # %-formatting correct even when the entry is itself a tuple.
        raise TypeError(
            "Invalid feature %s: Feature names must be of type str." % (invalid_names[0],)
        )
    if len(features) == 0:
        raise TypeError("Input 'features' must contain at least one column name.")

    dataset = _tkutl._toolkits_select_columns(dataset, features + [session_id, target])
    _tkutl._raise_error_if_sarray_not_expected_dtype(
        dataset[target], target, [str, int]
    )
    _tkutl._raise_error_if_sarray_not_expected_dtype(
        dataset[session_id], session_id, [str, int]
    )

    for feature in features:
        _tkutl._handle_missing_values(dataset, feature, "training_dataset")

    # Check for missing values for sframe validation set
    if isinstance(validation_set, _SFrame):
        _tkutl._raise_error_if_sframe_empty(validation_set, "validation_set")
        for feature in features:
            _tkutl._handle_missing_values(validation_set, feature, "validation_set")

    # C++ model
    name = "activity_classifier"

    import turicreate as _turicreate

    # Imports tensorflow
    import turicreate.toolkits.libtctensorflow

    model = _turicreate.extensions.activity_classifier()
    options = {
        "prediction_window": prediction_window,
        "batch_size": batch_size,
        "max_iterations": max_iterations,
        "verbose": verbose,
        "_show_loss": False,
    }

    model.train(dataset, target, session_id, validation_set, options)
    return ActivityClassifier(model_proxy=model, name=name)
def create(input_dataset, target, feature=None, validation_set='auto', warm_start='auto', batch_size=256, max_iterations=500, verbose=True, **kwargs): """ Create a :class:`DrawingClassifier` model. Parameters ---------- dataset : SFrame Input data. The columns named by the ``feature`` and ``target`` parameters will be extracted for training the drawing classifier. target : string Name of the column containing the target variable. The values in this column must be of string or integer type. feature : string optional Name of the column containing the input drawings. The feature column can contain either bitmap-based drawings or stroke-based drawings. Bitmap-based drawing input can be a grayscale tc.Image of any size. Stroke-based drawing input must be in the following format: Every drawing must be represented by a list of strokes, where each stroke must be a list of points in the order in which they were drawn on the canvas. Each point must be a dictionary with two keys, "x" and "y", and their respective values must be numerical, i.e. either integer or float. validation_set : SFrame optional A dataset for monitoring the model's generalization performance. The format of this SFrame must be the same as the training set. By default this argument is set to 'auto' and a validation set is automatically sampled and used for progress printing. If validation_set is set to None, then no additional metrics are computed. The default value is 'auto'. warm_start : string optional A string to denote which pretrained model to use. Set to "auto" by default which uses a model trained on 245 of the 345 classes in the Quick, Draw! dataset. To disable warm start, pass in None to this argument. Here is a list of all the pretrained models that can be passed in as this argument: "auto": Uses quickdraw_245_v0 "quickdraw_245_v0": Uses a model trained on 245 of the 345 classes in the Quick, Draw! dataset. None: No Warm Start batch_size: int optional The number of drawings per training step. 
If not set, a default value of 256 will be used. If you are getting memory errors, try decreasing this value. If you have a powerful computer, increasing this value may improve performance. max_iterations : int optional The maximum number of allowed passes through the data. More passes over the data can result in a more accurately trained model. verbose : bool optional If True, print progress updates and model details. Returns ------- out : DrawingClassifier A trained :class:`DrawingClassifier` model. See Also -------- DrawingClassifier Examples -------- .. sourcecode:: python # Train a drawing classifier model >>> model = turicreate.drawing_classifier.create(data) # Make predictions on the training set and as column to the SFrame >>> data['predictions'] = model.predict(data) """ import mxnet as _mx from mxnet import autograd as _autograd from ._model_architecture import Model as _Model from ._sframe_loader import SFrameClassifierIter as _SFrameClassifierIter from .._mxnet import _mxnet_utils import warnings accepted_values_for_warm_start = ["auto", "quickdraw_245_v0", None] params = { 'use_tensorflow': False, } if '_advanced_parameters' in kwargs: # Make sure no additional parameters are provided new_keys = set(kwargs['_advanced_parameters'].keys()) set_keys = set(params.keys()) unsupported = new_keys - set_keys if unsupported: raise _ToolkitError( 'Unknown advanced parameters: {}'.format(unsupported)) params.update(kwargs['_advanced_parameters']) # @TODO: Should be able to automatically choose number of iterations # based on data size: Tracked in Github Issue #1576 if not isinstance(input_dataset, _tc.SFrame): raise TypeError('"input_dataset" must be of type SFrame.') # automatically infer feature column if feature is None: warnings.warn( "Not specifying a feature column is deprecate. This functionality will be removed" + " in the next major release. 
Please specify a \"feature\" value to" + " turicreate.drawing_classifier.create.") feature = _tkutl._find_only_drawing_column(input_dataset) _raise_error_if_not_drawing_classifier_input_sframe( input_dataset, feature, target) if batch_size is not None and not isinstance(batch_size, int): raise TypeError("'batch_size' must be an integer >= 1") if batch_size is not None and batch_size < 1: raise ValueError("'batch_size' must be >= 1") if max_iterations is not None and not isinstance(max_iterations, int): raise TypeError("'max_iterations' must be an integer >= 1") if max_iterations is not None and max_iterations < 1: raise ValueError("'max_iterations' must be >= 1") is_stroke_input = (input_dataset[feature].dtype != _tc.Image) dataset = _extensions._drawing_classifier_prepare_data( input_dataset, feature) if is_stroke_input else input_dataset iteration = -1 classes = dataset[target].unique() classes = sorted(classes) if len(classes) == 1: _ToolkitError("The number of classes has to be greater than one") class_to_index = {name: index for index, name in enumerate(classes)} validation_set_corrective_string = ( "'validation_set' parameter must be " + "an SFrame, or None, or must be set to 'auto' for the toolkit to " + "automatically create a validation set.") if isinstance(validation_set, _tc.SFrame): _raise_error_if_not_drawing_classifier_input_sframe( validation_set, feature, target) is_validation_stroke_input = (validation_set[feature].dtype != _tc.Image) validation_dataset = _extensions._drawing_classifier_prepare_data( validation_set, feature) if is_validation_stroke_input else validation_set elif isinstance(validation_set, str): if validation_set == 'auto': if dataset.num_rows() >= 100: if verbose: print( "PROGRESS: Creating a validation set from 5 percent of training data. 
This may take a while.\n" " You can set ``validation_set=None`` to disable validation tracking.\n" ) dataset, validation_dataset = dataset.random_split( TRAIN_VALIDATION_SPLIT, exact=True) else: validation_set = None validation_dataset = _tc.SFrame() else: raise _ToolkitError("Unrecognized value for 'validation_set'. " + validation_set_corrective_string) elif validation_set is None: validation_dataset = _tc.SFrame() else: raise TypeError("Unrecognized type for 'validation_set'." + validation_set_corrective_string) _tkutl._handle_missing_values(dataset, feature, 'training_dataset') if len(validation_dataset) > 0: _tkutl._handle_missing_values(dataset, feature, 'validation_set') train_loader = _SFrameClassifierIter(dataset, batch_size, feature_column=feature, target_column=target, class_to_index=class_to_index, load_labels=True, shuffle=True, iterations=max_iterations) train_loader_to_compute_accuracy = _SFrameClassifierIter( dataset, batch_size, feature_column=feature, target_column=target, class_to_index=class_to_index, load_labels=True, shuffle=True, iterations=1) validation_loader = _SFrameClassifierIter(validation_dataset, batch_size, feature_column=feature, target_column=target, class_to_index=class_to_index, load_labels=True, shuffle=True, iterations=1) ctx = _mxnet_utils.get_mxnet_context(max_devices=batch_size) model = _Model(num_classes=len(classes), prefix="drawing_") model_params = model.collect_params() model_params.initialize(_mx.init.Xavier(), ctx=ctx) if warm_start is not None: if type(warm_start) is not str: raise TypeError("'warm_start' must be a string or None. " + "'warm_start' can take in the following values: " + str(accepted_values_for_warm_start)) if warm_start not in accepted_values_for_warm_start: raise _ToolkitError("Unrecognized value for 'warm_start': " + warm_start + ". 
'warm_start' can take in the following " + "values: " + str(accepted_values_for_warm_start)) pretrained_model = _pre_trained_models.DrawingClassifierPreTrainedModel( warm_start) pretrained_model_params_path = pretrained_model.get_model_path() model.load_params(pretrained_model_params_path, ctx=ctx, allow_missing=True) if params['use_tensorflow']: ## TensorFlow implementation if verbose: print("Using TensorFlow") from ._tf_drawing_classifier import DrawingClassifierTensorFlowModel, _tf_train_model # To get weights: for warmstart Dense1 needs one forward pass to be initialised test_input = _mx.nd.uniform(0, 1, (1, 3) + (1, 28, 28)) model_output = model.forward(test_input[0]) # Define the TF Model tf_model = DrawingClassifierTensorFlowModel(validation_set, model_params, batch_size, len(classes), verbose) # Train final_train_accuracy, final_val_accuracy, final_train_loss, total_train_time = _tf_train_model( tf_model, train_loader, validation_loader, validation_set, batch_size, len(classes), verbose) # Transfer weights from TF to MXNET model net_params = tf_model.export_weights() for k in net_params.keys(): model_params[k].set_data(net_params[k]) else: ## MXNET implementation if verbose: print("Using MXNET") start_time = _time.time() softmax_cross_entropy = _mx.gluon.loss.SoftmaxCrossEntropyLoss() model.hybridize() trainer = _mx.gluon.Trainer(model.collect_params(), 'adam') if verbose and iteration == -1: column_names = [ 'iteration', 'train_loss', 'train_accuracy', 'time' ] column_titles = [ 'Iteration', 'Training Loss', 'Training Accuracy', 'Elapsed Time (seconds)' ] if validation_set is not None: column_names.insert(3, 'validation_accuracy') column_titles.insert(3, 'Validation Accuracy') table_printer = _tc.util._ProgressTablePrinter( column_names, column_titles) train_accuracy = _mx.metric.Accuracy() validation_accuracy = _mx.metric.Accuracy() def get_data_and_label_from_batch(batch): if batch.pad is not None: size = batch_size - batch.pad sliced_data = 
_mx.nd.slice_axis(batch.data[0], axis=0, begin=0, end=size) sliced_label = _mx.nd.slice_axis(batch.label[0], axis=0, begin=0, end=size) num_devices = min(sliced_data.shape[0], len(ctx)) batch_data = _mx.gluon.utils.split_and_load( sliced_data, ctx_list=ctx[:num_devices], even_split=False) batch_label = _mx.gluon.utils.split_and_load( sliced_label, ctx_list=ctx[:num_devices], even_split=False) else: batch_data = _mx.gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0) batch_label = _mx.gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0) return batch_data, batch_label def compute_accuracy(accuracy_metric, batch_loader): batch_loader.reset() accuracy_metric.reset() for batch in batch_loader: batch_data, batch_label = get_data_and_label_from_batch(batch) outputs = [] for x, y in zip(batch_data, batch_label): if x is None or y is None: continue z = model(x) outputs.append(z) accuracy_metric.update(batch_label, outputs) for train_batch in train_loader: train_batch_data, train_batch_label = get_data_and_label_from_batch( train_batch) with _autograd.record(): # Inside training scope for x, y in zip(train_batch_data, train_batch_label): z = model(x) # Computes softmax cross entropy loss. loss = softmax_cross_entropy(z, y) # Backpropagate the error for one iteration. loss.backward() # Make one step of parameter update. Trainer needs to know the # batch size of data to normalize the gradient by 1/batch_size. 
trainer.step(train_batch.data[0].shape[0], ignore_stale_grad=True) # calculate training metrics train_loss = loss.mean().asscalar() if train_batch.iteration > iteration: # Compute training accuracy compute_accuracy(train_accuracy, train_loader_to_compute_accuracy) # Compute validation accuracy if validation_set is not None: compute_accuracy(validation_accuracy, validation_loader) iteration = train_batch.iteration if verbose: kwargs = { "iteration": iteration + 1, "train_loss": float(train_loss), "train_accuracy": train_accuracy.get()[1], "time": _time.time() - start_time } if validation_set is not None: kwargs[ "validation_accuracy"] = validation_accuracy.get( )[1] table_printer.print_row(**kwargs) final_train_accuracy = train_accuracy.get()[1] final_val_accuracy = validation_accuracy.get( )[1] if validation_set else None final_train_loss = train_loss total_train_time = _time.time() - start_time state = { '_model': model, '_class_to_index': class_to_index, 'num_classes': len(classes), 'classes': classes, 'input_image_shape': (1, BITMAP_WIDTH, BITMAP_HEIGHT), 'training_loss': final_train_loss, 'training_accuracy': final_train_accuracy, 'training_time': total_train_time, 'validation_accuracy': final_val_accuracy, # None if validation_set=None 'max_iterations': max_iterations, 'target': target, 'feature': feature, 'num_examples': len(input_dataset) } return DrawingClassifier(state)
def create(dataset,
           annotations=None,
           feature=None,
           model="darknet-yolo",
           classes=None,
           batch_size=0,
           max_iterations=0,
           verbose=True,
           grid_shape=[13, 13],
           **kwargs):
    """
    Create a :class:`ObjectDetector` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The columns named by the ``feature`` and ``annotations``
        parameters will be extracted for training the detector.

    annotations : string
        Name of the column containing the object detection annotations.
        This column should be a list of dictionaries (or a single dictionary),
        with each dictionary representing a bounding box of an object
        instance. Here is an example of the annotations for a single image
        with two object instances::

            [{'label': 'dog',
              'type': 'rectangle',
              'coordinates': {'x': 223, 'y': 198,
                              'width': 130, 'height': 230}},
             {'label': 'cat',
              'type': 'rectangle',
              'coordinates': {'x': 40, 'y': 73,
                              'width': 80, 'height': 123}}]

        The value for `x` is the horizontal center of the box paired with
        `width` and `y` is the vertical center of the box paired with
        `height`. 'None' (the default) indicates the only list column in
        `dataset` should be used for the annotations.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string optional
        Object detection model to use:

           - "darknet-yolo" : Fast and medium-sized model

    grid_shape : array optional
        Shape of the grid used for object detection, given as
        ``[grid_height, grid_width]``. Higher values increase precision for
        small objects, but at a higher computational cost

           - [13, 13] : Default grid value for a Fast and medium-sized model

    classes : list optional
        List of strings containing the names of the classes of objects.
        Inferred from the data if not provided.

    batch_size : int
        The number of images per training iteration. If 0, then it will be
        automatically determined based on resource availability.

    max_iterations : int
        The number of training iterations. If 0, then it will be
        automatically determined based on the amount of data you provide.

    verbose : bool, optional
        If True, print progress updates and model details.

    **kwargs
        Accepted for backward compatibility; not read by this function.

    Returns
    -------
    out : ObjectDetector
        A trained :class:`ObjectDetector` model.

    Raises
    ------
    ToolkitError
        If ``dataset`` is empty, or if ``grid_shape`` does not contain
        exactly two values.

    See Also
    --------
    ObjectDetector

    Examples
    --------
    .. sourcecode:: python

        # Train an object detector model
        >>> model = turicreate.object_detector.create(data)

        # Make predictions on the training set and add them as a column to the SFrame
        >>> data['predictions'] = model.predict(data)

        # Visualize predictions by generating a new column of marked up images
        >>> data['image_pred'] = turicreate.object_detector.util.draw_bounding_boxes(data['image'], data['predictions'])
    """
    _raise_error_if_not_sframe(dataset, "dataset")

    if len(dataset) == 0:
        raise _ToolkitError("Unable to train on empty dataset")

    _numeric_param_check_range("max_iterations", max_iterations, 0,
                               _six.MAXSIZE)

    supported_detectors = ["darknet-yolo"]

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)
        if verbose:
            print("Using '%s' as feature column" % feature)
    if annotations is None:
        annotations = _tkutl._find_only_column_of_type(
            dataset,
            target_type=[list, dict],
            type_name="list",
            col_name="annotations")
        if verbose:
            print("Using '%s' as annotations column" % annotations)

    _raise_error_if_not_detection_sframe(dataset,
                                         feature,
                                         annotations,
                                         require_annotations=True)
    _tkutl._handle_missing_values(dataset, feature, "dataset")
    _tkutl._check_categorical_option_type("model", model, supported_detectors)

    # Validate and normalise the user-supplied collections *before* they are
    # copied into any configuration dictionary, so that the values handed to
    # the training backend are the validated integers rather than the raw
    # input. Building a new list also leaves the caller's object (and the
    # shared default list in the signature) untouched.
    if classes is None:
        classes = []
    _raise_error_if_not_iterable(classes)
    _raise_error_if_not_iterable(grid_shape)
    grid_shape = [int(x) for x in grid_shape]
    if len(grid_shape) != 2:
        # Explicit raise instead of `assert`: asserts are removed under -O.
        raise _ToolkitError(
            "'grid_shape' must contain exactly two values: "
            "[grid_height, grid_width]")

    base_model = model.split("-", 1)[0]
    # NOTE(review): `ref_model` is not referenced below. The call is kept
    # because its constructor lives outside this function and may have side
    # effects -- confirm and drop if it does not.
    ref_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[base_model]()

    pretrained_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[
        "darknet_mlmodel"]()
    pretrained_model_path = pretrained_model.get_model_path()

    params = {
        "anchors": [
            (1.0, 2.0),
            (1.0, 1.0),
            (2.0, 1.0),
            (2.0, 4.0),
            (2.0, 2.0),
            (4.0, 2.0),
            (4.0, 8.0),
            (4.0, 4.0),
            (8.0, 4.0),
            (8.0, 16.0),
            (8.0, 8.0),
            (16.0, 8.0),
            (16.0, 32.0),
            (16.0, 16.0),
            (32.0, 16.0),
        ],
        "grid_shape": grid_shape,
        "aug_resize": 0,
        "aug_rand_crop": 0.9,
        "aug_rand_pad": 0.9,
        "aug_rand_gray": 0.0,
        "aug_aspect_ratio": 1.25,
        "aug_hue": 0.05,
        "aug_brightness": 0.05,
        "aug_saturation": 0.05,
        "aug_contrast": 0.05,
        "aug_horizontal_flip": True,
        "aug_min_object_covered": 0,
        "aug_min_eject_coverage": 0.5,
        "aug_area_range": (0.15, 2),
        "aug_pca_noise": 0.0,
        "aug_max_attempts": 20,
        "aug_inter_method": 2,
        "lmb_coord_xy": 10.0,
        "lmb_coord_wh": 10.0,
        "lmb_obj": 100.0,
        "lmb_noobj": 5.0,
        "lmb_class": 2.0,
        "non_maximum_suppression_threshold": 0.45,
        "rescore": True,
        "clip_gradients": 0.025,
        "weight_decay": 0.0005,
        "sgd_momentum": 0.9,
        "learning_rate": 1.0e-3,
        "shuffle": True,
        "mps_loss_mult": 8,
        # This large buffer size (8 batches) is an attempt to mitigate against
        # the SFrame shuffle operation that can occur after each epoch.
        "io_thread_buffer_size": 8,
        "mlmodel_path": pretrained_model_path,
    }

    # create tensorflow model here
    import turicreate.toolkits.libtctensorflow

    tf_config = {
        "grid_height": params["grid_shape"][0],
        "grid_width": params["grid_shape"][1],
        "mlmodel_path": params["mlmodel_path"],
        "classes": classes,
        "compute_final_metrics": False,
        "verbose": verbose,
        "model": "darknet-yolo",
    }

    # If batch_size or max_iterations = 0, they will be automatically
    # generated in C++.
    if batch_size > 0:
        tf_config["batch_size"] = batch_size

    if max_iterations > 0:
        tf_config["max_iterations"] = max_iterations

    # Use a distinct name so the `model` argument is not shadowed.
    detector = _tc.extensions.object_detector()
    detector.train(
        data=dataset,
        annotations_column_name=annotations,
        image_column_name=feature,
        options=tf_config,
    )
    return ObjectDetector(model_proxy=detector, name="object_detector")
def create(style_dataset,
           content_dataset,
           style_feature=None,
           content_feature=None,
           max_iterations=None,
           model='resnet-16',
           verbose=True,
           batch_size=6,
           **kwargs):
    """
    Create a :class:`StyleTransfer` model.

    Parameters
    ----------
    style_dataset: SFrame
        Input style images. The columns named by the ``style_feature``
        parameters will be extracted for training the model.

    content_dataset : SFrame
        Input content images. The columns named by the ``content_feature``
        parameters will be extracted for training the model.

    style_feature: string
        Name of the column containing the input images in style SFrame.
        'None' (the default) indicates the only image column in the style
        SFrame should be used as the feature.

    content_feature: string
        Name of the column containing the input images in content SFrame.
        'None' (the default) indicates the only image column in the content
        SFrame should be used as the feature.

    max_iterations : int
        The number of training iterations. If 'None' (the default) or 0, it
        is set to ``len(style_dataset) * 10000``.

    model : string optional
        Style transfer model to use:

            - "resnet-16" : Fast and small-sized residual network that uses
                            VGG-16 as reference network during training.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve training
        throughput.

    verbose : bool, optional
        If True, print progress updates and model details.

    **kwargs
        Only ``_advanced_parameters`` is read: a dict whose keys must all be
        keys of the internal ``params`` dictionary; its values override the
        defaults.

    Returns
    -------
    out : StyleTransfer
        A trained :class:`StyleTransfer` model.

    Raises
    ------
    ToolkitError
        If either dataset is empty, ``batch_size`` is less than 1,
        ``max_iterations`` is not a non-negative integer (or None), or
        ``_advanced_parameters`` contains an unknown key.

    See Also
    --------
    StyleTransfer

    Examples
    --------
    .. sourcecode:: python

        # Create datasets
        >>> content_dataset = turicreate.image_analysis.load_images('content_images/')
        >>> style_dataset = turicreate.image_analysis.load_images('style_images/')

        # Train a style transfer model
        >>> model = turicreate.style_transfer.create(style_dataset, content_dataset)

        # Stylize an image on all styles
        >>> stylized_images = model.stylize(data)

        # Visualize the stylized images
        >>> stylized_images.explore()

    """
    if len(style_dataset) == 0:
        raise _ToolkitError("style_dataset SFrame cannot be empty")
    if len(content_dataset) == 0:
        raise _ToolkitError("content_dataset SFrame cannot be empty")
    if batch_size < 1:
        raise _ToolkitError("'batch_size' must be greater than or equal to 1")
    if max_iterations is not None and (not isinstance(max_iterations, int)
                                       or max_iterations < 0):
        raise _ToolkitError(
            "'max_iterations' must be an integer greater than or equal to 0")

    from ._sframe_loader import SFrameSTIter as _SFrameSTIter
    import mxnet as _mx
    from .._mxnet import _mxnet_utils

    if style_feature is None:
        style_feature = _tkutl._find_only_image_column(style_dataset)
    if content_feature is None:
        content_feature = _tkutl._find_only_image_column(content_dataset)
    if verbose:
        print("Using '{}' in style_dataset as feature column and using "
              "'{}' in content_dataset as feature column".format(
                  style_feature, content_feature))

    _raise_error_if_not_training_sframe(style_dataset, style_feature)
    _raise_error_if_not_training_sframe(content_dataset, content_feature)
    _tkutl._handle_missing_values(style_dataset, style_feature,
                                  'style_dataset')
    _tkutl._handle_missing_values(content_dataset, content_feature,
                                  'content_dataset')

    params = {
        'batch_size': batch_size,
        'vgg16_content_loss_layer': 2,  # conv3_3 layer
        'lr': 0.001,
        'content_loss_mult': 1.0,
        'style_loss_mult': [1e-4, 1e-4, 1e-4, 1e-4],  # conv 1-4 layers
        'finetune_all_params': True,
        'pretrained_weights': False,
        'print_loss_breakdown': False,
        'input_shape': (256, 256),
        'training_content_loader_type': 'stretch',
        'use_augmentation': False,
        'sequential_image_processing': False,
        # Only used if use_augmentaion is True
        'aug_resize': 0,
        'aug_min_object_covered': 0,
        'aug_rand_crop': 0.9,
        'aug_rand_pad': 0.9,
        'aug_rand_gray': 0.0,
        'aug_aspect_ratio': 1.25,
        'aug_hue': 0.05,
        'aug_brightness': 0.05,
        'aug_saturation': 0.05,
        'aug_contrast': 0.05,
        'aug_horizontal_flip': True,
        'aug_area_range': (.05, 1.5),
        'aug_pca_noise': 0.0,
        'aug_max_attempts': 20,
        'aug_inter_method': 2,
        'checkpoint': False,
        'checkpoint_prefix': 'style_transfer',
        'checkpoint_increment': 1000
    }

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError(
                'Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    _content_loss_mult = params['content_loss_mult']
    _style_loss_mult = params['style_loss_mult']

    num_gpus = _mxnet_utils.get_num_gpus_in_use(
        max_devices=params['batch_size'])
    # The MPS path is only taken when no GPU is in use by MXNet.
    use_mps = _use_mps() and num_gpus == 0
    # Round the effective batch size down to a multiple of the device count.
    batch_size_each = params['batch_size'] // max(num_gpus, 1)
    batch_size = max(num_gpus, 1) * batch_size_each
    input_shape = params['input_shape']

    iterations = 0
    if max_iterations is None or max_iterations == 0:
        max_iterations = len(style_dataset) * 10000
        if verbose:
            print('Setting max_iterations to be {}'.format(max_iterations))

    # data loader
    if params['use_augmentation']:
        content_loader_type = '%s-with-augmentation' % params[
            'training_content_loader_type']
    else:
        content_loader_type = params['training_content_loader_type']

    ctx = _mxnet_utils.get_mxnet_context(max_devices=params['batch_size'])

    num_styles = len(style_dataset)

    # TRANSFORMER MODEL
    from ._model import Transformer as _Transformer
    transformer_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[
        model]().get_model_path()
    transformer = _Transformer(num_styles, batch_size_each)
    transformer.collect_params().initialize(ctx=ctx)

    if params['pretrained_weights'] or use_mps:
        transformer.load_params(transformer_model_path,
                                ctx,
                                allow_missing=True)

    # For some reason, the transformer fails to hybridize for training, so we
    # avoid this until resolved
    # transformer.hybridize()

    # VGG MODEL
    from ._model import Vgg16 as _Vgg16
    vgg_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS['Vgg16'](
    ).get_model_path()
    vgg_model = _Vgg16()
    vgg_model.collect_params().initialize(ctx=ctx)
    vgg_model.load_params(vgg_model_path, ctx=ctx, ignore_extra=True)
    vgg_model.hybridize()

    # TRAINER
    from mxnet import gluon as _gluon
    from ._model import gram_matrix as _gram_matrix

    if params['finetune_all_params']:
        trainable_params = transformer.collect_params()
    else:
        trainable_params = transformer.collect_params('.*gamma|.*beta')

    trainer = _gluon.Trainer(trainable_params, 'adam',
                             {'learning_rate': params['lr']})
    mse_loss = _gluon.loss.L2Loss()
    start_time = _time.time()
    smoothed_loss = None
    last_time = 0

    cuda_gpus = _mxnet_utils.get_gpus_in_use(max_devices=params['batch_size'])

    if verbose:
        # Estimate memory usage (based on experiments)
        cuda_mem_req = 260 + batch_size_each * 880 + num_styles * 1.4

        _tkutl._print_neural_compute_device(cuda_gpus=cuda_gpus,
                                            use_mps=use_mps,
                                            cuda_mem_req=cuda_mem_req,
                                            has_mps_impl=False)
    #
    # Pre-compute gram matrices for style images
    #
    if verbose:
        print('Analyzing visual features of the style images')

    if use_mps:
        batch_size = 1

    content_images_loader = _SFrameSTIter(
        content_dataset,
        batch_size,
        shuffle=True,
        feature_column=content_feature,
        input_shape=input_shape,
        loader_type=content_loader_type,
        aug_params=params,
        sequential=params['sequential_image_processing'])
    style_images_loader = _SFrameSTIter(
        style_dataset,
        batch_size,
        shuffle=False,
        num_epochs=1,
        feature_column=style_feature,
        input_shape=input_shape,
        loader_type='stretch',
        sequential=params['sequential_image_processing'])

    style_sa = style_dataset[style_feature]
    idx_column = _tc.SArray(range(0, style_sa.shape[0]))
    style_sframe = _tc.SFrame({"style": idx_column, style_feature: style_sa})

    # Fixed seed: the sequence of sampled style indices is reproducible.
    rs = _np.random.RandomState(1234)

    # Progress-table layout. Computed unconditionally so that the closing
    # rule `hr` is always defined, whether or not anything was printed.
    column_names = ['Iteration', 'Loss', 'Elapsed Time']
    num_columns = len(column_names)
    column_width = max(map(lambda x: len(x), column_names)) + 2
    hr = '+' + '+'.join(['-' * column_width] * num_columns) + '+'

    def _print_progress_header():
        # Print the three-line header of the progress table.
        print(hr)
        print(('| {:<{width}}' * num_columns + '|').format(
            *column_names, width=column_width - 1))
        print(hr)

    def _print_progress_row(cur_iter, loss, elapsed_time):
        # Print one row of the progress table.
        print(
            "| {cur_iter:<{width}}| {loss:<{width}.3f}| {time:<{width}.1f}|"
            .format(cur_iter=cur_iter,
                    loss=loss,
                    time=elapsed_time,
                    width=column_width - 1))

    def _build_state(training_iterations):
        # Snapshot the current training state in the form StyleTransfer
        # expects. Reads the enclosing variables at call time.
        training_time = _time.time() - start_time
        return {
            '_model': transformer,
            '_training_time_as_string': _seconds_as_string(training_time),
            'batch_size': batch_size,
            'num_styles': num_styles,
            'model': model,
            'input_image_shape': input_shape,
            'styles': style_sframe,
            'num_content_images': len(content_dataset),
            'training_time': training_time,
            'max_iterations': max_iterations,
            'training_iterations': training_iterations,
            'training_epochs': content_images_loader.cur_epoch,
            'style_feature': style_feature,
            'content_feature': content_feature,
            "_index_column": "style",
            'training_loss': smoothed_loss,
        }

    if use_mps:
        mxnet_mps_key_map = _MpsStyleGraphAPI.mxnet_mps_weight_dict()

        # By passing in dummy values to the network this causes MXNet to trigger
        # initialization for both the Transformer and VGG16 networks.
        transformer.batch_size = 1
        test_input = _mx.nd.uniform(0, 1, (1, 3) + input_shape)

        transformer_output = transformer.forward(test_input,
                                                 _mx.nd.array([0]))
        # The forward results are discarded; the calls are made only for the
        # initialization described above.
        vgg_model.forward(_vgg16_data_prep(transformer_output))
        vgg_model.forward(_vgg16_data_prep(test_input))

        net_params = transformer.collect_params()
        vgg_params = vgg_model.collect_params()

        mps_net_params = {}
        for k in list(net_params):
            mps_net_params[mxnet_mps_key_map[k]] = _mxnet_to_mps(
                net_params[k].data().asnumpy())
        for k in list(vgg_params):
            mps_net_params[mxnet_mps_key_map[k]] = _mxnet_to_mps(
                vgg_params[k].data().asnumpy())

        mps_config = {
            'mode': _MpsGraphMode.Train,
            'use_sgd': True,
            'st_include_network': True,
            'st_include_loss': True,
            'st_vgg16_content_loss_layer': params['vgg16_content_loss_layer'],
            'st_lr': params['lr'],
            'st_content_loss_mult': params['content_loss_mult'],
            'st_style_loss_mult': params['style_loss_mult'],
            'st_finetune_all_params': params['finetune_all_params'],
            # TODO: plumb through this usage
            "st_num_styles": num_styles
        }

        # TODO: Plumb through Predict with multiple batches
        # output = mps_net.predict(_mxnet_to_mps(test_input.asnumpy()))
        # mps_z = output.asnumpy().reshape(1, 256, 256, 3)
        # z = _mps_to_mxnet(mps_z)

        mps_net = _get_mps_st_net(input_image_shape=(3, input_shape[0],
                                                     input_shape[1]),
                                  batch_size=batch_size,
                                  output_size=(3, input_shape[0],
                                               input_shape[1]),
                                  config=mps_config,
                                  weights=mps_net_params)

        style_images = []
        for s_batch in style_images_loader:
            s_data = _gluon.utils.split_and_load(s_batch.data[0],
                                                 ctx_list=ctx,
                                                 batch_axis=0)
            style_images.append(s_data[0])

        while iterations < max_iterations:
            idx = rs.randint(num_styles, size=1)[0]
            style_image = style_images[idx]
            c_batch = content_images_loader.next()
            c_data = _gluon.utils.split_and_load(c_batch.data[0],
                                                 ctx_list=ctx,
                                                 batch_axis=0)
            content_image = c_data[0]

            loss = mps_net.train(_mxnet_to_mps(content_image.asnumpy()),
                                 _mxnet_to_mps(style_image.asnumpy()), idx)
            cur_loss = loss.asnumpy()[0]

            # Exponential moving average of the loss, used for reporting.
            if smoothed_loss is None:
                smoothed_loss = cur_loss
            else:
                smoothed_loss = 0.9 * smoothed_loss + 0.1 * cur_loss

            if verbose and iterations == 0:
                # Print progress table header
                _print_progress_header()

            cur_time = _time.time()
            if verbose and (cur_time > last_time + 10
                            or iterations == max_iterations):
                # Print progress table row
                _print_progress_row(iterations, smoothed_loss,
                                    cur_time - start_time)
                last_time = cur_time

            iterations = iterations + 1
            if iterations == max_iterations:
                # Only close the table if it was opened; the original
                # printed `hr` unconditionally and failed with
                # UnboundLocalError when verbose=False.
                if verbose:
                    print(hr)
                break

        mps_weights = mps_net.export()

        mps_mxnet_key_map = _MpsStyleGraphAPI.mps_mxnet_weight_dict()

        # Copy the trained MPS weights back into the MXNet transformer.
        # The original code only rebound a local variable here, so the
        # trained weights never reached the returned model. `set_data`
        # writes the value into the parameter itself.
        transformer_params = transformer.collect_params()
        for key in mps_weights:
            if "transformer" in key and "conv" in key:
                param = transformer_params[mps_mxnet_key_map[key]]
                param.set_data(_mx.nd.array(_mps_to_mxnet(mps_weights[key])))
            if "transformer" in key and "inst" in key:
                param = transformer_params[mps_mxnet_key_map[key]]
                target_shape = param.data().shape
                param.set_data(
                    _mx.nd.array(
                        _mps_to_mxnet(
                            mps_weights[key]).reshape(target_shape)))

        # The MPS path always records max_iterations as the number of
        # iterations trained.
        return StyleTransfer(_build_state(max_iterations))

    num_layers = len(params['style_loss_mult'])
    gram_chunks = [[] for _ in range(num_layers)]
    for s_batch in style_images_loader:
        s_data = _gluon.utils.split_and_load(s_batch.data[0],
                                             ctx_list=ctx,
                                             batch_axis=0)
        for s in s_data:
            vgg16_s = _vgg16_data_prep(s)
            ret = vgg_model(vgg16_s)
            grams = [_gram_matrix(x) for x in ret]
            for i, gram in enumerate(grams):
                # Gather every gram matrix on the CPU before concatenating.
                if gram.context != _mx.cpu(0):
                    gram = gram.as_in_context(_mx.cpu(0))
                gram_chunks[i].append(gram)
    del style_images_loader

    grams = [
        # The concatenated styles may be padded, so we slice overflow
        _mx.nd.concat(*chunks, dim=0)[:num_styles] for chunks in gram_chunks
    ]

    # A context->grams look-up table, where all the gram matrices have been
    # distributed
    ctx_grams = {}
    if ctx[0] == _mx.cpu(0):
        ctx_grams[_mx.cpu(0)] = grams
    else:
        for ctx0 in ctx:
            ctx_grams[ctx0] = [gram.as_in_context(ctx0) for gram in grams]

    #
    # Training loop
    #

    vgg_content_loss_layer = params['vgg16_content_loss_layer']

    while iterations < max_iterations:
        content_images_loader.reset()
        for c_batch in content_images_loader:
            c_data = _gluon.utils.split_and_load(c_batch.data[0],
                                                 ctx_list=ctx,
                                                 batch_axis=0)

            Ls = []
            curr_content_loss = []
            curr_style_loss = []
            with _mx.autograd.record():
                for c in c_data:
                    # Randomize styles to train
                    indices = _mx.nd.array(rs.randint(num_styles,
                                                      size=batch_size_each),
                                           dtype=_np.int64,
                                           ctx=c.context)

                    # Generate pastiche
                    p = transformer(c, indices)

                    # mean subtraction
                    vgg16_p = _vgg16_data_prep(p)
                    vgg16_c = _vgg16_data_prep(c)

                    # vgg forward
                    p_vgg_outputs = vgg_model(vgg16_p)

                    c_vgg_outputs = vgg_model(vgg16_c)
                    c_content_layer = c_vgg_outputs[vgg_content_loss_layer]
                    p_content_layer = p_vgg_outputs[vgg_content_loss_layer]

                    # Calculate Loss
                    # Style Loss between style image and stylized image
                    # Ls = sum of L2 norm of gram matrix of vgg16's conv layers
                    style_losses = []
                    for gram, p_vgg_output, style_loss_mult in zip(
                            ctx_grams[c.context], p_vgg_outputs,
                            _style_loss_mult):
                        gram_s_vgg = gram[indices]
                        gram_p_vgg = _gram_matrix(p_vgg_output)

                        style_losses.append(style_loss_mult *
                                            mse_loss(gram_s_vgg, gram_p_vgg))

                    style_loss = _mx.nd.add_n(*style_losses)

                    # Content Loss between content image and stylized image
                    # Lc = L2 norm at a single layer in vgg16
                    content_loss = _content_loss_mult * mse_loss(
                        c_content_layer, p_content_layer)

                    curr_content_loss.append(content_loss)
                    curr_style_loss.append(style_loss)
                    # Divide loss by large number to get into a more legible
                    # range
                    total_loss = (content_loss + style_loss) / 10000.0
                    Ls.append(total_loss)
                for L in Ls:
                    L.backward()

            cur_loss = _np.mean([L.asnumpy()[0] for L in Ls])

            # Exponential moving average of the loss, used for reporting.
            if smoothed_loss is None:
                smoothed_loss = cur_loss
            else:
                smoothed_loss = 0.9 * smoothed_loss + 0.1 * cur_loss
            iterations += 1

            if params['checkpoint'] and iterations % params[
                    'checkpoint_increment'] == 0:
                checkpoint_filename = params['checkpoint_prefix'] + "-" + str(
                    iterations) + ".model"
                st_model = StyleTransfer(_build_state(iterations))
                st_model.save(checkpoint_filename)

            trainer.step(batch_size)

            if verbose and iterations == 1:
                # Print progress table header
                _print_progress_header()

            cur_time = _time.time()

            if verbose and (cur_time > last_time + 10
                            or iterations == max_iterations):
                # Print progress table row
                _print_progress_row(iterations, smoothed_loss,
                                    cur_time - start_time)
                if params['print_loss_breakdown']:
                    print_content_loss = _np.mean(
                        [L.asnumpy()[0] for L in curr_content_loss])
                    print_style_loss = _np.mean(
                        [L.asnumpy()[0] for L in curr_style_loss])
                    print(
                        'Total Loss: {:6.3f} | Content Loss: {:6.3f} | Style Loss: {:6.3f}'
                        .format(cur_loss, print_content_loss,
                                print_style_loss))
                last_time = cur_time
            if iterations == max_iterations:
                # Only close the table if it was opened (see the same guard
                # in the MPS loop above).
                if verbose:
                    print(hr)
                break

    # Save the model state
    return StyleTransfer(_build_state(iterations))
def create(dataset, session_id, target, features=None, prediction_window=100,
           validation_set='auto', max_iterations=10, batch_size=32,
           verbose=True, **kwargs):
    """
    Create an :class:`ActivityClassifier` model.

    Parameters
    ----------
    dataset : SFrame
        Input data which consists of `sessions` of data where each session is
        a sequence of data. The data must be in `stacked` format, grouped by
        session. Within each session, the data is assumed to be sorted
        temporally. Columns in `features` will be used to train a model that
        will make a prediction using labels in the `target` column.

    session_id : string
        Name of the column that contains a unique ID for each session.

    target : string
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. Use `model.classes` to
        retrieve the order in which the classes are mapped.

    features : list[string], optional
        Name of the columns containing the input features that will be used
        for classification. If set to `None`, all columns except `session_id`
        and `target` will be used.

    prediction_window : int, optional
        Number of time units between predictions. For example, if your input
        data is sampled at 100Hz, and the `prediction_window` is set to 100,
        then this model will make a prediction every 1 second.

    validation_set : SFrame, optional
        A dataset for monitoring the model's generalization performance to
        prevent the model from overfitting to the training data.

        For each row of the progress table, accuracy is measured over the
        provided training dataset and the `validation_set`. The format of this
        SFrame must be the same as the training set.

        When set to 'auto', a validation set is automatically sampled from the
        training data (if the training data has > 100 sessions). If
        validation_set is set to None, then all the data will be used for
        training.

    max_iterations : int , optional
        Maximum number of iterations/epochs made over the data during the
        training phase.

    batch_size : int, optional
        Number of sequence chunks used per training step. Must be greater than
        the number of GPUs in use. (The value actually used is raised to at
        least the number of MXNet GPUs in use, and to at least 1; the value
        passed here is still what is recorded as ``batch_size`` in the model.)

    verbose : bool, optional
        If True, print progress updates and model details.

    **kwargs
        May contain ``_advanced_parameters``, a dict whose keys must be a
        subset of ``{'use_tensorflow', 'show_deprecated_warnings'}``.

    Returns
    -------
    out : ActivityClassifier
        A trained :class:`ActivityClassifier` model.

    Raises
    ------
    ToolkitError
        If `target` or `session_id` is not a string, or if
        ``_advanced_parameters`` contains an unknown key.
    TypeError
        If `features` is not a non-empty list of strings.

    Examples
    --------
    .. sourcecode:: python

        >>> import turicreate as tc

        # Training on dummy data
        >>> data = tc.SFrame({
        ...     'accelerometer_x': [0.1, 0.2, 0.3, 0.4, 0.5] * 10,
        ...     'accelerometer_y': [0.5, 0.4, 0.3, 0.2, 0.1] * 10,
        ...     'accelerometer_z': [0.01, 0.01, 0.02, 0.02, 0.01] * 10,
        ...     'session_id': [0, 0, 0] * 10 + [1, 1] * 10,
        ...     'activity': ['walk', 'run', 'run'] * 10 + ['swim', 'swim'] * 10
        ... })

        # Create an activity classifier
        >>> model = tc.activity_classifier.create(data,
        ...     session_id='session_id', target='activity',
        ...     features=['accelerometer_x', 'accelerometer_y', 'accelerometer_z'])

        # Make predictions (as probability vector, or class)
        >>> predictions = model.predict(data)
        >>> predictions = model.predict(data, output_type='probability_vector')

        # Get both predictions and classes together
        >>> predictions = model.classify(data)

        # Get topk predictions (instead of only top-1) if your labels have more
        # 2 classes
        >>> predictions = model.predict_topk(data, k = 3)

        # Evaluate the model
        >>> results = model.evaluate(data)

    See Also
    --------
    ActivityClassifier, util.random_split_by_session
    """
    from .._mxnet import _mxnet_utils
    from ._mx_model_architecture import _net_params
    from ._sframe_sequence_iterator import SFrameSequenceIter as _SFrameSequenceIter
    from ._sframe_sequence_iterator import prep_data as _prep_data
    from ._mx_model_architecture import _define_model_mxnet, _fit_model_mxnet
    from ._mps_model_architecture import _define_model_mps, _fit_model_mps
    from .._mps_utils import (use_mps as _use_mps,
                              mps_device_name as _mps_device_name,
                              ac_weights_mps_to_mxnet as _ac_weights_mps_to_mxnet)

    # ---- Argument validation ------------------------------------------------
    _tkutl._raise_error_if_not_sframe(dataset, "dataset")
    if not isinstance(target, str):
        raise _ToolkitError('target must be of type str')
    if not isinstance(session_id, str):
        raise _ToolkitError('session_id must be of type str')
    _tkutl._raise_error_if_sframe_empty(dataset, 'dataset')
    _tkutl._numeric_param_check_range('prediction_window', prediction_window,
                                      1, 400)
    _tkutl._numeric_param_check_range('max_iterations', max_iterations, 0,
                                      _six.MAXSIZE)

    if features is None:
        features = _fe_tkutl.get_column_names(
            dataset,
            interpret_as_excluded=True,
            column_names=[session_id, target])

    # A bare string is iterable in Python 3 and would otherwise slip through
    # this check, only to fail later with an obscure concatenation error.
    if isinstance(features, str) or not hasattr(features, '__iter__'):
        raise TypeError("Input 'features' must be a list.")
    # Explicit loop (rather than all([...])) so the offending name is in scope
    # for the error message; a comprehension variable does not leak in
    # Python 3.
    for feature_name in features:
        if not isinstance(feature_name, str):
            raise TypeError(
                "Invalid feature %s: Feature names must be of type str." %
                (feature_name,))
    if len(features) == 0:
        raise TypeError(
            "Input 'features' must contain at least one column name.")

    start_time = _time.time()
    dataset = _tkutl._toolkits_select_columns(dataset,
                                              features + [session_id, target])
    _tkutl._raise_error_if_sarray_not_expected_dtype(dataset[target], target,
                                                     [str, int])
    _tkutl._raise_error_if_sarray_not_expected_dtype(dataset[session_id],
                                                     session_id, [str, int])

    # ---- Advanced parameters ------------------------------------------------
    params = {'use_tensorflow': False, 'show_deprecated_warnings': False}

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError(
                'Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    if params['use_tensorflow']:
        # The TensorFlow names are needed whenever the TensorFlow backend is
        # selected, regardless of the warning setting; only the verbosity
        # suppression depends on 'show_deprecated_warnings'.
        import tensorflow as _tf
        from ._tf_model_architecture import ActivityTensorFlowModel, _fit_model_tf

        if not params['show_deprecated_warnings']:
            # Suppresses verbosity to only errors
            _tf.compat.v1.logging.set_verbosity(_tf.compat.v1.logging.ERROR)

    # ---- Train / validation split -------------------------------------------
    if isinstance(validation_set, str) and validation_set == 'auto':
        # Computing the number of unique sessions in this way is relatively
        # expensive. Ideally we'd incorporate this logic into the C++ code that
        # chunks the raw data by prediction window.
        # TODO: https://github.com/apple/turicreate/issues/991
        unique_sessions = _SFrame({'session': dataset[session_id].unique()})
        if len(unique_sessions) < _MIN_NUM_SESSIONS_FOR_SPLIT:
            print(
                "The dataset has less than the minimum of",
                _MIN_NUM_SESSIONS_FOR_SPLIT,
                "sessions required for train-validation split. Continuing without validation set"
            )
            validation_set = None
        else:
            dataset, validation_set = _random_split_by_session(
                dataset, session_id)

    for feature in features:
        _tkutl._handle_missing_values(dataset, feature, 'training_dataset')

    # Encode the target column to numerical values
    use_target = target is not None
    dataset, target_map = _encode_target(dataset, target)

    predictions_in_chunk = 20
    chunked_data, num_sessions = _prep_data(dataset,
                                            features,
                                            session_id,
                                            prediction_window,
                                            predictions_in_chunk,
                                            target=target,
                                            verbose=verbose)

    # Decide whether to use MPS GPU, MXnet GPU or CPU
    num_mxnet_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=num_sessions)
    use_mps = _use_mps() and num_mxnet_gpus == 0 and not (
        params['use_tensorflow'])

    if verbose:
        if use_mps:
            print('Using GPU to create model ({})'.format(_mps_device_name()))
        elif num_mxnet_gpus == 1:
            print('Using GPU to create model (CUDA)')
        elif num_mxnet_gpus > 1:
            print(
                'Using {} GPUs to create model (CUDA)'.format(num_mxnet_gpus))
        elif params['use_tensorflow']:
            print('Using Tensorflow to create model')
        else:
            print('Using CPU to create model')

    # ---- Data iterators -----------------------------------------------------
    # Remember what the caller asked for; the effective batch size is bumped
    # up to at least one chunk per GPU.
    user_provided_batch_size = batch_size
    batch_size = max(batch_size, num_mxnet_gpus, 1)
    use_mx_data_batch = not (use_mps or params['use_tensorflow'])
    data_iter = _SFrameSequenceIter(chunked_data,
                                    len(features),
                                    prediction_window,
                                    predictions_in_chunk,
                                    batch_size,
                                    use_target=use_target,
                                    mx_output=use_mx_data_batch)

    if validation_set is not None:
        _tkutl._raise_error_if_not_sframe(validation_set, 'validation_set')
        _tkutl._raise_error_if_sframe_empty(validation_set, 'validation_set')
        validation_set = _tkutl._toolkits_select_columns(
            validation_set, features + [session_id, target])
        # Check the validation data itself (not the training data again).
        for feature in features:
            _tkutl._handle_missing_values(validation_set, feature,
                                          'validation_set')
        # Keep only rows whose label was seen in the training data.
        validation_set = validation_set.filter_by(list(target_map.keys()),
                                                  target)
        validation_set, mapping = _encode_target(validation_set, target,
                                                 target_map)
        chunked_validation_set, _ = _prep_data(validation_set,
                                               features,
                                               session_id,
                                               prediction_window,
                                               predictions_in_chunk,
                                               target=target,
                                               verbose=False)

        valid_iter = _SFrameSequenceIter(chunked_validation_set,
                                         len(features),
                                         prediction_window,
                                         predictions_in_chunk,
                                         batch_size,
                                         use_target=use_target,
                                         mx_output=use_mx_data_batch)
    else:
        valid_iter = None

    # ---- Model definition and training --------------------------------------
    # Define model architecture
    context = _mxnet_utils.get_mxnet_context(max_devices=num_sessions)

    # Always create MXNet models, as the pred_model is later saved to the state
    # If MPS is used - the loss_model will be overwritten
    loss_model, pred_model = _define_model_mxnet(len(target_map),
                                                 prediction_window,
                                                 predictions_in_chunk, context)

    if use_mps:
        loss_model = _define_model_mps(batch_size,
                                       len(features),
                                       len(target_map),
                                       prediction_window,
                                       predictions_in_chunk,
                                       is_prediction_model=False)

        log = _fit_model_mps(loss_model, data_iter, valid_iter, max_iterations,
                             verbose)
    elif params['use_tensorflow']:
        net_params = _initialize_with_mxnet_weights(
            loss_model, chunked_data, features, prediction_window,
            predictions_in_chunk, batch_size, use_target)

        ac_model = ActivityTensorFlowModel(net_params, batch_size,
                                           len(features), len(target_map),
                                           prediction_window,
                                           predictions_in_chunk)
        # Train the model using Tensorflow
        log = _fit_model_tf(ac_model, net_params, data_iter, valid_iter,
                            max_iterations, verbose, 1e-3)
    else:
        # Train the model using Mxnet
        log = _fit_model_mxnet(loss_model, data_iter, valid_iter,
                               max_iterations, num_mxnet_gpus, verbose)

    # ---- Prediction model ---------------------------------------------------
    # Set up prediction model
    pred_model.bind(data_shapes=data_iter.provide_data,
                    label_shapes=None,
                    for_training=False)

    if use_mps:
        mps_params = loss_model.export()
        arg_params, aux_params = _ac_weights_mps_to_mxnet(
            mps_params, _net_params['lstm_h'])
    elif params['use_tensorflow']:
        # Copy the weights back in the MXNet format
        arg_params, aux_params = ac_model.get_weights()
    else:
        arg_params, aux_params = loss_model.get_params()

    pred_model.init_params(arg_params=arg_params, aux_params=aux_params)

    # Save the model
    state = {
        '_pred_model': pred_model,
        'verbose': verbose,
        'training_time': _time.time() - start_time,
        'target': target,
        'classes': sorted(target_map.keys()),
        'features': features,
        'session_id': session_id,
        'prediction_window': prediction_window,
        'max_iterations': max_iterations,
        'num_examples': len(dataset),
        'num_sessions': num_sessions,
        'num_classes': len(target_map),
        'num_features': len(features),
        'training_accuracy': log['train_acc'],
        'training_log_loss': log['train_loss'],
        '_target_id_map': target_map,
        '_id_target_map': {v: k for k, v in target_map.items()},
        '_predictions_in_chunk': predictions_in_chunk,
        '_recalibrated_batch_size': data_iter.batch_size,
        'batch_size': user_provided_batch_size
    }

    if validation_set is not None:
        state['valid_accuracy'] = log['valid_acc']
        state['valid_log_loss'] = log['valid_loss']

    model = ActivityClassifier(state)
    return model
def create(style_dataset, content_dataset, style_feature=None,
           content_feature=None, max_iterations=None, model='resnet-16',
           verbose=True, batch_size=1, **kwargs):
    """
    Create a :class:`StyleTransfer` model.

    Parameters
    ----------
    style_dataset: SFrame
        Input style images. The columns named by the ``style_feature``
        parameters will be extracted for training the model.

    content_dataset : SFrame
        Input content images. The columns named by the ``content_feature``
        parameters will be extracted for training the model.

    style_feature: string
        Name of the column containing the input images in style SFrame.
        'None' (the default) indicates the only image column in the style
        SFrame should be used as the feature.

    content_feature: string
        Name of the column containing the input images in content SFrame.
        'None' (the default) indicates the only image column in the content
        SFrame should be used as the feature.

    max_iterations : int
        The number of training iterations. If 'None' (the default), then it
        will be automatically determined based on the amount of data you
        provide.

    model : string optional
        Style transfer model to use:

            - "resnet-16" : Fast and small-sized residual network that uses
                            VGG-16 as reference network during training.

        Note: this function does not currently consult the value; the
        pretrained 'resnet-16' and 'Vgg16' base models are always loaded.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve training
        throughput.

    verbose : bool, optional
        If True, print progress updates and model details.

    **kwargs
        May contain ``_advanced_parameters``, a dict whose keys must already
        exist in the internal default parameter dict defined in this function.

    Returns
    -------
    out : StyleTransfer
        A trained :class:`StyleTransfer` model.

    Raises
    ------
    TypeError
        If `style_dataset` or `content_dataset` is not an SFrame.
    ToolkitError
        If either dataset is empty, `batch_size` is less than 1,
        `max_iterations` is not a non-negative integer, or
        ``_advanced_parameters`` contains an unknown key.

    See Also
    --------
    StyleTransfer

    Examples
    --------
    .. sourcecode:: python

        # Create datasets
        >>> content_dataset = turicreate.image_analysis.load_images('content_images/')
        >>> style_dataset = turicreate.image_analysis.load_images('style_images/')

        # Train a style transfer model
        >>> model = turicreate.style_transfer.create(style_dataset, content_dataset)

        # Stylize an image on all styles
        >>> stylized_images = model.stylize(data)

        # Visualize the stylized images
        >>> stylized_images.explore()

    """
    # ---- Argument validation ------------------------------------------------
    if not isinstance(style_dataset, _tc.SFrame):
        raise TypeError('"style_dataset" must be of type SFrame.')
    if not isinstance(content_dataset, _tc.SFrame):
        raise TypeError('"content_dataset" must be of type SFrame.')
    if len(style_dataset) == 0:
        raise _ToolkitError("style_dataset SFrame cannot be empty")
    if len(content_dataset) == 0:
        raise _ToolkitError("content_dataset SFrame cannot be empty")
    if batch_size < 1:
        raise _ToolkitError("'batch_size' must be greater than or equal to 1")
    if max_iterations is not None and (not isinstance(max_iterations, int)
                                       or max_iterations < 0):
        raise _ToolkitError(
            "'max_iterations' must be an integer greater than or equal to 0")

    # Fall back to the single image column when no feature name is given.
    if style_feature is None:
        style_feature = _tkutl._find_only_image_column(style_dataset)

    if content_feature is None:
        content_feature = _tkutl._find_only_image_column(content_dataset)
    if verbose:
        print("Using '{}' in style_dataset as feature column and using "
              "'{}' in content_dataset as feature column".format(
                  style_feature, content_feature))

    _raise_error_if_not_training_sframe(style_dataset, style_feature)
    _raise_error_if_not_training_sframe(content_dataset, content_feature)

    _tkutl._handle_missing_values(style_dataset, style_feature,
                                  'style_dataset')
    _tkutl._handle_missing_values(content_dataset, content_feature,
                                  'content_dataset')

    # ---- Default (advanced) parameters --------------------------------------
    # Only 'input_shape' is read directly below; the full key set also defines
    # which names are accepted in '_advanced_parameters'.
    params = {
        'batch_size': batch_size,
        'vgg16_content_loss_layer': 2,  # conv3_3 layer
        'lr': 0.001,
        'content_loss_mult': 1.0,
        'style_loss_mult': [1e-4, 1e-4, 1e-4, 1e-4],  # conv 1-4 layers
        'finetune_all_params': True,
        'pretrained_weights': False,
        'print_loss_breakdown': False,
        'input_shape': (256, 256),
        'training_content_loader_type': 'stretch',
        'use_augmentation': False,
        'sequential_image_processing': False,
        # Only used if use_augmentation is True
        'aug_resize': 0,
        'aug_min_object_covered': 0,
        'aug_rand_crop': 0.9,
        'aug_rand_pad': 0.9,
        'aug_rand_gray': 0.0,
        'aug_aspect_ratio': 1.25,
        'aug_hue': 0.05,
        'aug_brightness': 0.05,
        'aug_saturation': 0.05,
        'aug_contrast': 0.05,
        'aug_horizontal_flip': True,
        'aug_area_range': (.05, 1.5),
        'aug_pca_noise': 0.0,
        'aug_max_attempts': 20,
        'aug_inter_method': 2,
        'checkpoint': False,
        'checkpoint_prefix': 'style_transfer',
        'checkpoint_increment': 1000
    }

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError(
                'Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    name = 'style_transfer'

    import turicreate as _turicreate

    # Imports tensorflow
    import turicreate.toolkits.libtctensorflow

    # Use a distinct local name so the public `model` argument is not
    # silently shadowed by the native model handle.
    st_model = _turicreate.extensions.style_transfer()
    pretrained_resnet_model = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[
        'resnet-16']()
    pretrained_vgg16_model = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[
        'Vgg16']()

    # ---- Options handed to the native trainer -------------------------------
    options = {}
    options['image_height'] = params['input_shape'][0]
    options['image_width'] = params['input_shape'][1]
    options['content_feature'] = content_feature
    options['style_feature'] = style_feature
    options['verbose'] = verbose if verbose is not None else False
    if batch_size is not None:
        options['batch_size'] = batch_size
    # When max_iterations is None the key is omitted entirely.
    if max_iterations is not None:
        options['max_iterations'] = max_iterations
    options['num_styles'] = len(style_dataset)
    options['resnet_mlmodel_path'] = pretrained_resnet_model.get_model_path(
        'coreml')
    options['vgg_mlmodel_path'] = pretrained_vgg16_model.get_model_path(
        'coreml')

    st_model.train(style_dataset[style_feature],
                   content_dataset[content_feature], options)
    return StyleTransfer(model_proxy=st_model, name=name)