def _extract_features(self, preprocessed_data, verbose=True):
    """
    Run the VGGish model over preprocessed data and collect its outputs.

    When ``_mac_ver()`` compares less than ``(10, 14)`` the TensorFlow/Keras
    model is used, with one ``predict`` call per entry along the first axis
    of the transposed input. Otherwise the Core ML model is used, with one
    ``predict`` call per frame of every example.

    Parameters
    ----------
    preprocessed_data : SArray
        Preprocessed input. In the Keras branch it is transposed with axes
        ``(0, 2, 3, 1)``, so it must be convertible to a 4-D array there.

    verbose : bool
        If True, print a progress update roughly every 20 seconds.

    Returns
    -------
    The result of ``SArrayBuilder.close()``: one entry per model prediction.
    """
    # Mutable state for the progress helper, kept in a dict so the nested
    # function can update it without `nonlocal`.
    progress = {'last_update': _time.time(), 'header_printed': False}

    def report_progress(num_done, total):
        # Print a progress line, but at most about once every 20 seconds.
        if not verbose:
            return
        if _time.time() - progress['last_update'] < 20:
            return
        if not progress['header_printed']:
            print("Extracting deep features -")
            progress['header_printed'] = True
        print("Extracted {} of {}".format(num_done, total))
        progress['last_update'] = _time.time()

    deep_features = _tc.SArrayBuilder(_np.ndarray)

    if _mac_ver() < (10, 14):
        # Use TensorFlow/Keras

        # Transpose data from channel first to channel last
        preprocessed_data = _np.transpose(preprocessed_data, (0, 2, 3, 1))
        total = len(preprocessed_data)

        for i, cur_example in enumerate(preprocessed_data):
            y = self.vggish_model.predict([[cur_example]])
            deep_features.append(y[0])
            # `i` is zero-based; i + 1 entries are done at this point.
            report_progress(i + 1, total)
    else:
        # Use Core ML
        total = len(preprocessed_data)

        for i, cur_example in enumerate(preprocessed_data):
            for cur_frame in cur_example:
                x = {'input1': _np.asarray([cur_frame])}
                y = self.vggish_model.predict(x)
                deep_features.append(y['output1'])
            # `i` is zero-based; i + 1 examples are done at this point.
            report_progress(i + 1, total)

    # Only emit the closing line if intermediate progress was shown.
    if progress['header_printed']:
        print("Extracted {} of {}\n".format(total, total))

    return deep_features.close()
def extract_features(self, dataset, feature, batch_size=512, verbose=False):
    """
    Resize the images in one column and run them through the MXNet model.

    Parameters
    ----------
    dataset: SFrame
        SFrame of images

    feature: column key
        Column of `dataset` holding the images.

    batch_size: int
        Upper bound on the batch size; capped at ``len(dataset)``.

    verbose: bool
        If True, print status and per-batch progress messages.

    Returns
    -------
    SArray with one ``array.array('d', ...)`` per input row, built from the
    first output of the model.
    """
    from ..mx import SFrameImageIter as _SFrameImageIter
    import turicreate as _tc
    import array

    total = len(dataset)
    if total == 0:
        return _tc.SArray([], array.array)

    # Resize images if needed
    resized = _tc.SFrame()
    if verbose:
        print("Resizing images...")
    target_size = tuple(reversed(self.image_shape))
    resized[feature] = _tc.image_analysis.resize(dataset[feature], *target_size)

    batch_size = min(total, batch_size)

    # Iterator that yields batches of the resized images
    image_iter = _SFrameImageIter(sframe=resized,
                                  data_field=[feature],
                                  batch_size=batch_size)

    # MXNet module bound to the requested data/feature layers
    model = MXFeatureExtractor._get_mx_module(
        self.ptModel.mxmodel, self.data_layer, self.feature_layer,
        self.context, self.image_shape, batch_size)

    builder = _tc.SArrayBuilder(dtype=array.array)
    count_width = len(str(total))
    done = 0
    if verbose:
        print("Performing feature extraction on resized images...")

    while image_iter.has_next:
        batch_shape = image_iter.data_shape[1:]
        if batch_shape != self.image_shape:
            raise RuntimeError("Expected image of size %s. Got %s instead." % (
                self.image_shape, batch_shape))

        model.forward(next(image_iter))
        features = [array.array('d', row)
                    for row in model.get_outputs()[0].asnumpy()]

        pad = image_iter.getpad()
        if pad != 0:
            # The last batch wraps around to the start when the row count is
            # not a multiple of the batch size; drop the wrapped rows.
            features = features[:-pad]
        builder.append_multiple(features)

        done = min(total, done + batch_size)
        if verbose:
            print('Completed {num_processed:{width}d}/{total:{width}d}'.format(
                num_processed=done, total=total, width=count_width))

    return builder.close()
def _extract_features(self, preprocessed_data, verbose=True):
    """
    Run the VGGish model over preprocessed data and collect its outputs.

    When ``_mac_ver()`` compares less than ``(10, 14)`` the MXNet model is
    used: the data is split across ``self.ctx`` and each resulting batch is
    passed through ``forward``. Otherwise the Core ML model is used, with one
    ``predict`` call per frame of every example.

    Parameters
    ----------
    preprocessed_data : SArray
        Preprocessed input. In the MXNet branch it is converted with
        ``mx.nd.array``.

    verbose : bool
        If True, print a progress update roughly every 20 seconds.

    Returns
    -------
    The result of ``SArrayBuilder.close()``: one entry per row of each MXNet
    batch output, or one entry per Core ML prediction.
    """
    # Mutable state for the progress helper, kept in a dict so the nested
    # function can update it without `nonlocal`.
    progress = {'last_update': _time.time(), 'header_printed': False}

    def report_progress(num_done, total, unit=""):
        # Print a progress line, but at most about once every 20 seconds.
        if not verbose:
            return
        if _time.time() - progress['last_update'] < 20:
            return
        if not progress['header_printed']:
            print("Extracting deep features -")
            progress['header_printed'] = True
        print("Extracted {} of {}{}".format(num_done, total, unit))
        progress['last_update'] = _time.time()

    deep_features = _tc.SArrayBuilder(_np.ndarray)

    if _mac_ver() < (10, 14):
        # Use MXNet
        preprocessed_data = mx.nd.array(preprocessed_data)

        # Never use more contexts than there are rows to distribute.
        ctx_list = self.ctx
        if len(preprocessed_data) < len(ctx_list):
            ctx_list = ctx_list[:len(preprocessed_data)]
        batches = utils.split_and_load(preprocessed_data,
                                       ctx_list=ctx_list,
                                       even_split=False)
        num_batches = len(batches)

        for i, cur_batch in enumerate(batches):
            y = self.vggish_model.forward(cur_batch).asnumpy()
            for j in y:
                deep_features.append(j)
            # `i` is zero-based; i + 1 batches are done at this point.
            report_progress(i + 1, num_batches, " batches")

        # Only emit the closing line if intermediate progress was shown.
        if progress['header_printed']:
            print("Extracted {} of {} batches\n".format(
                num_batches, num_batches))
    else:
        # Use Core ML
        num_examples = len(preprocessed_data)

        for i, cur_example in enumerate(preprocessed_data):
            for cur_frame in cur_example:
                x = {'input1': [cur_frame]}
                y = self.vggish_model.predict(x)
                deep_features.append(y['output1'])
            # `i` is zero-based; i + 1 examples are done at this point.
            report_progress(i + 1, num_examples)

        # Only emit the closing line if intermediate progress was shown.
        if progress['header_printed']:
            print("Extracted {} of {}\n".format(num_examples, num_examples))

    return deep_features.close()
def extract_features(self, dataset, feature, batch_size=64, verbose=False):
    """
    Run the images in one column through the MXNet model.

    Inference runs on a dedicated worker thread, and this thread keeps up to
    two batches in flight so that batch creation overlaps with inference.

    Parameters
    ----------
    dataset: SFrame
        SFrame of images

    feature: column key
        Column of `dataset` holding the images.

    batch_size: int
        Upper bound on the batch size; capped at ``len(dataset)``.

    verbose: bool
        If True, print status and per-batch progress messages.

    Returns
    -------
    SArray with one ``array.array('d', ...)`` per input row, built from the
    first output of the model.

    Raises
    ------
    Any exception raised while a batch is processed on the worker thread is
    re-raised in the calling thread.
    """
    from ._mxnet._mx_sframe_iter import SFrameImageIter as _SFrameImageIter
    from six.moves.queue import Queue as _Queue
    from threading import Thread as _Thread
    import turicreate as _tc
    import array

    if len(dataset) == 0:
        return _tc.SArray([], array.array)

    batch_size = min(len(dataset), batch_size)

    # Make a data iterator
    dataIter = _SFrameImageIter(sframe=dataset, data_field=[feature],
                                batch_size=batch_size,
                                image_shape=self.image_shape)

    # Setup the MXNet model
    model = MXFeatureExtractor._get_mx_module(
        self.ptModel.mxmodel, self.data_layer, self.feature_layer,
        self.context, self.image_shape, batch_size)

    out = _tc.SArrayBuilder(dtype=array.array)
    progress = {'num_processed': 0, 'total': len(dataset)}
    if verbose:
        print("Performing feature extraction on resized images...")

    # Encapsulates the work done by the MXNet model for a single batch
    def handle_request(batch):
        model.forward(batch)
        mx_out = [
            array.array('d', m) for m in model.get_outputs()[0].asnumpy()
        ]
        if batch.pad != 0:
            # If batch size is not evenly divisible by the length, it will
            # loop back around. We don't want that.
            mx_out = mx_out[:-batch.pad]
        return mx_out

    # Copies the output from MXNet into the SArrayBuilder and emits progress
    def consume_response(mx_out):
        out.append_multiple(mx_out)
        progress['num_processed'] += len(mx_out)
        if verbose:
            print('Completed {num_processed:{width}d}/{total:{width}d}'.
                  format(width=len(str(progress['total'])), **progress))

    # Create a dedicated thread for performing MXNet work, using two FIFO
    # queues for communication back and forth with this thread, with the
    # goal of keeping MXNet busy throughout.
    request_queue = _Queue()
    response_queue = _Queue()

    def mx_worker():
        while True:
            batch = request_queue.get()  # Consume request
            if batch is None:
                # No more work remains. Allow the thread to finish.
                return
            try:
                response = handle_request(batch)
            except Exception as e:
                # Hand the failure to the consumer instead of dying silently;
                # otherwise the consumer would block forever waiting for a
                # response that never arrives.
                response_queue.put(e)
                return
            response_queue.put(response)  # Produce response

    # Fetches the next worker result, re-raising any forwarded failure here.
    def next_response():
        response = response_queue.get()
        if isinstance(response, Exception):
            raise response
        return response

    mx_worker_thread = _Thread(target=mx_worker)
    mx_worker_thread.start()

    try:
        # Attempt to have two requests in progress at any one time (double
        # buffering), so that the iterator is creating one batch while MXNet
        # performs inference on the other.
        if dataIter.has_next:
            request_queue.put(next(dataIter))  # Produce request
            while dataIter.has_next:
                request_queue.put(next(dataIter))  # Produce request
                consume_response(next_response())
            # Drain the response for the last outstanding request.
            consume_response(next_response())
    finally:
        # Tell the worker thread to shut down.
        request_queue.put(None)

    return out.close()
def __init__(self, sframe, batch_size, shuffle, feature_column, input_shape,
             num_epochs=None, repeat_each_image=1, loader_type='stretch',
             aug_params=None, sequential=True):
    """
    Set up the loader state and pre-process every image once.

    Parameters
    ----------
    sframe : SFrame
        Input data. A copy is taken so that the temporary column added
        here does not change the caller's SFrame.

    batch_size : int
        Stored on the instance and used as the leading dimension of the
        provided data description.

    shuffle
        Stored on the instance; not otherwise used in this constructor.

    feature_column : str
        Name of the image column. Its dtype must be ``Image``.

    input_shape : sequence
        Shape passed to the image preparation function and appended after
        the channel dimension in the data description.

    num_epochs, repeat_each_image
        Stored on the instance; not otherwise used in this constructor.

    loader_type : str
        One of 'stretch', 'stretch-with-augmentation', 'pad',
        'pad-with-augmentation' or 'favor-native-size'. The 'stretch'
        variants prepare images with ``_stretch_resize``; the others with
        ``_resize_if_too_large``. Types ending in '-with-augmentation'
        also build a list of detection augmenters from `aug_params`.

    aug_params : dict, optional
        Augmentation settings, read only for '-with-augmentation' loader
        types. Keys read: 'aug_resize', 'aug_rand_crop', 'aug_rand_pad',
        'aug_horizontal_flip', 'aug_rand_gray', 'aug_brightness',
        'aug_contrast', 'aug_saturation', 'aug_hue', 'aug_pca_noise',
        'aug_inter_method', 'aug_min_object_covered', 'aug_area_range'.
        Defaults to an empty dict.

    sequential : bool
        If True, prepare the images one at a time in this process;
        otherwise use ``SArray.apply``.

    Raises
    ------
    _ToolkitError
        If the feature column is not of type Image.
    ValueError
        If `loader_type` is not one of the recognised values.
    """
    # Use a None sentinel rather than a shared mutable default dict.
    if aug_params is None:
        aug_params = {}

    if sframe[feature_column].dtype != _tc.Image:
        raise _ToolkitError('Feature column must be of type Image')

    if loader_type in {'stretch', 'stretch-with-augmentation'}:
        img_prep_fn = lambda img: _stretch_resize(img, input_shape)
    elif loader_type in {
            'pad', 'pad-with-augmentation', 'favor-native-size'
    }:
        img_prep_fn = lambda img: _resize_if_too_large(img, input_shape)
    else:
        raise ValueError('Unknown loader-type')

    if loader_type.endswith('-with-augmentation'):
        augs = _mx.image.CreateDetAugmenter(
            data_shape=(3, ) + tuple(input_shape),
            resize=aug_params['aug_resize'],
            rand_crop=aug_params['aug_rand_crop'],
            rand_pad=aug_params['aug_rand_pad'],
            rand_mirror=aug_params['aug_horizontal_flip'],
            rand_gray=aug_params['aug_rand_gray'],
            mean=_np.zeros(3),
            std=_np.ones(3),
            brightness=aug_params['aug_brightness'],
            contrast=aug_params['aug_contrast'],
            saturation=aug_params['aug_saturation'],
            hue=aug_params['aug_hue'],
            pca_noise=aug_params['aug_pca_noise'],
            inter_method=aug_params['aug_inter_method'],
            min_object_covered=aug_params['aug_min_object_covered'],
            pad_val=(128, 128, 128),
            area_range=aug_params['aug_area_range'])
    else:
        augs = []

    self.augmentations = augs
    self.cur_batch = 0
    self.batch_size = batch_size
    self.input_shape = input_shape
    self.shuffle = shuffle
    self.feature_column = feature_column
    self.cur_epoch = 0
    self.cur_sample = 0
    self.cur_repeat = 0
    self.num_epochs = num_epochs
    self.repeat_each_image = repeat_each_image
    self.loader_type = loader_type

    # Make shallow copy, so that temporary columns do not change input
    self.sframe = sframe.copy()

    # Convert images to raw to eliminate overhead of decoding
    if sequential:
        builder = _tc.SArrayBuilder(_tc.Image)
        for img in self.sframe[self.feature_column]:
            builder.append(img_prep_fn(img))
        self.sframe[_TMP_COL_PREP_IMAGE] = builder.close()
    else:
        self.sframe[_TMP_COL_PREP_IMAGE] = self.sframe[
            self.feature_column].apply(img_prep_fn)

    self._provide_data = [
        _mx.io.DataDesc(name='image',
                        shape=(batch_size, 3) + tuple(input_shape),
                        layout='NCHW')
    ]