def get_prediction_results(model_dir_or_id, data, headers, img_cols=None,
                           cloud=False, show_image=True):
    """Predict with a specified model and join the results with the source data.

    It predicts with the model, joins the source data with the prediction
    results, and formats the results so they can be displayed nicely in
    Datalab.

    Args:
      model_dir_or_id: The model directory if cloud is False, or
          "model.version" if cloud is True.
      data: Can be a list of dictionaries, a list of csv lines, or a Pandas
          DataFrame.
      headers: The column names of data. It specifies the order of the
          columns when serializing to csv lines.
      img_cols: The image url columns. If specified, the image urls will be
          converted to base64 encoded image bytes.
      cloud: If True, model_dir_or_id is split into a model name and a
          version and the prediction is sent to that deployed model version.
          Otherwise the prediction runs against the model directory.
      show_image: When displaying results, whether to add a column for
          showing images for each image column.

    Returns:
      A dataframe of joined prediction source and prediction results.

    Raises:
      ValueError: If cloud is True and model_dir_or_id is not of the form
          "model.version".
    """
    if img_cols is None:
        img_cols = []

    if isinstance(data, pd.DataFrame):
        # One dict per row, in row order. Going through data.T.to_dict()
        # would drop rows that share an index label and upcast mixed numeric
        # columns (e.g. ints become floats) during the transpose.
        data = data.to_dict(orient='records')
    elif len(data) > 0 and isinstance(data[0], six.string_types):
        # A list of csv lines: parse each line into a dict keyed by headers.
        data = list(csv.DictReader(data, fieldnames=headers))

    images = _download_images(data, img_cols)
    predict_data = _get_predicton_csv_lines(data, headers, images)

    display_data = data
    if show_image:
        display_data = _get_display_data_with_images(data, images)

    if cloud:
        parts = model_dir_or_id.split('.')
        if len(parts) != 2:
            raise ValueError('Invalid model name for cloud prediction. Use "model.version".')
        model_name, version_name = parts
        predict_results = ml.ModelVersions(model_name).predict(version_name, predict_data)
    else:
        predict_results = _tf_predict(model_dir_or_id, predict_data)

    df_r = pd.DataFrame(predict_results)
    df_s = pd.DataFrame(display_data)
    df = pd.concat([df_r, df_s], axis=1)
    # Remove duplicate columns. All 'key' columns are duplicate here.
    return df.loc[:, ~df.columns.duplicated()]
def cloud_predict(model_name, model_version, data):
    """Run online prediction against a deployed model version.

    Sends the data to the deployed model and returns the predictions as a
    DataFrame. For running prediction on a large dataset or saving the
    results, run local_batch_prediction or batch_prediction.

    Before using this, the model must be created. This can be done by running
    two gcloud commands:
      1) gcloud beta ml models create NAME
      2) gcloud beta ml versions create VERSION --model NAME \\
           --origin gs://BUCKET/training_dir/model
    or these datalab commands:
      1) import google.datalab as datalab
         model = datalab.ml.ModelVersions(MODEL_NAME)
         model.deploy(version_name=VERSION,
                      path='gs://BUCKET/training_dir/model')
    Note that the model must be on GCS.

    Args:
      model_name: Deployed model name.
      model_version: Deployed model version.
      data: List of csv strings or a Pandas DataFrame that match the model
          schema.

    Returns:
      A DataFrame with one row per prediction. The initial columns are the
      sorted keys of the first prediction. An empty DataFrame is returned
      when the service returns no predictions.
    """
    import google.datalab.ml as ml

    if isinstance(data, pd.DataFrame):
        # Serialize the rows to csv lines, without header or index.
        string_buffer = io.StringIO()
        data.to_csv(string_buffer, header=False, index=False)
        # Splitting on newlines leaves an empty trailing string; drop it.
        input_data = [line for line in string_buffer.getvalue().split('\n') if line]
    else:
        input_data = data

    predictions = ml.ModelVersions(model_name).predict(model_version, input_data)
    if not predictions:
        # Nothing to tabulate; indexing predictions[0] below would fail.
        return pd.DataFrame()

    # Convert predictions into a dataframe, filling it in cell by cell.
    df = pd.DataFrame(columns=sorted(predictions[0].keys()))
    for i, prediction in enumerate(predictions):
        # dict.items() exists on both Python 2 and 3; iteritems() is
        # Python 2 only and raises AttributeError on Python 3.
        for k, v in prediction.items():
            df.loc[i, k] = v
    return df
def predict(model_id, image_files, resize, show_image):
    """Predict using a deployed (online) model.

    Args:
      model_id: The deployed model, in the form "model.version".
      image_files: The image files to predict on; passed to
          _util.load_images.
      resize: Passed to _util.load_images as its resize argument.
      show_image: Passed through to _util.process_prediction_results.

    Returns:
      Whatever _util.process_prediction_results returns for the
      (image file, image, (predicted label, score)) triples.

    Raises:
      ValueError: If model_id is not of the form "model.version", or if no
          images were loaded.
      Exception: If the prediction service returns no results.
    """
    import google.datalab.ml as ml

    # Validate the cheap argument first so that a malformed model id fails
    # before any image is loaded.
    parts = model_id.split('.')
    if len(parts) != 2:
        raise ValueError('Invalid model name for cloud prediction. Use "model.version".')
    model_name, version_name = parts

    images = _util.load_images(image_files, resize=resize)
    if len(images) == 0:
        raise ValueError('images is empty.')

    data = [
        {
            'key': str(ii),
            # b64encode returns bytes on Python 3. Decode to text because
            # bytes cannot be JSON-encoded (assumes the request body is
            # serialized as JSON by the client -- confirm).
            'image_bytes': {'b64': base64.b64encode(image).decode('ascii')},
        }
        for ii, image in enumerate(images)
    ]

    predictions = ml.ModelVersions(model_name).predict(version_name, data)
    if len(predictions) == 0:
        raise Exception('Prediction results are empty.')

    # Although prediction results contains a labels list in each instance,
    # they are all the same so taking the first one.
    labels = predictions[0]['labels']
    labels_and_scores = [
        (x['prediction'], x['scores'][labels.index(x['prediction'])])
        for x in predictions
    ]
    # zip() is a one-shot iterator on Python 3; materialize it so the helper
    # receives a real sequence.
    results = list(zip(image_files, images, labels_and_scores))
    return _util.process_prediction_results(results, show_image)
def get_prediction_results(model_dir_or_id, data, headers, img_cols=None,
                           cloud=False, with_source=True, show_image=True):
    """Predict with a specified model and optionally join with the source data.

    It predicts with the model, joins the source data with the prediction
    results, and formats the results so they can be displayed nicely in
    Datalab.

    Args:
      model_dir_or_id: The model directory if cloud is False, or
          "model.version" if cloud is True.
      data: Can be a list of dictionaries, a list of csv lines, or a Pandas
          DataFrame. If it is not a list of csv lines, data will be converted
          to csv lines first, using the order specified by headers, and then
          sent to the model. For images, it can be image gs urls or in-memory
          PIL images. Images will be converted to base64 encoded strings
          before prediction.
      headers: The column names of data. It specifies the order of the
          columns when serializing to csv lines for prediction.
      img_cols: The image url columns. If specified, the image urls will be
          converted to base64 encoded image bytes.
      cloud: If True, model_dir_or_id is split into a model name and a
          version and the prediction is sent to that deployed model version.
          Otherwise the prediction runs against the model directory.
      with_source: Whether to return the joined prediction source and
          prediction results, or the prediction results only.
      show_image: When displaying prediction source, whether to add a column
          of image bytes for each image url column.

    Returns:
      A dataframe of joined prediction source and prediction results, or
      prediction results only.

    Raises:
      ValueError: If cloud is True and model_dir_or_id is not of the form
          "model.version".
    """
    if img_cols is None:
        img_cols = []

    if isinstance(data, pd.DataFrame):
        # One dict per row, in row order. Going through data.T.to_dict()
        # would drop rows that share an index label and upcast mixed numeric
        # columns (e.g. ints become floats) during the transpose.
        data = data.to_dict(orient='records')
    elif len(data) > 0 and isinstance(data[0], six.string_types):
        # A list of csv lines: parse each line into a dict keyed by headers.
        data = list(csv.DictReader(data, fieldnames=headers))

    images = _download_images(data, img_cols)
    predict_data = _get_predicton_csv_lines(data, headers, images)

    if cloud:
        parts = model_dir_or_id.split('.')
        if len(parts) != 2:
            raise ValueError('Invalid model name for cloud prediction. Use "model.version".')
        model_name, version_name = parts
        predict_results = ml.ModelVersions(model_name).predict(version_name, predict_data)
    else:
        # Quiet tensorflow logging during local prediction, then restore the
        # previous level even if prediction raises.
        tf_logger = logging.getLogger("tensorflow")
        tf_logging_level = tf_logger.level
        tf_logger.setLevel(logging.WARNING)
        try:
            predict_results = _tf_predict(model_dir_or_id, predict_data)
        finally:
            tf_logger.setLevel(tf_logging_level)

    df_r = pd.DataFrame(predict_results)
    if not with_source:
        return df_r

    display_data = data
    if show_image:
        display_data = _get_display_data_with_images(data, images)

    df_s = pd.DataFrame(display_data)
    df = pd.concat([df_r, df_s], axis=1)
    # Remove duplicate columns. All 'key' columns are duplicate here.
    return df.loc[:, ~df.columns.duplicated()]