Ejemplo n.º 1
0
def get_prediction_results(model_dir_or_id, data, headers, img_cols=None,
                           cloud=False, show_image=True):
  """Run prediction and return the results joined with their source rows.

  The input is normalized to a list of row dictionaries, sent to either a
  local model or a deployed cloud model, and the prediction output is placed
  side by side with the source data so it can be displayed nicely in Datalab.

  Args:
    model_dir_or_id: The model directory if cloud is False, or "model.version"
        if cloud is True.
    data: Can be a list of dictionaries, a list of csv lines, or a Pandas
        DataFrame.
    headers: The column names of data. It specifies the order of the columns
        when serializing to csv lines.
    img_cols: The image url columns. If specified, the image urls will be
        converted to base64 encoded image bytes.
    cloud: If True, predict with the deployed model version named by
        model_dir_or_id; otherwise predict with the model stored in that
        directory.
    show_image: When displaying results, whether to add a column for showing
        images for each image column.

  Returns:
    A dataframe of joined prediction source and prediction results.

  Raises:
    ValueError: If cloud is True and model_dir_or_id is not made of exactly
        two dot-separated parts.
  """

  image_columns = [] if img_cols is None else img_cols

  # Bring every supported input shape down to a list of row dictionaries.
  if isinstance(data, pd.DataFrame):
    rows = list(data.T.to_dict().values())
  elif isinstance(data[0], six.string_types):
    rows = list(csv.DictReader(data, fieldnames=headers))
  else:
    rows = data

  downloaded = _download_images(rows, image_columns)
  csv_lines = _get_predicton_csv_lines(rows, headers, downloaded)
  source_rows = _get_display_data_with_images(rows, downloaded) if show_image else rows

  if not cloud:
    raw_results = _tf_predict(model_dir_or_id, csv_lines)
  else:
    model_and_version = model_dir_or_id.split('.')
    if len(model_and_version) != 2:
      raise ValueError('Invalid model name for cloud prediction. Use "model.version".')

    model_name, version_name = model_and_version
    raw_results = ml.ModelVersions(model_name).predict(version_name, csv_lines)

  # Prediction columns come first, followed by the source columns.
  joined = pd.concat([pd.DataFrame(raw_results), pd.DataFrame(source_rows)], axis=1)

  # Columns present on both sides (such as 'key') are kept only once.
  return joined.loc[:, ~joined.columns.duplicated()]
Ejemplo n.º 2
0
def cloud_predict(model_name, model_version, data):
    """Run online prediction in the cloud and return the results.

    For running prediction on a large dataset or saving the results, run
    local_batch_prediction or batch_prediction instead.

    Before using this, the model must be created. This can be done by running
    two gcloud commands:
      1) gcloud beta ml models create NAME
      2) gcloud beta ml versions create VERSION --model NAME
             --origin gs://BUCKET/training_dir/model
    or these datalab commands:
      1) import google.datalab as datalab
         model = datalab.ml.ModelVersions(MODEL_NAME)
         model.deploy(version_name=VERSION,
                      path='gs://BUCKET/training_dir/model')
    Note that the model must be on GCS.

    Args:
      model_name: deployed model name.
      model_version: deployed model version.
      data: List of csv strings or a Pandas DataFrame that match the model
          schema.

    Returns:
      A Pandas DataFrame with one row per prediction. The columns are the
      sorted keys of the first prediction; keys that only show up in later
      predictions are appended as extra columns. An empty DataFrame is
      returned when the service returns no predictions.
    """
    import google.datalab.ml as ml

    if isinstance(data, pd.DataFrame):
        # Serialize the frame to header-less csv text, one line per row.
        string_buffer = io.StringIO()
        data.to_csv(string_buffer, header=False, index=False)

        # Splitting leaves an empty string after the final newline; drop it.
        input_data = [
            line for line in string_buffer.getvalue().split('\n') if line
        ]
    else:
        input_data = data

    predictions = ml.ModelVersions(model_name).predict(model_version,
                                                       input_data)

    # Nothing came back: there is no first prediction to take columns from.
    if len(predictions) == 0:
        return pd.DataFrame()

    # Convert predictions into a dataframe, one row per prediction.
    df = pd.DataFrame(columns=sorted(predictions[0].keys()))
    for row_index, prediction in enumerate(predictions):
        # dict.items() exists on both Python 2 and 3; iteritems() is 2-only.
        for key, value in prediction.items():
            df.loc[row_index, key] = value
    return df
Ejemplo n.º 3
0
    def predict(model_id, image_files, resize, show_image):
        """Classify images with a deployed (online) model version.

        Args:
          model_id: The deployed model, given as "model.version".
          image_files: The image files to predict on; passed to
              _util.load_images.
          resize: Passed through to _util.load_images as its resize argument.
          show_image: Passed through to _util.process_prediction_results.

        Returns:
          The value produced by _util.process_prediction_results for the
          (image file, image, (predicted label, score)) triples.

        Raises:
          ValueError: If model_id is not made of exactly two dot-separated
              parts, or if no images were loaded.
          Exception: If the prediction service returns no results.
        """

        import google.datalab.ml as ml

        loaded_images = _util.load_images(image_files, resize=resize)

        name_parts = model_id.split('.')
        if len(name_parts) != 2:
            raise ValueError('Invalid model name for cloud prediction. Use "model.version".')
        if len(loaded_images) == 0:
            raise ValueError('images is empty.')

        # One request instance per image, keyed by its position in the input.
        instances = [
            {'key': str(index), 'image_bytes': {'b64': base64.b64encode(content)}}
            for index, content in enumerate(loaded_images)
        ]

        model_name, version_name = name_parts
        responses = ml.ModelVersions(model_name).predict(version_name, instances)
        if len(responses) == 0:
            raise Exception('Prediction results are empty.')

        # Every response carries the same labels list, so the first one is used
        # to locate the score that belongs to each predicted label.
        shared_labels = responses[0]['labels']
        scored = []
        for response in responses:
            predicted = response['prediction']
            scored.append((predicted, response['scores'][shared_labels.index(predicted)]))

        return _util.process_prediction_results(
            zip(image_files, loaded_images, scored), show_image)
Ejemplo n.º 4
0
def get_prediction_results(model_dir_or_id,
                           data,
                           headers,
                           img_cols=None,
                           cloud=False,
                           with_source=True,
                           show_image=True):
    """Run prediction and format the results for display in Datalab.

    The input is normalized to a list of row dictionaries, sent to either a
    local model or a deployed cloud model, and the prediction output is
    optionally joined with the source data.

    Args:
      model_dir_or_id: The model directory if cloud is False, or
          "model.version" if cloud is True.
      data: Can be a list of dictionaries, a list of csv lines, or a Pandas
          DataFrame. If it is not a list of csv lines, data will be converted
          to csv lines first, using the order specified by headers, and then
          sent to the model. For images, it can be image gs urls or in-memory
          PIL images. Images will be converted to base64 encoded strings
          before prediction.
      headers: The column names of data. It specifies the order of the
          columns when serializing to csv lines for prediction.
      img_cols: The image url columns. If specified, the image urls will be
          converted to base64 encoded image bytes.
      cloud: If True, predict with the deployed model version named by
          model_dir_or_id; otherwise predict with the model stored in that
          directory.
      with_source: Whether to return the prediction source joined with the
          prediction results, or the prediction results only.
      show_image: When displaying the prediction source, whether to add a
          column of image bytes for each image url column.

    Returns:
      A dataframe of joined prediction source and prediction results, or of
      prediction results only.

    Raises:
      ValueError: If cloud is True and model_dir_or_id is not made of exactly
          two dot-separated parts.
    """

    image_columns = [] if img_cols is None else img_cols

    # Bring every supported input shape down to a list of row dictionaries.
    if isinstance(data, pd.DataFrame):
        rows = list(data.T.to_dict().values())
    elif isinstance(data[0], six.string_types):
        rows = list(csv.DictReader(data, fieldnames=headers))
    else:
        rows = data

    downloaded = _download_images(rows, image_columns)
    csv_lines = _get_predicton_csv_lines(rows, headers, downloaded)

    if not cloud:
        # Quiet the tensorflow logger during local prediction and put its
        # previous level back afterwards, even if prediction fails.
        tf_logger = logging.getLogger("tensorflow")
        saved_level = tf_logger.level
        tf_logger.setLevel(logging.WARNING)
        try:
            raw_results = _tf_predict(model_dir_or_id, csv_lines)
        finally:
            tf_logger.setLevel(saved_level)
    else:
        model_and_version = model_dir_or_id.split('.')
        if len(model_and_version) != 2:
            raise ValueError(
                'Invalid model name for cloud prediction. Use "model.version".'
            )

        model_name, version_name = model_and_version
        raw_results = ml.ModelVersions(model_name).predict(
            version_name, csv_lines)

    results_frame = pd.DataFrame(raw_results)
    if not with_source:
        return results_frame

    if show_image:
        source_rows = _get_display_data_with_images(rows, downloaded)
    else:
        source_rows = rows

    # Prediction columns come first, followed by the source columns.
    joined = pd.concat([results_frame, pd.DataFrame(source_rows)], axis=1)

    # Columns present on both sides (such as 'key') are kept only once.
    return joined.loc[:, ~joined.columns.duplicated()]