def add_data(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Send one or more labelled training examples to this object's collection.

    Inputs
    data - List: Either a single ``[example, target]`` pair or a list/tuple
      of such pairs (batch submission). The example text should ideally be
      longer than 100 characters and contain at least 10 words; shorter text
      is supported, but accuracy improves significantly with longer examples.
      For an additional fee, image input is supported as well. The target
      can be a string (categorical, e.g. the tag associated with a post) or a
      float (numeric, e.g. the number of shares a post received), but only
      one kind may be used within a given collection.
    batch (optional) - Boolean: Ignored. Batch mode is inferred from whether
      the first element of ``data`` is a list or tuple.
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if
      it has not been declared elsewhere.
    version (optional): Forwarded to the API handler in the URL parameters.
    kwargs: Forwarded to the API handler. ``collection`` is always
      overwritten with ``self.collection``; ``domain`` is set from
      ``self.domain`` when that attribute is truthy (documented values are
      "standard" and "topics").

    Returns whatever ``api_handler`` returns for the request.
    Raises IndexError if ``data`` is empty (its first element is inspected).

    Example usage:

    .. code-block:: python

       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
       >>> model.add_data([[text, .5]])  # ``model`` is an instance of the enclosing class
    """
    # Tuples count as pairs too, so [(text, score), ...] is treated as a batch.
    batch = isinstance(data[0], (list, tuple))
    if batch:
        X, Y = zip(*data)
        X = image_preprocess(X, batch=batch)
        # Build a real list of lists: on Python 3 ``map`` returns a lazy
        # iterator, which is not a usable request payload.
        data = [list(pair) for pair in zip(X, Y)]
    else:
        # Copy before editing so the caller's sequence is left untouched
        # (this also lets a single example be passed as a tuple).
        data = list(data)
        data[0] = image_preprocess(data[0], batch=batch)

    kwargs['collection'] = self.collection
    if self.domain:
        kwargs["domain"] = self.domain

    url_params = {
        "batch": batch,
        "api_key": api_key,
        "version": version,
        'method': "add_data"
    }
    return api_handler(data, cloud=cloud, api="custom", url_params=url_params, **kwargs)
def add_data(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Basic training endpoint: submit labelled examples to the custom model.

    Inputs
    data - List: A single ``[example, target]`` pair, or a list/tuple of
      such pairs for a batch. The example text should ideally be longer than
      100 characters and contain at least 10 words; shorter text works, but
      accuracy improves significantly with longer examples. For an
      additional fee, image input is supported as well. The target may be a
      string (categorical) or a float (numeric), but only one kind may be
      used within a given label.
    domain (optional) - String: Identifier that helps indico choose the
      appropriate training techniques. One of {"standard", "topics"}.
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if
      it has not been declared elsewhere.

    The ``batch`` argument is not consulted; batch mode is derived from the
    type of the first element of ``data``. Returns the result of
    ``self._api_handler``.
    """
    first = data[0]
    batch = isinstance(first, (list, tuple))

    if not batch:
        # Single example: work on a copy and preprocess only the input part.
        payload = list(data)
        payload[0] = image_preprocess(payload[0], batch=batch)
    else:
        # Split the pairs into inputs and targets, preprocess the inputs,
        # then stitch them back together as concrete lists (a lazy map
        # object is not acceptable on Python 3).
        examples, targets = zip(*[list(pair) for pair in data])
        examples = image_preprocess(examples, batch=batch)
        payload = [list(pair) for pair in zip(examples, targets)]

    return self._api_handler(
        payload,
        cloud=cloud,
        api="custom",
        url_params={
            "batch": batch,
            "api_key": api_key,
            "version": version,
            'method': "add_data",
        },
        **kwargs
    )
def image_features(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given an input image, returns a 2048 dimensional sparse feature vector
    explaining that image. Useful as a form of feature engineering for image
    oriented tasks.

    * Input can be either grayscale or rgb color and should either be a
      numpy array or nested list format.
    * Input data should be either uint8 0-255 range values or floating point
      between 0 and 1.
    * Large images (i.e. 1024x768+) are much bigger than needed; resizing
      will be done internally to 144x144 if needed.
    * For ideal performance, images should be square aspect ratio, but
      non-square aspect ratios are supported as well.

    Example usage:

    .. code-block:: python

       >>> from indicoio import image_features
       >>> import numpy as np
       >>> image = np.zeros((144,144,3))
       >>> features = image_features(image)
       >>> len(features),np.min(features),np.max(features),np.sum(np.asarray(features)!=0)
       (2048, 0.0, 6.97088623046875, 571)

    Since the image features returned are a semantic description of the
    contents of an image, they can be used to implement many other common
    image related tasks such as object recognition or image similarity and
    retrieval. For image similarity, simple distance metrics applied to
    collections of image feature vectors can work very well.

    :param image: The image to be analyzed.
    :type image: numpy.ndarray
    :rtype: List containing features
    """
    prepared = image_preprocess(image, batch=batch, size=144, min_axis=True)
    return api_handler(
        prepared,
        cloud=cloud,
        api="imagefeatures",
        url_params=dict(batch=batch, api_key=api_key, version=version),
        **kwargs
    )
def image_recognition(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given an input image, returns a dictionary of image classifications with
    associated scores.

    * Input can be either grayscale or rgb color and should either be a
      numpy array or nested list format.
    * Input data should be either uint8 0-255 range values or floating point
      between 0 and 1.
    * Large images (i.e. 1024x768+) are much bigger than needed; min-axis
      resizing will be done internally to 144 if needed.
    * For ideal performance, images should be square aspect ratio, but
      non-square aspect ratios are supported as well.

    Example usage:

    .. code-block:: python

       >>> from indicoio import image_recognition
       >>> features = image_recognition("path/to/image.png")

    :param image: The image to be analyzed.
    :type image: str
    :rtype: dict containing classifications
    """
    prepared = image_preprocess(image, batch=batch, size=144, min_axis=True)
    request_params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(
        prepared,
        cloud=cloud,
        api="imagerecognition",
        url_params=request_params,
        **kwargs
    )
def predict(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Prediction endpoint: the primary interaction point for all predictive
    analysis against the custom model.

    Inputs
    data - String: The example being provided to the API. As a general rule,
      the data should be as similar as possible to the examples used for
      training; accuracy will generally drop as the difference between this
      input and the training data increases. Base64 encoded image data,
      image urls, and text content are all valid.
    domain (optional) - String: Identifier that helps indico choose the
      appropriate techniques behind the scenes. One of {"standard", "topics"}.
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if
      it has not been declared elsewhere.

    The ``batch`` argument is not consulted; batch mode is determined by
    ``detect_batch(data)``. Returns the result of ``self._api_handler``.
    """
    is_batch = detect_batch(data)
    prepared = image_preprocess(data, batch=is_batch)
    return self._api_handler(
        prepared,
        cloud=cloud,
        api="custom",
        url_params=dict(batch=is_batch, api_key=api_key, version=version),
        **kwargs
    )
def analyze_image(image, apis=IMAGE_APIS, **kwargs):
    """
    Given an input image, returns the results of the specified image apis.
    Possible apis include: ['fer', 'facial_features', 'image_features']

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> import numpy as np
       >>> face = np.zeros((48,48)).tolist()
       >>> results = indicoio.analyze_image(image = face, apis = ["fer", "facial_features"])
       >>> fer = results["fer"]
       >>> facial_features = results["facial_features"]

    :param image: The image to be analyzed.
    :param apis: List of apis to use.
    :type apis: list of str
    :rtype: Dictionary of api responses

    ``cloud``, ``batch`` and ``api_key`` are taken out of ``kwargs`` (with
    defaults None, False and None); the remaining keyword arguments are
    forwarded to ``multi`` unchanged.
    """
    api_key = kwargs.pop('api_key', None)
    batch = kwargs.pop('batch', False)
    cloud = kwargs.pop('cloud', None)

    prepared = image_preprocess(image, batch=batch)
    return multi(
        data=prepared,
        datatype="image",
        cloud=cloud,
        batch=batch,
        api_key=api_key,
        apis=apis,
        **kwargs
    )
def facial_features(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given a grayscale input image of a face, returns a 48 dimensional
    feature vector explaining that face. Useful as a form of feature
    engineering for face oriented tasks. Input should be in a list of list
    format; resizing will be attempted internally, but for best performance
    images should already be sized at 48x48 pixels.

    Example usage:

    .. code-block:: python

       >>> from indicoio import facial_features
       >>> import numpy as np
       >>> face = np.zeros((48,48))
       >>> features = facial_features(face)
       >>> len(features)
       48

    :param image: The image to be analyzed.
    :type image: list of lists
    :rtype: List containing feature responses
    """
    # A truthy ``detect`` keyword disables the fixed 48x48 target size
    # (``size=None`` is passed to the preprocessor instead).
    if kwargs.get("detect"):
        target_size = None
    else:
        target_size = (48, 48)

    prepared = image_preprocess(image, batch=batch, size=target_size)
    return api_handler(
        prepared,
        cloud=cloud,
        api="facialfeatures",
        url_params=dict(batch=batch, api_key=api_key, version=version),
        **kwargs
    )
def fer(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given a grayscale input image of a face, returns a probability
    distribution over emotional state. Input should be in a list of list
    format; resizing will be attempted internally, but for best performance
    images should already be sized at 48x48 pixels.

    Example usage:

    .. code-block:: python

       >>> from indicoio import fer
       >>> import numpy as np
       >>> face = np.zeros((48,48)).tolist()
       >>> emotions = fer(face)
       >>> emotions
       {u'Angry': 0.6340586827229989, u'Sad': 0.1764309536057839,
        u'Neutral': 0.05582989039191157, u'Surprise': 0.0072685938275375344,
        u'Fear': 0.08523385724298838, u'Happy': 0.04117802220878012}

    :param image: The image to be analyzed.
    :param cloud: Private cloud domain, forwarded to the API handler.
    :param batch: Whether ``image`` holds several images.
    :param api_key: API key, forwarded in the URL parameters.
    :param version: API version, forwarded in the URL parameters. Accepted
        explicitly so this wrapper matches the other image endpoints.
    :type image: list of lists
    :rtype: Dictionary containing emotion probability pairs
    """
    # A truthy ``detect`` keyword disables the fixed 48x48 target size.
    size = None if kwargs.get("detect") else (48, 48)
    image = image_preprocess(image, batch=batch, size=size)
    url_params = {"batch": batch, "api_key": api_key, "version": version}
    return api_handler(image, cloud=cloud, api="fer", url_params=url_params, **kwargs)
def facial_localization(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given an image, returns a list of faces found within the image. For each
    face, a dictionary containing the upper left corner and lower right
    corner is returned. If crop is True, the cropped face is included in the
    dictionary. Input should be a numpy ndarray or a filename.

    Example usage:

    .. code-block:: python

       >>> from indicoio import facial_localization
       >>> import numpy as np
       >>> img = np.asarray(face_pixels)  # image of a face
       >>> faces = facial_localization(img)
       >>> len(faces)
       1

    :param image: The image to be analyzed.
    :type image: filepath or ndarray
    :rtype: List of faces (dict) found.
    """
    prepared = image_preprocess(image, batch=batch)
    request_params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(
        prepared,
        cloud=cloud,
        api="faciallocalization",
        url_params=request_params,
        **kwargs
    )
def image_features(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given an input image, returns a 2048 dimensional sparse feature vector
    explaining that image. Useful as a form of feature engineering for image
    oriented tasks.

    * Input can be either grayscale or rgb color and should either be a
      numpy array or nested list format.
    * Input data should be either uint8 0-255 range values or floating point
      between 0 and 1.
    * Large images (i.e. 1024x768+) are much bigger than needed; resizing
      will be done internally to 144x144 if needed.
    * For ideal performance, images should be square aspect ratio, but
      non-square aspect ratios are supported as well.

    Example usage:

    .. code-block:: python

       >>> from indicoio import image_features
       >>> import numpy as np
       >>> image = np.zeros((144,144,3))
       >>> features = image_features(image)
       >>> len(features),np.min(features),np.max(features),np.sum(np.asarray(features)!=0)
       (2048, 0.0, 6.97088623046875, 571)

    Since the image features returned are a semantic description of the
    contents of an image, they can be used to implement many other common
    image related tasks such as object recognition or image similarity and
    retrieval. For image similarity, simple distance metrics applied to
    collections of image feature vectors can work very well.

    :param image: The image to be analyzed.
    :type image: numpy.ndarray
    :rtype: List containing features
    """
    target_size = (144, 144)
    prepared = image_preprocess(image, batch=batch, size=target_size, min_axis=True)
    return api_handler(
        prepared,
        cloud=cloud,
        api="imagefeatures",
        url_params=dict(batch=batch, api_key=api_key, version=version),
        **kwargs
    )
def remove_example(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Remove a single instance of training data from this object's collection.
    This is useful when a single piece of content has been modified (for
    example, retagged) while the remaining examples remain valid.

    Inputs
    data - String: The exact text you wish to remove from the collection.
      If the string provided does not match a known piece of text then this
      will fail.
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if
      it has not been declared elsewhere.

    The collection is always taken from ``self.collection`` (any
    ``collection`` keyword is overwritten), and the ``batch`` argument is
    not consulted; batch mode is determined by ``detect_batch(data)``.

    Example usage:

    .. code-block:: python

       >>> model.remove_example("I am Sam. Sam I am.")  # ``model`` is an instance of the enclosing class
    """
    kwargs['collection'] = self.collection
    is_batch = detect_batch(data)
    prepared = image_preprocess(data, batch=is_batch)
    return api_handler(
        prepared,
        cloud=cloud,
        api="custom",
        url_params={
            "batch": is_batch,
            "api_key": api_key,
            "version": version,
            'method': 'remove_example',
        },
        private=True,
        **kwargs
    )
def predict(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Prediction endpoint: the primary interaction point for all predictive
    analysis against this object's collection.

    Inputs
    data - String: The example being provided to the API. As a general rule,
      the data should be as similar as possible to the examples used for
      training; accuracy will generally drop as the difference between this
      input and the training data increases. Base64 encoded image data,
      image urls, and text content are all valid.
    domain (optional) - String: Identifier that helps indico choose the
      appropriate techniques behind the scenes. One of {"standard", "topics"}.
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if
      it has not been declared elsewhere.

    The collection is always taken from ``self.collection`` (any
    ``collection`` keyword is overwritten); the response format matches the
    format of the training examples. The ``batch`` argument is not
    consulted; batch mode is determined by ``detect_batch(data)``.

    Example usage:

    .. code-block:: python

       >>> text = "I am Sam. Sam I am."
       >>> prediction = model.predict(text)  # ``model`` is an instance of the enclosing class
       .75
    """
    is_batch = detect_batch(data)
    kwargs['collection'] = self.collection
    prepared = image_preprocess(data, batch=is_batch)
    return api_handler(
        prepared,
        cloud=cloud,
        api="custom",
        url_params=dict(batch=is_batch, api_key=api_key, version=version),
        private=True,
        **kwargs
    )
def fer(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given a grayscale input image of a face, returns a probability
    distribution over emotional state. Input should be in a list of list
    format; resizing will be attempted internally, but for best performance
    images should already be sized at 48x48 pixels.

    Example usage:

    .. code-block:: python

       >>> from indicoio import fer
       >>> import numpy as np
       >>> face = np.zeros((48,48)).tolist()
       >>> emotions = fer(face)
       >>> emotions
       {u'Angry': 0.6340586827229989, u'Sad': 0.1764309536057839,
        u'Neutral': 0.05582989039191157, u'Surprise': 0.0072685938275375344,
        u'Fear': 0.08523385724298838, u'Happy': 0.04117802220878012}

    :param image: The image to be analyzed.
    :type image: list of lists
    :rtype: Dictionary containing emotion probability pairs
    """
    # A truthy ``detect`` keyword disables the fixed 48x48 target size.
    if kwargs.get("detect"):
        target_size = None
    else:
        target_size = (48, 48)

    prepared = image_preprocess(image, batch=batch, size=target_size)
    return api_handler(
        prepared,
        cloud=cloud,
        api="fer",
        url_params=dict(batch=batch, api_key=api_key, version=version),
        **kwargs
    )
def content_filtering(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given a grayscale input image, returns how obscene the image is.
    Input should be in a list of list format.

    Example usage:

    .. code-block:: python

       >>> from indicoio import content_filtering
       >>> import numpy as np
       >>> face = np.zeros((48,48)).tolist()
       >>> res = content_filtering(face)
       >>> res
       .056

    :param image: The image to be analyzed.
    :type image: list of lists
    :rtype: float of nsfwness
    """
    prepared = image_preprocess(image, batch=batch, size=128, min_axis=True)
    request_params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(
        prepared,
        cloud=cloud,
        api="contentfiltering",
        url_params=request_params,
        **kwargs
    )
def add_data(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Send one or more labelled training examples to this object's collection.

    Inputs
    data - List: Either a single ``[example, target]`` pair or a list/tuple
      of such pairs (batch submission). The example text should ideally be
      longer than 100 characters and contain at least 10 words; shorter text
      is supported, but accuracy improves significantly with longer examples.
      For an additional fee, image input is supported as well. The target
      can be a string (categorical, e.g. the tag associated with a post) or a
      float (numeric, e.g. the number of shares a post received), but only
      one kind may be used within a given collection.
    batch (optional) - Boolean: Ignored. Batch mode is inferred from whether
      the first element of ``data`` is a list or tuple.
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if
      it has not been declared elsewhere.
    version (optional): Forwarded to the API handler in the URL parameters.
    kwargs: Forwarded to the API handler. ``collection`` is always
      overwritten with ``self.collection``; ``domain`` is set from
      ``self.domain`` when that attribute is truthy (documented values are
      "standard" and "topics").

    Returns whatever ``api_handler`` returns for the request.
    Raises IndexError if ``data`` is empty (its first element is inspected).

    Example usage:

    .. code-block:: python

       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
       >>> model.add_data([[text, .5]])  # ``model`` is an instance of the enclosing class
    """
    # Tuples count as pairs too, so [(text, score), ...] is treated as a batch.
    batch = isinstance(data[0], (list, tuple))
    if batch:
        X, Y = zip(*data)
        X = image_preprocess(X, batch=batch)
        # Build a real list of lists: on Python 3 ``map`` returns a lazy
        # iterator, which is not a usable request payload.
        data = [list(pair) for pair in zip(X, Y)]
    else:
        # Copy before editing so the caller's sequence is left untouched
        # (this also lets a single example be passed as a tuple).
        data = list(data)
        data[0] = image_preprocess(data[0], batch=batch)

    kwargs['collection'] = self.collection
    if self.domain:
        kwargs["domain"] = self.domain

    url_params = {"batch": batch, "api_key": api_key, "version": version, 'method': "add_data"}
    return api_handler(data, cloud=cloud, api="custom", url_params=url_params, **kwargs)
def remove_example(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Remove a single instance of training data from this object's collection.
    This is useful when a single piece of content has been modified (for
    example, retagged) while the remaining examples remain valid.

    Inputs
    data - String: The exact text you wish to remove from the collection.
      If the string provided does not match a known piece of text then this
      will fail.
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if
      it has not been declared elsewhere.

    The collection is always taken from ``self.collection`` (any
    ``collection`` keyword is overwritten), and the ``batch`` argument is
    not consulted; batch mode is determined by ``detect_batch(data)``.

    Example usage:

    .. code-block:: python

       >>> model.remove_example("I am Sam. Sam I am.")  # ``model`` is an instance of the enclosing class
    """
    kwargs['collection'] = self.collection
    is_batch = detect_batch(data)
    prepared = image_preprocess(data, batch=is_batch)

    request_params = dict(batch=is_batch, api_key=api_key, version=version)
    request_params['method'] = 'remove_example'

    return api_handler(
        prepared,
        cloud=cloud,
        api="custom",
        url_params=request_params,
        private=True,
        **kwargs
    )
def analyze_image(image, apis=IMAGE_APIS, **kwargs):
    """
    Given an input image, returns the results of the specified image apis.
    Possible apis include: ['fer', 'facial_features', 'image_features']

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> import numpy as np
       >>> face = np.zeros((48,48)).tolist()
       >>> results = indicoio.analyze_image(image = face, apis = ["fer", "facial_features"])
       >>> fer = results["fer"]
       >>> facial_features = results["facial_features"]

    :param image: The image to be analyzed.
    :param apis: List of apis to use. When empty or None, every image api
        that is not listed in ``MULTIAPI_NOT_SUPPORTED`` is used.
    :type apis: list of str
    :rtype: Dictionary of api responses

    ``cloud``, ``batch`` and ``api_key`` are taken out of ``kwargs`` (with
    defaults None, False and None); the remaining keyword arguments are
    forwarded to ``multi`` unchanged.
    """
    if not apis:
        # Fall back to the *image* apis: this endpoint submits image data,
        # so defaulting to the text api list would request the wrong models.
        apis = list(set(IMAGE_APIS) - set(MULTIAPI_NOT_SUPPORTED))

    cloud = kwargs.pop('cloud', None)
    batch = kwargs.pop('batch', False)
    api_key = kwargs.pop('api_key', None)

    return multi(
        data=image_preprocess(image, batch=batch),
        datatype="image",
        cloud=cloud,
        batch=batch,
        api_key=api_key,
        apis=apis,
        **kwargs
    )
def content_filtering(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given a grayscale input image, returns how obscene the image is.
    Input should be in a list of list format.

    Example usage:

    .. code-block:: python

       >>> from indicoio import content_filtering
       >>> import numpy as np
       >>> face = np.zeros((48,48)).tolist()
       >>> res = content_filtering(face)
       >>> res
       .056

    :param image: The image to be analyzed.
    :param cloud: Private cloud domain, forwarded to the API handler.
    :param batch: Whether ``image`` holds several images.
    :param api_key: API key, forwarded in the URL parameters.
    :param version: API version, forwarded in the URL parameters. Accepted
        explicitly so this wrapper matches the other image endpoints.
    :type image: list of lists
    :rtype: float of nsfwness
    """
    # NOTE(review): this call uses the ``min_axis=<pixels>`` form of
    # image_preprocess; confirm it matches the signature currently in use.
    image = image_preprocess(image, batch=batch, size=None, min_axis=128)
    url_params = {"batch": batch, "api_key": api_key, "version": version}
    return api_handler(image, cloud=cloud, api="contentfiltering", url_params=url_params, **kwargs)
def image_recognition(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given an input image, returns a dictionary of image classifications with
    associated scores.

    * Input can be either grayscale or rgb color and should either be a
      numpy array or nested list format.
    * Input data should be either uint8 0-255 range values or floating point
      between 0 and 1.
    * Large images (i.e. 1024x768+) are much bigger than needed; min-axis
      resizing will be done internally to 144 if needed.
    * For ideal performance, images should be square aspect ratio, but
      non-square aspect ratios are supported as well.

    Example usage:

    .. code-block:: python

       >>> from indicoio import image_recognition
       >>> features = image_recognition("path/to/image.png")

    :param image: The image to be analyzed.
    :type image: str
    :rtype: dict containing classifications
    """
    preprocess_options = dict(size=144, min_axis=True, batch=batch)
    prepared = image_preprocess(image, **preprocess_options)
    return api_handler(
        prepared,
        cloud=cloud,
        api="imagerecognition",
        url_params={"batch": batch, "api_key": api_key, "version": version},
        **kwargs
    )
def test_min_axis_resize(self):
    """Min-axis resizing of ``data/fear.png`` to 360 should give a 360x360 image."""
    fixture_path = os.path.normpath(os.path.join(DIR, "data/fear.png"))

    # image_preprocess returns base64 text here; decode it back into an image.
    encoded = image_preprocess(fixture_path, size=360, min_axis=True)
    decoded = Image.open(BytesIO(base64.b64decode(encoded)))

    self.assertEqual(decoded.size, (360.0, 360.0))
def test_min_axis_resize(self):
    """Resizing ``data/fear.png`` with ``min_axis=360`` should give a 360x360 image."""
    fixture_path = os.path.normpath(os.path.join(DIR, "data/fear.png"))

    # image_preprocess returns base64 text here; decode it back into an image.
    encoded = image_preprocess(fixture_path, min_axis=360)
    decoded = Image.open(BytesIO(base64.b64decode(encoded)))

    self.assertEqual(decoded.size, (360.0, 360.0))