def test_local_host(mock_warning, mock_create_url):
    """api_handler should build an http (not https) URL for a localhost host."""
    from indicoio.utils.api import api_handler
    import indicoio

    previous_host = indicoio.config.host
    indicoio.config.host = "localhost:8000"
    try:
        api_handler("test", cloud=None, api='sentiment')
        # `mock.called_with(...)` is NOT an assertion -- it creates a child
        # mock and is always truthy, so the original check could never fail.
        # Use the real Mock assertion method instead.
        mock_create_url.assert_called_with('http')
    finally:
        # Restore the configured host even if the request or assertion fails,
        # so a failure here cannot poison later tests.
        indicoio.config.host = previous_host
def test_local_host(mock_warning):
    """api_handler should accept a localhost host without raising."""
    from indicoio.utils.api import api_handler
    import indicoio

    # Save the currently configured host rather than assuming the default
    # ("apiv2.indico.io"); restoring a hard-coded value would clobber any
    # non-default configuration another test or environment set up.
    previous_host = indicoio.config.host
    indicoio.config.host = "localhost:8000"
    try:
        api_handler("test", cloud=None, api='sentiment')
    finally:
        indicoio.config.host = previous_host
def test_local_host(warnings_mock):
    """Exercise api_handler with the host pointed at localhost.

    The previously configured host is restored afterwards regardless of
    whether the request succeeds.
    """
    from indicoio.utils.api import api_handler
    import indicoio

    saved_host = indicoio.config.host
    indicoio.config.host = "localhost:8000"
    try:
        api_handler("test", cloud=None, api="sentiment")
    finally:
        indicoio.config.host = saved_host
def check_valid_keys(self):
    """Return True if the configured indico API key is accepted.

    Sends a deliberately empty request: an 'Invalid API key' error means the
    key was rejected, while any other IndicoError (complaining about the
    ``None`` payload) implies the key itself is valid.
    """
    verify_dependencies(['indicoio'])
    from indicoio.utils import api
    from indicoio.utils.errors import IndicoError

    try:
        api.api_handler(None, None, self.model_names[0])
    except IndicoError as err:
        # Key failures are only distinguishable by the error message; every
        # other IndicoError here is the expected data error for None input.
        return str(err) != 'Invalid API key'
def intersections(data, apis=None, **kwargs):
    """
    Helper to make multi requests of different types.

    :param data: Data to be sent in API request
    :param apis: List of exactly two API names whose results to intersect
    :rtype: Dictionary of api responses
    :raises IndicoError: if `apis` is malformed, the two APIs take different
        input kinds, or fewer than 3 examples are supplied
    """
    # remove auto-inserted batch param
    kwargs.pop('batch', None)

    # Client side api name checking
    if not isinstance(apis, list) or len(apis) != 2:
        raise IndicoError("Argument 'apis' must be of length 2")
    if isinstance(data, list) and len(data) < 3:
        raise IndicoError(
            "At least 3 examples are required to use the intersections API"
        )

    # Both endpoints must consume the same kind of input (e.g. both text)
    # for an intersection to make sense.
    api_types = list(map(API_TYPES.get, apis))
    if api_types[0] != api_types[1]:
        raise IndicoError(
            "Both `apis` must accept the same kind of input to use the intersections API"
        )

    # BUGFIX: must pop() rather than get() -- leaving 'cloud' in kwargs made
    # the api_handler call below raise a duplicate keyword argument TypeError
    # whenever a caller passed cloud=...
    cloud = kwargs.pop("cloud", None)

    url_params = {
        'batch': False,
        'api_key': kwargs.pop('api_key', None),
        'apis': apis
    }

    return api_handler(data, cloud=cloud, api="apis/intersections", url_params=url_params, **kwargs)
def multi(data, datatype, apis, batch=False, **kwargs):
    """
    Issue one request that fans out to several APIs of the same datatype.

    :param data: Data to be sent in API request
    :param datatype: String type of API request
    :param apis: List of apis to use.
    :param batch: Is this a batch request?
    :rtype: Dictionary of api responses
    """
    # Reject unknown api names client-side before hitting the server.
    available = AVAILABLE_APIS.get(datatype)
    unknown = [name for name in apis if name not in available]
    if unknown:
        raise IndicoError(
            "%s are not valid %s APIs. Please reference the available APIs below:\n%s"
            % (", ".join(unknown), datatype, ", ".join(available))
        )

    cloud = kwargs.pop("cloud", None)
    api_key = kwargs.pop('api_key', None)

    response = api_handler(
        data,
        cloud=cloud,
        api='apis/multiapi',
        url_params={"apis": apis, "batch": batch, "api_key": api_key},
        **kwargs
    )
    return handle_response(response)
def add_data(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    This is the basic training endpoint. Given a piece of text and a score,
    either categorical or numeric, this endpoint will train a new model given
    the additional piece of information.

    Inputs
    data - List: The text and collection/score associated with it. The text
      (string) should ideally be longer than 100 characters and contain at
      least 10 words; shorter text is supported but accuracy improves with
      longer examples. Image input is supported as well (for an additional
      fee). The collection/score can be a string or float -- categorical
      (e.g. a tag) or numeric (e.g. share count) -- but only one kind within
      a given label. A list of [data, target] pairs triggers batch mode.
    collection (optional) - String: identifier for the particular model being
      trained; a default label is used when omitted.
    domain (optional) - String: hint for the training technique to use, one
      of {"standard", "topics"}.
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if it
      has not been declared elsewhere.

    Example usage:

    .. code-block:: python

       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
       >>> indicoio.add_data([[text, .5]])
    """
    # A list of [example, target] pairs means batch mode.
    batch = isinstance(data[0], list)
    if batch:
        X, Y = zip(*data)
        X = image_preprocess(X, batch=batch)
        # BUGFIX: materialize with list() -- on Python 3, map() returns a lazy
        # iterator, which is not a JSON-serializable request payload.
        data = list(map(list, zip(X, Y)))
    else:
        # NOTE(review): mutates the caller's list in place -- presumably
        # intentional to avoid copying; confirm callers don't reuse `data`.
        data[0] = image_preprocess(data[0], batch=batch)

    kwargs['collection'] = self.collection
    if self.domain:
        kwargs["domain"] = self.domain

    url_params = {
        "batch": batch,
        "api_key": api_key,
        "version": version,
        'method': "add_data"
    }
    return api_handler(data, cloud=cloud, api="custom", url_params=url_params, **kwargs)
def facial_features(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Extract a 48-dimensional feature vector from a grayscale face image.

    Useful as a form of feature engineering for face oriented tasks. Input
    should be a list of lists; resizing is attempted internally, but for best
    performance images should already be 48x48 pixels.

    Example usage:

    .. code-block:: python

       >>> from indicoio import facial_features
       >>> import numpy as np
       >>> features = facial_features(np.zeros((48, 48)))
       >>> len(features)
       48

    :param image: The image to be analyzed.
    :type image: list of lists
    :rtype: List containing feature responses
    """
    # Skip the 48x48 resize when face detection is requested.
    target_size = None if kwargs.get("detect") else (48, 48)
    image = image_preprocess(image, batch=batch, size=target_size)
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(image, cloud=cloud, api="facialfeatures", url_params=params, **kwargs)
def image_features(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Extract a 2048-dimensional sparse feature vector describing an image.

    Useful as a form of feature engineering for image oriented tasks, e.g.
    object recognition or image similarity/retrieval (simple distance metrics
    over these vectors work well).

    * Input may be grayscale or rgb, as a numpy array or nested lists.
    * Values should be uint8 0-255 or floats in [0, 1].
    * Oversized images are resized internally (to 144 on the minimum axis);
      square aspect ratios are ideal but not required.

    Example usage:

    .. code-block:: python

       >>> from indicoio import image_features
       >>> import numpy as np
       >>> features = image_features(np.zeros((144, 144, 3)))
       >>> len(features)
       2048

    :param image: The image to be analyzed.
    :type image: numpy.ndarray
    :rtype: List containing features
    """
    processed = image_preprocess(image, batch=batch, size=144, min_axis=True)
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(processed, cloud=cloud, api="imagefeatures", url_params=params, **kwargs)
def sentiment_hq(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Estimate the sentiment of `text` as a scalar roughly in [0, 1].

    0.5 indicates neutral sentiment; values toward 0 suggest very negative
    and toward 1 very positive sentiment.

    Example usage:

    .. code-block:: python

       >>> from indicoio import sentimenthq
       >>> sentimenthq('Thanks everyone for the birthday wishes!! It was a crazy few days ><')
       0.6210052967071533

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Float
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="sentimenthq", url_params=params, **kwargs)
def fer(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Return a probability distribution over emotional state for a face image.

    Input should be a grayscale image as a list of lists; resizing is
    attempted internally, but for best performance images should already be
    48x48 pixels.

    Example usage:

    .. code-block:: python

       >>> from indicoio import fer
       >>> import numpy as np
       >>> emotions = fer(np.zeros((48, 48)).tolist())
       >>> sorted(emotions)
       ['Angry', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']

    :param image: The image to be analyzed.
    :type image: list of lists
    :rtype: Dictionary containing emotion probability pairs
    """
    # Skip the 48x48 resize when face detection is requested.
    target_size = None if kwargs.get("detect") else (48, 48)
    image = image_preprocess(image, batch=batch, size=target_size)
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(image, cloud=cloud, api="fer", url_params=params, **kwargs)
def places(text, cloud=None, batch=None, api_key=None, version=None, **kwargs):
    """
    Return references to specific places found in the input text.

    Example usage:

    .. code-block:: python

       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
       >>> entities = indicoio.places(text)
       [
         {
           u'text': "London",
           u'confidence': 0.18549786508083344,
           u'position': [0, 6]
         },
         ...
       ]

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: List of dicts, each with `text`, `confidence`, and `position`
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="places", url_params=params, **kwargs)
def facial_localization(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Locate faces within an image.

    Each detected face is returned as a dictionary containing the upper-left
    and lower-right corners; when ``crop=True`` the cropped face is included
    as well. Input should be a numpy ndarray or a filename.

    Example usage:

    .. code-block:: python

       >>> from indicoio import facial_localization
       >>> import numpy as np
       >>> faces = facial_localization(np.zeros([image of a face]))
       >>> len(faces)
       1

    :param image: The image to be analyzed.
    :type image: filepath or ndarray
    :rtype: List of faces (dict) found.
    """
    processed = image_preprocess(image, batch=batch)
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(processed, cloud=cloud, api="faciallocalization", url_params=params, **kwargs)
def image_recognition(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Classify an image, returning a dict of labels with associated scores.

    * Input may be grayscale or rgb, as a numpy array or nested lists.
    * Values should be uint8 0-255 or floats in [0, 1].
    * Oversized images are min-axis resized internally to 144 if needed;
      square aspect ratios are ideal but not required.

    Example usage:

    .. code-block:: python

       >>> from indicoio import image_recognition
       >>> features = image_recognition(<filename>)

    :param image: The image to be analyzed.
    :type image: str
    :rtype: dict containing classifications
    """
    processed = image_preprocess(image, batch=batch, size=144, min_axis=True)
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(processed, cloud=cloud, api="imagerecognition", url_params=params, **kwargs)
def personas(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Score the author's likelihood of being each of 16 personality types.

    Example usage:

    .. code-block:: python

       >>> text = "I love going out with my friends"
       >>> entities = indicoio.personas(text)
       {'architect': 0.2191890478134155, 'logician': 0.0158474326133728,
        'commander': 0.07654544115066528 ...}

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary mapping each persona name to a float in [0, 1]
    """
    # Same backend endpoint as personality(); the persona flag switches the
    # response to the 16-type breakdown.
    kwargs['persona'] = True
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="personality", url_params=params, **kwargs)
def keywords(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given input text, returns series of keywords and associated scores

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
       >>> keywords = indicoio.keywords(text, top_n=3)
       >>> print "The keywords are: "+str(keywords.keys())
       u'The keywords are ['delightful', 'highs', 'skies']

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of feature score pairs
    """
    # BUGFIX: forward `version` like every other endpoint -- it was accepted
    # as a parameter but silently dropped from the request, so callers could
    # never select a model version.
    url_params = {'batch': batch, 'api_key': api_key, 'version': version}
    return api_handler(text, cloud=cloud, api="keywords", url_params=url_params, **kwargs)
def multi(data, datatype, apis, batch=False, **kwargs):
    """
    Helper to make multi requests of different types.

    :param data: Data to be sent in API request
    :param datatype: String type of API request
    :param apis: List of apis to use.
    :param batch: Is this a batch request?
    :rtype: Dictionary of api responses
    """
    # Client-side validation: only names known for this datatype are allowed.
    available = AVAILABLE_APIS.get(datatype)
    rejected = [candidate for candidate in apis if candidate not in available]
    if rejected:
        message = (
            "%s are not valid %s APIs. Please reference the available APIs below:\n%s"
            % (", ".join(rejected), datatype, ", ".join(available))
        )
        raise IndicoError(message)

    cloud = kwargs.pop("cloud", None)
    api_key = kwargs.pop('api_key', None)
    url_params = {"apis": apis, "batch": batch, "api_key": api_key}
    raw = api_handler(data, cloud=cloud, api='apis/multiapi', url_params=url_params, **kwargs)
    return handle_response(raw)
def image_features(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Extract a 2048-dimensional sparse feature vector describing an image.

    The vector is a semantic description of the image contents, so it can
    back tasks like object recognition or image similarity and retrieval
    (simple distance metrics over collections of these vectors work well).

    * Input may be grayscale or rgb, as a numpy array or nested lists.
    * Values should be uint8 0-255 or floats in [0, 1].
    * Oversized images (e.g. 1024x768+) are resized internally to 144x144 if
      needed; square aspect ratios are ideal but not required.

    Example usage:

    .. code-block:: python

       >>> from indicoio import image_features
       >>> import numpy as np
       >>> features = image_features(np.zeros((144, 144, 3)))
       >>> len(features)
       2048

    :param image: The image to be analyzed.
    :type image: numpy.ndarray
    :rtype: List containing features
    """
    processed = image_preprocess(image, batch=batch, size=(144, 144), min_axis=True)
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(processed, cloud=cloud, api="imagefeatures", url_params=params, **kwargs)
def political(text, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Return a probability distribution over the speaker's political alignment.

    Example usage:

    .. code-block:: python

       >>> from indicoio import political
       >>> import numpy as np
       >>> affiliation = political('Wish we had more bike lanes.')
       >>> affiliation
       {u'Libertarian': 0.4923755446986322, u'Green': 0.2974443102818122,
        u'Liberal': 0.13730032938784784, u'Conservative': 0.07287981563170784}
       >>> max(affiliation, key=affiliation.get)
       u'Libertarian'

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of party probability pairs
    """
    params = dict(batch=batch, api_key=api_key)
    return api_handler(text, cloud=cloud, api="political", url_params=params, **kwargs)
def clear(self, cloud=None, api_key=None, version=None, **kwargs):
    """
    Remove ALL data associated with this collection.

    Useful after data corruption or when a large amount of incorrect data has
    been fed into the API and is difficult to correct piecemeal.

    Use with caution! This is not reversible.

    Inputs
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if it
      has not been declared elsewhere.

    Example usage:

    .. code-block:: python

       >>> indicoio.clear_collection("popularity_predictor")
    """
    kwargs['collection'] = self.collection
    params = {
        "batch": False,
        "api_key": api_key,
        "version": version,
        "method": "clear_collection",
    }
    return api_handler(None, cloud=cloud, api="custom", url_params=params, private=True, **kwargs)
def organizations(text, cloud=None, batch=None, api_key=None, version=2, **kwargs):
    """
    Return references to specific organizations found in the input text.

    Example usage:

    .. code-block:: python

       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
       >>> entities = indicoio.organizations(text)
       [
         {
           u'text': "London Underground",
           u'confidence': 0.8643872141838074,
           u'position': [0, 18]
         }
       ]

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: List of dicts, each with `text`, `confidence`, and `position`
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="organizations", url_params=params, **kwargs)
def named_entities(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Return named entities (proper nouns) found in the input text.

    Example usage:

    .. code-block:: python

       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
       >>> entities = indicoio.named_entities(text)
       {u'London Underground': {u'categories': {u'location': 0.583755654607989,
                                                u'organization': 0.07460487821791033,
                                                u'person': 0.07304850776658672,
                                                u'unknown': 0.2685909594075139},
                                u'confidence': 0.846188063604044},
        u'Mike Brown': {u'categories': {u'location': 0.025813884950623898,
                                        u'organization': 0.06661470013014613,
                                        u'person': 0.08723850624560824,
                                        u'unknown': 0.8203329086736217},
                        u'confidence': 0.8951793008234012}}

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary mapping each entity to its categories and confidence
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="namedentities", url_params=params, **kwargs)
def language(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Return a probability distribution over 33 possible languages for `text`.

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
       >>> possible = indicoio.language(text)
       >>> max(possible, key=possible.get)
       u'English'

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of language probability pairs
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="language", url_params=params, **kwargs)
def remove_example(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Remove a single instance of training data from this collection.

    Useful when one piece of content has been modified (e.g. retagged) while
    the remaining examples stay valid.

    Inputs
    data - String: The exact text you wish to remove from the given
      collection. If the string provided does not match a known piece of
      text then this will fail.
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if it
      has not been declared elsewhere.

    Example usage:

    .. code-block:: python

       >>> indicoio.remove_example(text="I am Sam. Sam I am.", label="popularity_predictor")
    """
    kwargs['collection'] = self.collection
    # Batch mode is inferred from the data shape, not the caller's argument.
    batch = detect_batch(data)
    payload = image_preprocess(data, batch=batch)
    params = {
        "batch": batch,
        "api_key": api_key,
        "version": version,
        "method": "remove_example",
    }
    return api_handler(payload, cloud=cloud, api="custom", url_params=params, private=True, **kwargs)
def collections(cloud=None, api_key=None, version=None, **kwargs):
    """
    Report the status of all currently trained collections.

    Includes basic statistics (input type, model type, sample count) and the
    training status of each collection.

    Inputs
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if it
      has not been declared elsewhere.

    Example usage:

    .. code-block:: python

       >>> collections = indicoio.collections()
       {
         "tag_predictor": {
           "input_type": "text",
           "model_type": "classification",
           "number_of_samples": 224,
           "status": "ready"
         },
         "popularity_predictor": {
           "input_type": "text",
           "model_type": "regression",
           "number_of_samples": 231,
           "status": "training"
         }
       }
    """
    params = {
        "batch": False,
        "api_key": api_key,
        "version": version,
        "method": "collections",
    }
    return api_handler(None, cloud=cloud, api="custom", url_params=params, private=True, **kwargs)
def text_tags(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Return a probability distribution over 100 document categories.

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
       >>> possible = indicoio.classification(text)
       >>> max(possible, key=possible.get)
       u'Weather'

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of class probability pairs
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="texttags", url_params=params, **kwargs)
def fer(image, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Return a probability distribution over emotional state for a face image.

    Input should be a grayscale image as a list of lists; resizing is
    attempted internally, but for best performance images should already be
    48x48 pixels.

    Example usage:

    .. code-block:: python

       >>> from indicoio import fer
       >>> import numpy as np
       >>> emotions = fer(np.zeros((48, 48)).tolist())
       >>> sorted(emotions)
       ['Angry', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']

    :param image: The image to be analyzed.
    :type image: list of lists
    :rtype: Dictionary containing emotion probability pairs
    """
    # Skip the 48x48 resize when face detection is requested.
    target_size = None if kwargs.get("detect") else (48, 48)
    image = image_preprocess(image, batch=batch, size=target_size)
    params = dict(batch=batch, api_key=api_key)
    return api_handler(image, cloud=cloud, api="fer", url_params=params, **kwargs)
def twitter_engagement(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Score the expected Twitter engagement of `text` between 0 and 1.

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
       >>> engagement = indicoio.twitter_engagement(text)

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Float of engagement between 0 and 1
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="twitterengagement", url_params=params, **kwargs)
def relevance(data, queries, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Score how relevant each query term/phrase is to the input text.

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> text = 'On Monday, president Barack Obama will be giving his keynote address at...'
       >>> relevance = indicoio.relevance(text, queries=['president'])
       >>> relevance[0]
       [0.44755361996336784]

    :param data: The text to be analyzed.
    :param queries: a list of terms or phrases to measure similarity against
    :type data: str or unicode
    :rtype: Dictionary of feature score pairs
    """
    # Relevance scoring always runs with synonym expansion disabled.
    kwargs['queries'] = queries
    kwargs['synonyms'] = False
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(data, cloud=cloud, api="relevance", url_params=params, **kwargs)
def predict(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Prediction endpoint: the primary interaction point for predictive
    analysis against this collection's trained model.

    Inputs
    data - String: The example being provided to the API. As a general rule,
      the data should be as similar as possible to the examples given to the
      train function; accuracy generally drops as the difference between
      this text and the training text increases. Base64 encoded image data,
      image urls, and text content are all valid.
    domain (optional) - String: hint for the technique indico should use,
      one of {"standard", "topics"}.
    collection (optional) - String: identifier for the particular model to
      use for prediction. The response format matches the format of the
      training examples.
    api_key (optional) - String: Your API key, required only if the key has
      not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if it
      has not been declared elsewhere.

    Example usage:

    .. code-block:: python

       >>> text = "I am Sam. Sam I am."
       >>> prediction = indicoio.predict(text)
       .75
    """
    # Batch mode is inferred from the data shape, not the caller's argument.
    batch = detect_batch(data)
    kwargs['collection'] = self.collection
    payload = image_preprocess(data, batch=batch)
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(payload, cloud=cloud, api="custom", url_params=params, private=True, **kwargs)
def personality(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Score the author on four personality traits.

    Returns the author's 'Extraversion', 'Conscientiousness', 'Openness',
    and 'Agreeableness' scores (each a float between 0 and 1) in a dict.

    Example usage:

    .. code-block:: python

       >>> text = "I love going out with my friends"
       >>> entities = indicoio.personality(text)
       {'Extraversion': 0.69691890478134155, 'Conscientiousness': 0.4658474326133728,
        'Openness': 0.42654544115066528, 'Agreeableness': 0.7414245903}

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary mapping each trait name to a float in [0, 1]
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="personality", url_params=params, **kwargs)
def content_filtering(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Score how obscene (NSFW) an image is.

    Input should be a grayscale image as a list of lists.

    Example usage:

    .. code-block:: python

       >>> from indicoio import content_filtering
       >>> import numpy as np
       >>> res = content_filtering(np.zeros((48, 48)).tolist())
       >>> res
       .056

    :param image: The image to be analyzed.
    :type image: list of lists
    :rtype: float of nsfwness
    """
    processed = image_preprocess(image, batch=batch, size=128, min_axis=True)
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(processed, cloud=cloud, api="contentfiltering", url_params=params, **kwargs)
def emotion(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Return a probability distribution over 5 possible emotions for `text`.

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> text = "I did it. I got into Grad School. Not just any program, but a GREAT program. :-)"
       >>> possible = indicoio.emotion(text)
       >>> max(possible, key=possible.get)
       u'joy'

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of emotion probability pairs
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="emotion", url_params=params, **kwargs)
def people(text, cloud=None, batch=None, api_key=None, version=2, **kwargs):
    """
    Given input text, returns references to specific persons found in the text.

    Example usage:

    .. code-block:: python

       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
       >>> entities = indicoio.people(text)
       [
         {
           u'text': "Mike Brown",
           u'confidence': 0.09470917284488678,
           u'position': [26, 36]
         },
         ...
       ]

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: List of dictionaries, one per person mention, each containing the
        matched text, a confidence score, and its character position
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="people", url_params=params, **kwargs)
def text_features(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given input text, returns a numeric feature vector that represents the
    content.

    Example usage:

    .. code-block:: python

       >>> from indicoio import text_features
       >>> text_features("Queen of England")
       [0.04509247093572533, -0.052756784338865576, ...]

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: List of floats which represents the content of the input text
    """
    # This endpoint variant always requests features without synonyms.
    kwargs['synonyms'] = False
    params = {
        "batch": batch,
        "api_key": api_key,
        "version": version,
    }
    return api_handler(
        text, cloud=cloud, api="textfeatures", url_params=params, **kwargs
    )
def predict(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Prediction endpoint -- the primary interaction point for all predictive
    analysis against a trained custom collection.

    Inputs
    data - String: The example being provided to the API. As a general rule,
        the data should be as similar as possible to the examples given to
        the train function. Base64 encoded image data, image urls, and text
        content are all valid.
    api_key (optional) - String: Your API key, required only if the key has
        not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if it
        has not been declared elsewhere.

    The response format for the given label will match the format of the
    training examples.

    Example usage:

    .. code-block:: python

       >>> text = "I am Sam. Sam I am."
       >>> prediction = indicoio.predict(text)
       .75
    """
    # The `batch` argument is recomputed from the input shape rather than
    # trusted from the caller.
    batch = detect_batch(data)
    if self.domain:
        kwargs["domain"] = self.domain
    kwargs['collection'] = self.collection
    prepared = image_preprocess(data, batch=batch)
    params = {
        "batch": batch,
        "api_key": api_key,
        "version": version,
    }
    return api_handler(
        prepared,
        cloud=cloud,
        api="custom",
        url_params=params,
        private=True,
        **kwargs
    )
def remove_example(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Remove a single instance of training data from the collection. Useful
    when one piece of content has been modified (e.g. retagged) while the
    remaining examples stay valid.

    Inputs
    data - String: The exact text you wish to remove from the given
        collection. If the string provided does not match a known piece of
        text then this will fail. Required if an id is not provided, and
        vice-versa.
    api_key (optional) - String: Your API key, required only if the key has
        not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if it
        has not been declared elsewhere.

    Example usage:

    .. code-block:: python

       >>> indicoio.remove_example(text="I am Sam. Sam I am.", label="popularity_predictor")
    """
    kwargs['collection'] = self.collection
    # Batch mode is inferred from the input shape, not the caller argument.
    batch = detect_batch(data)
    prepared = image_preprocess(data, batch=batch)
    params = {
        "batch": batch,
        "api_key": api_key,
        "version": version,
        'method': 'remove_example',
    }
    return api_handler(
        prepared,
        cloud=cloud,
        api="custom",
        url_params=params,
        private=True,
        **kwargs
    )
def add_data(self, data, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Basic training endpoint. Given a piece of text and a score, either
    categorical or numeric, this endpoint will train a new model with the
    additional piece of information.

    Inputs
    data - List: Either a single ``[example, target]`` pair or a list of
        such pairs (batch mode). The example text should ideally be longer
        than 100 characters and contain at least 10 words; image input is
        supported as well. The target can be a string (categorical) or a
        float (numeric), but only one or the other within a given label.
    api_key (optional) - String: Your API key, required only if the key has
        not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if it
        has not been declared elsewhere.

    Example usage:

    .. code-block:: python

       >>> text = "London Underground's boss Mike Brown warned that the strike ..."
       >>> indicoio.add_data([[text, .5]])
    """
    # A list of [example, target] pairs means batch mode; a single pair
    # does not. NOTE: assumes `data` is non-empty -- TODO confirm upstream
    # validation.
    batch = isinstance(data[0], list)
    if batch:
        X, Y = zip(*data)
        X = image_preprocess(X, batch=batch)
        # Materialize as a list of lists: the previous `map(list, zip(X, Y))`
        # produced a lazy iterator under Python 3, which the JSON request
        # layer cannot serialize.
        data = [list(pair) for pair in zip(X, Y)]
    else:
        data[0] = image_preprocess(data[0], batch=batch)
    kwargs['collection'] = self.collection
    if self.domain:
        kwargs["domain"] = self.domain
    url_params = {
        "batch": batch,
        "api_key": api_key,
        "version": version,
        'method': "add_data",
    }
    return api_handler(data, cloud=cloud, api="custom", url_params=url_params, **kwargs)
def collections(cloud=None, api_key=None, version=None, **kwargs):
    """
    Status report endpoint. Returns the status of every collection currently
    trained, along with some basic statistics on their accuracies.

    Inputs
    api_key (optional) - String: Your API key, required only if the key has
        not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if it
        has not been declared elsewhere.

    Example usage:

    .. code-block:: python

       >>> collections = indicoio.collections()
       {
         "tag_predictor": {
           "input_type": "text",
           "model_type": "classification",
           "number_of_samples": 224,
           "status": "ready"
         },
         "popularity_predictor": {
           "input_type": "text",
           "model_type": "regression",
           "number_of_samples": 231,
           "status": "training"
         }
       }
    """
    params = dict(batch=False, api_key=api_key, version=version, method="collections")
    # No payload is needed for a status query, hence data=None.
    return api_handler(
        None, cloud=cloud, api="custom", url_params=params, private=True, **kwargs
    )
def text_features(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given input text, returns a numeric feature vector that represents the
    content.

    Example usage:

    .. code-block:: python

       >>> from indicoio import text_features
       >>> text_features("Queen of England")
       [0.04509247093572533, -0.052756784338865576, ...]

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: List of floats which represents the content of the input text
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(
        text, cloud=cloud, api="textfeatures", url_params=params, **kwargs
    )
def twitter_engagement(text, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Given input text, returns an engagement score between 0 and 1.

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> import numpy as np
       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
       >>> engagement = indicoio.twitter_engagement(text)

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Float of engagement between 0 and 1
    """
    params = {
        "batch": batch,
        "api_key": api_key,
    }
    return api_handler(
        text, cloud=cloud, api="twitterengagement", url_params=params, **kwargs
    )
def keywords(text, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Given input text, returns a series of keywords and associated scores.

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> import numpy as np
       >>> text = 'Monday: Delightful with mostly sunny skies. Highs in the low 70s.'
       >>> keywords = indicoio.keywords(text, top_n=3)
       >>> print "The keywords are: "+str(keywords.keys())
       u'The keywords are ['delightful', 'highs', 'skies']

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of keyword/score pairs
    """
    params = dict(batch=batch, api_key=api_key)
    return api_handler(text, cloud=cloud, api="keywords", url_params=params, **kwargs)
def intersections(data, apis=None, **kwargs):
    """
    Helper to make multi requests of different types.

    :param data: Data to be sent in API request
    :param apis: List of exactly two API names whose results to intersect
    :rtype: Dictionary of api responses
    :raises IndicoError: if `apis` is not a two-element list, if an api is
        unsupported or the two apis take different input kinds, or if fewer
        than 3 examples are supplied
    """
    # remove auto-inserted batch param
    kwargs.pop('batch', None)

    # Validate the shape of `apis` BEFORE iterating it, so apis=None raises
    # the intended IndicoError rather than a bare TypeError.
    if not isinstance(apis, list) or len(apis) != 2:
        raise IndicoError("Argument 'apis' must be of length 2")

    # Client side api name checking. Raise explicitly instead of `assert`,
    # which is stripped under `python -O` and surfaces as AssertionError.
    for api in apis:
        if api in MULTIAPI_NOT_SUPPORTED:
            raise IndicoError(
                "The `%s` API is not supported by the intersections API" % api
            )

    if isinstance(data, list) and len(data) < 3:
        raise IndicoError(
            "At least 3 examples are required to use the intersections API")

    api_types = list(map(API_TYPES.get, apis))
    if api_types[0] != api_types[1]:
        raise IndicoError(
            "Both `apis` must accept the same kind of input to use the intersections API"
        )

    cloud = kwargs.pop("cloud", None)

    url_params = {
        'batch': False,
        'api_key': kwargs.pop('api_key', None),
        'apis': apis
    }

    return api_handler(data, cloud=cloud, api="apis/intersections",
                       url_params=url_params, **kwargs)
def sentiment_hq(text, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Given input text, returns a scalar estimate of the sentiment of that
    text. Values are roughly in the range 0 to 1, with 0.5 indicating
    neutral sentiment: 0 suggests very negative sentiment and 1 suggests
    very positive sentiment.

    Example usage:

    .. code-block:: python

       >>> from indicoio import sentimenthq
       >>> text = 'Thanks everyone for the birthday wishes!! It was a crazy few days ><'
       >>> sentiment = sentimenthq(text)
       >>> sentiment
       0.6210052967071533

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Float
    """
    params = dict(batch=batch, api_key=api_key)
    return api_handler(
        text, cloud=cloud, api="sentimenthq", url_params=params, **kwargs
    )
def train(self, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Basic training endpoint. Given an existing dataset, this endpoint will
    train a model.

    Inputs
    collection - String: the name of the collection to train a model using
    api_key (optional) - String: Your API key, required only if the key has
        not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if it
        has not been declared elsewhere.

    Example usage:

    .. code-block:: python

       >>> indicoio.train(collection)
    """
    kwargs['collection'] = self.collection
    params = dict(batch=batch, api_key=api_key, version=version, method="train")
    return api_handler(
        self.collection,
        cloud=cloud,
        api="custom",
        url_params=params,
        private=True,
        **kwargs
    )
def relevance(data, queries, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given input text and a list of query terms / phrases, returns how
    relevant each query is to the input text.

    Example usage:

    .. code-block:: python

       >>> import indicoio
       >>> text = 'On Monday, president Barack Obama will be giving his keynote address at...'
       >>> relevance = indicoio.relevance(text, queries=['president'])
       >>> print "Relevance: " + str(relevance[0])
       u'Relevance: [0.44755361996336784]'

    :param data: The text to be analyzed.
    :param queries: a list of terms or phrases to measure similarity against
    :type data: str or unicode
    :rtype: Dictionary of feature score pairs
    """
    kwargs['queries'] = queries
    params = {
        "batch": batch,
        "api_key": api_key,
        "version": version,
    }
    return api_handler(data, cloud=cloud, api="relevance", url_params=params, **kwargs)
def image_recognition(image, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given an input image, returns a dictionary of image classifications with
    associated scores.

    * Input can be either grayscale or rgb color and should either be a numpy
      array or nested list format.
    * Input data should be either uint8 0-255 range values or floating point
      between 0 and 1.
    * Large images (i.e. 1024x768+) are much bigger than needed; min-axis
      resizing will be done internally to 144 if needed.
    * For ideal performance, images should be square aspect ratio, but
      non-square aspect ratios are supported as well.

    Example usage:

    .. code-block:: python

       >>> from indicoio import image_recognition
       >>> features = image_recognition(<filename>)

    :param image: The image to be analyzed.
    :type image: str
    :rtype: dict containing classifications
    """
    # Min-axis resize to 144 px on the client before upload.
    prepared = image_preprocess(image, size=144, min_axis=True, batch=batch)
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(
        prepared, cloud=cloud, api="imagerecognition", url_params=params, **kwargs
    )
def content_filtering(image, cloud=None, batch=False, api_key=None, **kwargs):
    """
    Given a grayscale input image, returns a float score of how obscene the
    image is. Input should be in a list of lists format.

    Example usage:

    .. code-block:: python

       >>> from indicoio import content_filtering
       >>> import numpy as np
       >>> face = np.zeros((48,48)).tolist()
       >>> res = content_filtering(face)
       >>> res
       .056

    :param image: The image to be analyzed.
    :type image: list of lists
    :rtype: float of nsfwness
    """
    # Min-axis resize to 128 px; no fixed target size.
    prepared = image_preprocess(image, batch=batch, size=None, min_axis=128)
    params = {
        "batch": batch,
        "api_key": api_key,
    }
    return api_handler(
        prepared, cloud=cloud, api="contentfiltering", url_params=params, **kwargs
    )
def train(self, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Basic training endpoint. Given an existing dataset, this endpoint will
    train a model.

    Inputs
    collection - String: the name of the collection to train a model using
    api_key (optional) - String: Your API key, required only if the key has
        not been declared elsewhere.
    cloud (optional) - String: Your private cloud domain, required only if it
        has not been declared elsewhere.

    Example usage:

    .. code-block:: python

       >>> indicoio.train(collection)
    """
    kwargs['collection'] = self.collection
    params = dict(batch=batch, api_key=api_key, version=version, method="train")
    return api_handler(
        self.collection,
        cloud=cloud,
        api="custom",
        url_params=params,
        private=True,
        **kwargs
    )
def political(text, cloud=None, batch=False, api_key=None, version=None, **kwargs):
    """
    Given input text, returns a probability distribution over the political
    alignment of the speaker.

    Example usage:

    .. code-block:: python

       >>> from indicoio import political
       >>> import numpy as np
       >>> text = 'Wish we had more bike lanes. \
       Hopefully, driverless cars will chance economics from ownership to fee for service.'
       >>> affiliation = political(text)
       >>> affiliation
       {u'Libertarian': 0.4923755446986322, u'Green': 0.2974443102818122,
        u'Liberal': 0.13730032938784784, u'Conservative': 0.07287981563170784}
       >>> least_like = affiliation.keys()[np.argmin(affiliation.values())]
       >>> most_like = affiliation.keys()[np.argmax(affiliation.values())]
       >>> 'This text is most like %s and least like %s'%(most_like,least_like)
       u'This text is most like Libertarian and least like Conservative'

    :param text: The text to be analyzed.
    :type text: str or unicode
    :rtype: Dictionary of party probability pairs
    """
    params = dict(batch=batch, api_key=api_key, version=version)
    return api_handler(text, cloud=cloud, api="political", url_params=params, **kwargs)
def test_api_handler(mock_warn):
    """
    Smoke test: api_handler should surface server-side warnings through the
    (mocked) warning hook.

    NOTE(review): the previous assertion was
    ``assert mock_warn.called_with(mock_response.headers.get('x-warning'))``
    which is doubly broken: ``mock_response`` is not defined in this scope
    (NameError at assert time -- confirm no module-level fixture provides
    it), and ``called_with`` is an auto-created Mock attribute whose call
    returns a truthy Mock, so the assertion could never fail. Asserting
    ``mock_warn.called`` actually verifies the warning hook was invoked.
    """
    from indicoio.utils.api import api_handler
    api_handler("test", cloud=None, api='sentiment')
    assert mock_warn.called