Beispiel #1
0
class TestClientTextApi(unittest.TestCase):
    """Checks the text-related calls of ``FullClient``.

    ``inputText`` and ``bulkTexts`` are fixtures defined outside this class;
    the client settings come from ``conf``.
    """

    def setUp(self):
        """Build a fresh client from the settings in ``conf`` before each test."""
        self.client = FullClient(apiKey=conf.API_KEY,
                                 apiServer=conf.BASE_PATH,
                                 retinaName=conf.RETINA_NAME)

    def testText(self):
        """The fingerprint of the sample text exists and has > 500 positions."""
        fp = self.client.getFingerprintForText(inputText)
        # Identity check: unlike assertNotEqual(fp, None) this cannot be
        # influenced by a custom __eq__/__ne__ on the returned object.
        self.assertIsNotNone(fp)
        self.assertGreater(len(fp.positions), 500)

    def testKeywords(self):
        """Keyword extraction yields more than two string keywords."""
        termList = self.client.getKeywordsForText(inputText)
        self.assertGreater(len(termList), 2)
        self.assertTrue(conf.isString(termList[0]))

    def testTokenize(self):
        """Tokenization yields comma-separated sentences; POS filtering works."""
        sentences = self.client.getTokensForText(inputText)
        self.assertGreater(len(sentences), 10)
        self.assertTrue(conf.isString(sentences[0]))
        # Each sentence is one string with its tokens joined by commas.
        firstSentence = sentences[0].split(',')
        self.assertEqual(firstSentence[0], "george")
        self.assertGreater(len(firstSentence), 10)

        # With POStags="VB" every token of the first sentence must be
        # reported as a verb by the term lookup.
        verbsSentences = self.client.getTokensForText(inputText, POStags="VB")
        for verb in verbsSentences[0].split(","):
            # assertIn prints the actual pos_types when the check fails.
            self.assertIn("VERB",
                          self.client.getTerms(term=verb)[0].pos_types)

    def testSlices(self):
        """Slicing honours maxResults and attaches fingerprints on request."""
        texts = self.client.getSlicesForText(inputText,
                                             getFingerprint=True,
                                             startIndex=0,
                                             maxResults=2)
        self.assertEqual(len(texts), 2)
        self.assertEqual(texts[0].text.split(' ')[0], "George")
        self.assertGreater(len(texts[0].fingerprint.positions), 100)

    def testBulk(self):
        """The bulk call returns four fingerprints, each with > 100 positions."""
        fingerprints = self.client.getFingerprintsForTexts(bulkTexts,
                                                           sparsity=1.0)
        self.assertEqual(len(fingerprints), 4)
        for fp in fingerprints:
            self.assertGreater(len(fp.positions), 100)

    def testLanguageDetection(self):
        """Language detection reports name, wiki URL and ISO tag."""
        self.assertEqual(
            self.client.getLanguageForText("I have a dream!").language,
            "English")
        self.assertEqual(
            self.client.getLanguageForText("Ich bin ein").wiki_url,
            "http://en.wikipedia.org/wiki/German_language")
        self.assertEqual(
            self.client.getLanguageForText(
                "Der var så dejligt ude på landet.").iso_tag, "da")
Beispiel #2
0
class TestClientTextApi(unittest.TestCase):
    """Tests for the text-related methods of ``FullClient``.

    Relies on the ``inputText`` and ``bulkTexts`` fixtures and on the ``conf``
    settings, all of which are defined outside this class.
    """

    def setUp(self):
        """Create the client under test from the ``conf`` settings."""
        self.client = FullClient(apiKey=conf.API_KEY, apiServer=conf.BASE_PATH, retinaName=conf.RETINA_NAME)

    def testText(self):
        """getFingerprintForText returns a fingerprint with more than 500 positions."""
        fp = self.client.getFingerprintForText(inputText)
        # Use the identity assertion rather than comparing to None with !=.
        self.assertIsNotNone(fp)
        self.assertGreater(len(fp.positions), 500)

    def testKeywords(self):
        """getKeywordsForText returns more than two keywords, as strings."""
        termList = self.client.getKeywordsForText(inputText)
        self.assertGreater(len(termList), 2)
        self.assertTrue(conf.isString(termList[0]))

    def testTokenize(self):
        """getTokensForText returns comma-joined sentences and supports POS filtering."""
        sentences = self.client.getTokensForText(inputText)
        self.assertGreater(len(sentences), 10)
        self.assertTrue(conf.isString(sentences[0]))
        # A sentence is a single string whose tokens are separated by commas.
        firstSentence = sentences[0].split(',')
        self.assertEqual(firstSentence[0], "george")
        self.assertGreater(len(firstSentence), 10)

        # Restricting to POStags="VB": each returned token must look up as a verb.
        verbsSentences = self.client.getTokensForText(inputText, POStags="VB")
        for verb in verbsSentences[0].split(","):
            # assertIn reports the offending pos_types on failure, assertTrue would not.
            self.assertIn("VERB", self.client.getTerms(term=verb)[0].pos_types)

    def testSlices(self):
        """getSlicesForText returns exactly maxResults slices, each with a fingerprint."""
        texts = self.client.getSlicesForText(inputText, getFingerprint=True, startIndex=0, maxResults=2)
        self.assertEqual(len(texts), 2)
        self.assertEqual(texts[0].text.split(' ')[0], "George")
        self.assertGreater(len(texts[0].fingerprint.positions), 100)

    def testBulk(self):
        """getFingerprintsForTexts returns four fingerprints with more than 100 positions each."""
        fingerprints = self.client.getFingerprintsForTexts(bulkTexts, sparsity=1.0)
        self.assertEqual(len(fingerprints), 4)
        for fp in fingerprints:
            self.assertGreater(len(fp.positions), 100)

    def testLanguageDetection(self):
        """getLanguageForText exposes the language name, wiki URL and ISO tag."""
        self.assertEqual(self.client.getLanguageForText("I have a dream!").language, "English")
        self.assertEqual(self.client.getLanguageForText("Ich bin ein").wiki_url, "http://en.wikipedia.org/wiki/German_language")
        self.assertEqual(self.client.getLanguageForText("Der var så dejligt ude på landet.").iso_tag, "da")
class LiteClient(object):
    """Minimalistic client for accessing core features of Cortical.io's Retina API in a simple way."""

    def __init__(self, apiKey):
        """Create the underlying FullClient with a fixed server and retina.
        Args:
            apiKey, str: the API key handed to FullClient
        """
        # NOTE(review): the endpoint uses plain http -- confirm whether an https URL should be used.
        self._fullClient = FullClient(apiKey, apiServer="http://api.cortical.io/rest", retinaName="en_associative")

    def _createDictionary(self, textOrFingerprint):
        """Wrap a text or a fingerprint in the dictionary form used to build request bodies.
        Args:
            textOrFingerprint, str OR list of integers
        Returns:
            dict: {"text": ...} for a str, {"positions": ...} for a list
        Raises:
            CorticalioException: if the argument is neither a str nor a list
        """
        # isinstance (rather than an exact type comparison) also accepts subclasses of str and list.
        if isinstance(textOrFingerprint, str):
            return {"text": textOrFingerprint}
        if isinstance(textOrFingerprint, list):
            return {"positions": textOrFingerprint}
        raise CorticalioException("Invalid argument, cannot create input from: '%s'" % (str(textOrFingerprint)))

    def getSimilarTerms(self, textOrFingerprint):
        """Get the similar terms for a given text or fingerprint
        Args:
            textOrFingerprint, str OR list of integers
        Returns:
            list of str: the 20 most similar terms
        Raises:
            CorticalioException: if the request was not successful
        """
        expression = self._createDictionary(textOrFingerprint)
        terms = self._fullClient.getSimilarTermsForExpression(json.dumps(expression), maxResults=20)
        return [t.term for t in terms]

    def getKeywords(self, text):
        """Get a list of keywords from the text
        Args:
            text, str: The input document
        Returns:
            list of str
        Raises:
            CorticalioException: if the request was not successful
        """
        return self._fullClient.getKeywordsForText(text)

    def getFingerprint(self, text):
        """Get the semantic fingerprint of the input text.
        Args:
            text, str: The text to be evaluated
        Returns:
            list of int: the positions of the semantic fingerprint
        Raises:
            CorticalioException: if the request was not successful
        """
        fp = self._fullClient.getFingerprintForText(text)
        return fp.positions

    def compare(self, textOrFingerprint1, textOrFingerprint2):
        """Returns the semantic similarity of texts or fingerprints. Each argument can be either a text or a fingerprint.
        Args:
            textOrFingerprint1, str OR list of integers
            textOrFingerprint2, str OR list of integers
        Returns:
            float: the semantic similarity in the range [0;1]
        Raises:
            CorticalioException: if the request was not successful
        """
        compareList = [self._createDictionary(textOrFingerprint1), self._createDictionary(textOrFingerprint2)]
        metric = self._fullClient.compare(json.dumps(compareList))
        return metric.cosineSimilarity

    def createCategoryFilter(self, positiveExamples):
        """Creates a filter fingerprint.
        Args:
            positiveExamples, list(str): The list of positive example texts.
        Returns:
            list of int: the positions of the filter fingerprint built from the texts
        Raises:
            CorticalioException: if the request was not successful
        """
        categoryFilter = self._fullClient.createCategoryFilter("CategoryFilter", positiveExamples)
        return categoryFilter.positions
Beispiel #4
0
class LiteClient(object):
    """Minimalistic client for accessing core features of Cortical.io's Retina API in a simple way."""
    def __init__(self, apiKey):
        """Set up the wrapped FullClient; server URL and retina name are fixed here.
        Args:
            apiKey, str: the API key forwarded to FullClient
        """
        # NOTE(review): plain-http endpoint -- verify whether https should be
        # used instead.
        self._fullClient = FullClient(apiKey,
                                      apiServer="http://api.cortical.io/rest",
                                      retinaName="en_associative")

    def _createDictionary(self, textOrFingerprint):
        """Turn a text or fingerprint into the dictionary used in request bodies.
        Args:
            textOrFingerprint, str OR list of integers
        Returns:
            dict: {"text": ...} when given a str, {"positions": ...} when
                given a list
        Raises:
            CorticalioException: if the argument is neither a str nor a list
        """
        # isinstance instead of type(...) == ...: subclasses of str and list
        # are valid inputs as well.
        if isinstance(textOrFingerprint, str):
            return {"text": textOrFingerprint}
        if isinstance(textOrFingerprint, list):
            return {"positions": textOrFingerprint}
        raise CorticalioException(
            "Invalid argument, cannot create input from: '%s'" %
            (str(textOrFingerprint)))

    def getSimilarTerms(self, textOrFingerprint):
        """Get the similar terms for a given text or fingerprint
        Args:
            textOrFingerprint, str OR list of integers
        Returns:
            list of str: the 20 most similar terms
        Raises:
            CorticalioException: if the request was not successful
        """
        expression = self._createDictionary(textOrFingerprint)
        terms = self._fullClient.getSimilarTermsForExpression(
            json.dumps(expression), maxResults=20)
        return [t.term for t in terms]

    def getKeywords(self, text):
        """Get a list of keywords from the text
        Args:
            text, str: The input document
        Returns:
            list of str
        Raises:
            CorticalioException: if the request was not successful
        """
        return self._fullClient.getKeywordsForText(text)

    def getFingerprint(self, text):
        """Get the semantic fingerprint of the input text.
        Args:
            text, str: The text to be evaluated
        Returns:
            list of int: the positions of the semantic fingerprint
        Raises:
            CorticalioException: if the request was not successful
        """
        fp = self._fullClient.getFingerprintForText(text)
        return fp.positions

    def compare(self, textOrFingerprint1, textOrFingerprint2):
        """Returns the semantic similarity of texts or fingerprints. Each argument can be either a text or a fingerprint.
        Args:
            textOrFingerprint1, str OR list of integers
            textOrFingerprint2, str OR list of integers
        Returns:
            float: the semantic similarity in the range [0;1]
        Raises:
            CorticalioException: if the request was not successful
        """
        compareList = [
            self._createDictionary(textOrFingerprint1),
            self._createDictionary(textOrFingerprint2)
        ]
        metric = self._fullClient.compare(json.dumps(compareList))
        return metric.cosineSimilarity

    def createCategoryFilter(self, positiveExamples):
        """Creates a filter fingerprint.
        Args:
            positiveExamples, list(str): The list of positive example texts.
        Returns:
            list of int: the positions of the filter fingerprint built from the texts
        Raises:
            CorticalioException: if the request was not successful
        """
        categoryFilter = self._fullClient.createCategoryFilter(
            "CategoryFilter", positiveExamples)
        return categoryFilter.positions