예제 #1
0
def categorization():
    """Run Deep Categorization on a document and print the detected categories.

    Reads ``license_key``, ``model`` and ``document`` from the enclosing
    scope, sends ``document`` as the ``doc`` content of a
    ``meaningcloud.DeepCategorizationRequest`` and prints, for every category
    in the response, its label and relevance. When the request is not
    successful the status code and status message are printed instead.

    A ``ValueError`` raised while building, sending or reading the request is
    caught and reported on stdout; nothing is returned.
    """
    try:
        # deep categorization api call
        deepcat_response = meaningcloud.DeepCategorizationResponse(
            meaningcloud.DeepCategorizationRequest(license_key,
                                                   model=model,
                                                   doc=document).sendReq())

        if deepcat_response.isSuccessful():
            categories = deepcat_response.getCategories()
            if categories:
                print("\tCategories detected (" + str(len(categories)) +
                      "):\n")
                for cat in categories:
                    print("\t\t" + deepcat_response.getCategoryLabel(cat) +
                          ' --> ' +
                          deepcat_response.getCategoryRelevance(cat) + "\n")
            else:
                print("\tNo categories detected!\n")
        else:
            print(
                "\tOops! Request to Deep Categorization was not succesful: (" +
                deepcat_response.getStatusCode() + ') ' +
                deepcat_response.getStatusMsg())
    except ValueError as e:
        # Report the exception type *and* its message: printing only the
        # class (what sys.exc_info()[0] yields) hides the reason of the error.
        print("\nException: " + type(e).__name__ + ": " + str(e))
예제 #2
0
class DeepCategorizationRequestTest(unittest.TestCase):
    """Tests for building and sending ``meaningcloud.DeepCategorizationRequest``."""

    URL = 'https://api.meaningcloud.com/deepcategorization-1.0'
    KEY = 'MY_KEY'
    TIMEOUT_DEFAULT = 60
    RESOURCES_DIR = './resources/'
    text = 'London is big'
    model = 'IAB_2.0_en'
    polarity = 'n'
    # Shared request, built once when the class body is executed.
    request = meaningcloud.DeepCategorizationRequest(KEY, model=model, txt=text, polarity=polarity)

    def testConstruct(self):
        """Check URL, params and timeout for text, URL and file based requests.

        Returns nothing: unittest deprecates test methods that return a value.
        """
        request = self.request
        self.assertEqual(self.URL, request.getUrl())
        params = request.getParams()
        self.assertIsNotNone(params)
        self.assertIn('key', params)
        self.assertEqual(params['key'], self.KEY)
        self.assertIsNotNone(request.getTimeout())
        self.assertEqual(self.TIMEOUT_DEFAULT, request.getTimeout())

        extraHeaders = ["Accept: application/json"]
        request2 = meaningcloud.DeepCategorizationRequest(
            self.KEY, model=self.model, txt=self.text,
            polarity=self.polarity, extraheaders=extraHeaders)
        self.assertIsNotNone(request2.sendReq())

        otherparams = {'key2': 'my_key2'}
        request3 = meaningcloud.DeepCategorizationRequest(
            self.KEY, model=self.model, txt=self.text,
            polarity=self.polarity, extraheaders=extraHeaders,
            otherparams=otherparams)
        # assertIsNotNone(<bool>, True) could never fail; assert membership.
        self.assertIn('key2', request3.getParams())
        self.assertEqual(request3.getParams()['key2'], 'my_key2')

        url = 'https://en.wikipedia.org/wiki/Star_Trek'
        request4 = meaningcloud.DeepCategorizationRequest(
            self.KEY, model=self.model, url=url,
            polarity=self.polarity, extraheaders=extraHeaders,
            otherparams=otherparams)
        self.assertIn('url', request4.getParams())
        self.assertEqual(request4.getParams()['url'], url)

        doc_path = self.RESOURCES_DIR + 'file.txt'
        request5 = meaningcloud.DeepCategorizationRequest(
            self.KEY, model=self.model, doc=doc_path,
            polarity=self.polarity, extraheaders=extraHeaders,
            otherparams=otherparams)
        # The original check passed ``False`` as the expected value: the
        # document is read from ``_file`` below, so it is presumably not
        # sent as a plain parameter -- confirm against the SDK.
        self.assertNotIn('doc', request5.getParams())
        try:
            doc = request5._file['doc'].read().decode('utf-8')
        finally:
            # Release the handle opened by the request even if reading fails.
            request5._file['doc'].close()

        with open(doc_path, 'rb') as aux_doc:
            aux_content = aux_doc.read().decode('utf-8')
        self.assertEqual(aux_content, doc)

    def testSendReq(self):
        """Sending the shared request must produce a non-None response."""
        request = self.request
        requestRq = request.sendReq()
        self.assertIsNotNone(requestRq)
예제 #3
0
    def testConstruct(self):
        """Check URL, params and timeout for text, URL and file based requests.

        Returns nothing: unittest deprecates test methods that return a value.
        """
        request = self.request
        self.assertEqual(self.URL, request.getUrl())
        params = request.getParams()
        self.assertIsNotNone(params)
        self.assertIn('key', params)
        self.assertEqual(params['key'], self.KEY)
        self.assertIsNotNone(request.getTimeout())
        self.assertEqual(self.TIMEOUT_DEFAULT, request.getTimeout())

        extraHeaders = ["Accept: application/json"]
        request2 = meaningcloud.DeepCategorizationRequest(
            self.KEY, model=self.model, txt=self.text,
            polarity=self.polarity, extraheaders=extraHeaders)
        self.assertIsNotNone(request2.sendReq())

        otherparams = {'key2': 'my_key2'}
        request3 = meaningcloud.DeepCategorizationRequest(
            self.KEY, model=self.model, txt=self.text,
            polarity=self.polarity, extraheaders=extraHeaders,
            otherparams=otherparams)
        # assertIsNotNone(<bool>, True) could never fail; assert membership.
        self.assertIn('key2', request3.getParams())
        self.assertEqual(request3.getParams()['key2'], 'my_key2')

        url = 'https://en.wikipedia.org/wiki/Star_Trek'
        request4 = meaningcloud.DeepCategorizationRequest(
            self.KEY, model=self.model, url=url,
            polarity=self.polarity, extraheaders=extraHeaders,
            otherparams=otherparams)
        self.assertIn('url', request4.getParams())
        self.assertEqual(request4.getParams()['url'], url)

        doc_path = self.RESOURCES_DIR + 'file.txt'
        request5 = meaningcloud.DeepCategorizationRequest(
            self.KEY, model=self.model, doc=doc_path,
            polarity=self.polarity, extraheaders=extraHeaders,
            otherparams=otherparams)
        # The original check passed ``False`` as the expected value: the
        # document is read from ``_file`` below, so it is presumably not
        # sent as a plain parameter -- confirm against the SDK.
        self.assertNotIn('doc', request5.getParams())
        try:
            doc = request5._file['doc'].read().decode('utf-8')
        finally:
            # Release the handle opened by the request even if reading fails.
            request5._file['doc'].close()

        with open(doc_path, 'rb') as aux_doc:
            aux_content = aux_doc.read().decode('utf-8')
        self.assertEqual(aux_content, doc)
예제 #4
0
def analyzeText(text, model, num_cats):
    """Classify ``text`` with Deep Categorization and return its top labels.

    Sends ``text`` to the Deep Categorization API using ``model`` together
    with the ``license_key`` and ``server`` values from the enclosing scope,
    and increments the global ``index_count`` after a completed request.

    Args:
        text: Text to classify.
        model: Name of the Deep Categorization model to use.
        num_cats: Maximum number of category labels to keep; the result is
            padded with empty strings up to this length.

    Returns:
        A ``pd.Series`` holding the labels of the first ``num_cats``
        categories, or a single ``"ERROR (<code>): <message>"`` entry when
        the request failed with a non-blocking error, padded with ``""``.

    Raises:
        ValueError: When the response status is a blocking error according
            to ``isBlockingErrorType``, or when the request itself raises a
            ``ValueError``.
    """
    global index_count
    print("Classifying text #%s" % str(index_count))

    # this is where we are going to store our results
    formatted_categories = []

    try:
        # We are going to make a request to the Deep Categorization API
        request = meaningcloud.DeepCategorizationRequest(
            license_key, model=model, txt=text, server=server
        )
        setRequestSource(request)
        response = meaningcloud.DeepCategorizationResponse(request.sendReq())

        if response.isSuccessful():
            # Guard against an empty/missing category collection before
            # slicing it, as the other callers of getCategories() do.
            categories = response.getCategories() or []
            formatted_categories = [
                response.getCategoryLabel(cat) for cat in categories[:num_cats]
            ]
        elif isBlockingErrorType(response.getStatusCode()):
            raise ValueError(
                "Something went wrong in the MeaningCloud request!: ("
                + response.getStatusCode()
                + ") "
                + response.getStatusMsg()
            )
        else:
            print(
                "Oops! The request to Deep Categorization for text #"
                + str(index_count)
                + " was not succesful: ("
                + response.getStatusCode()
                + ") "
                + response.getStatusMsg()
            )
            formatted_categories = [
                "ERROR ("
                + response.getStatusCode()
                + "): "
                + response.getStatusMsg()
            ]

    except ValueError as e:
        # Normalise any ValueError subclass into a plain ValueError carrying
        # the same message, as callers of this function received before.
        raise ValueError(str(e))

    index_count += 1

    # Pad with empty strings so every call yields at least num_cats entries.
    formatted_categories = formatted_categories + [""] * (
        num_cats - len(formatted_categories)
    )
    return pd.Series(formatted_categories)
def getDeepCategorization(text, model, num_cats):
    """Return the top categories of ``text`` as one comma-separated string.

    Sends ``text`` to the Deep Categorization API with ``model`` and the
    ``license_key`` from the enclosing scope.

    Returns:
        ``"label (relevance), ..."`` for the first ``num_cats`` categories,
        ``'(none)'`` when the response has no categories, or ``''`` when the
        request was not successful (the status is printed in that case).
    """
    # Display name: the model without its last three characters
    # (presumably a language suffix such as "_en") and with spaces.
    model_title = model[:len(model) - 3].replace('_', ' ')
    print("\tGetting " + model_title + " analysis...")

    # We are going to make a request to the Deep Categorization API
    api_request = meaningcloud.DeepCategorizationRequest(license_key, model=model, txt=text)
    deepcat_response = meaningcloud.DeepCategorizationResponse(api_request.sendReq())

    if not deepcat_response.isSuccessful():
        print("\tOops! Request to Deep Categorization was not succesful: ("
              + deepcat_response.getStatusCode() + ') '
              + deepcat_response.getStatusMsg())
        return ''

    detected = deepcat_response.getCategories()
    if not detected:
        return '(none)'

    labelled = []
    for cat in detected[:num_cats]:
        labelled.append(deepcat_response.getCategoryLabel(cat)
                        + ' (' + deepcat_response.getCategoryRelevance(cat) + ')')
    return ', '.join(labelled)
예제 #6
0
def analyzeText(text):
    """Analyze ``text`` with three MeaningCloud APIs and collect the results.

    Runs, in order, Sentiment Analysis, Topics Extraction (entities and
    concepts) and Deep Categorization with the ``IAB_2.0_en`` model, using
    the ``license_key`` from the enclosing scope. A request that is not
    successful only prints its status message and leaves the corresponding
    result empty. A ``ValueError`` raised along the way is reported on
    stdout and whatever was collected so far is still returned. The global
    ``index_count`` is incremented on every call.

    Args:
        text: Text to analyze; the requests declare it as ``markup``.

    Returns:
        ``pd.Series([polarity, entities, concepts, iab2])`` where every item
        is a string (empty when not available).
    """
    global index_count
    print("Analyzing text " + str(index_count))

    # this is where we are going to store our results
    polarity = ''
    entities = ''
    concepts = ''
    iab2 = ''

    try:
        # We are going to make a request to the Sentiment Analysis API
        print("\tGetting sentiment analysis...")
        sentiment_response = meaningcloud.SentimentResponse(
            meaningcloud.SentimentRequest(license_key,
                                          lang='en',
                                          txt=text,
                                          txtf='markup').sendReq())
        if sentiment_response.isSuccessful():
            polarity = sentiment_response.getGlobalScoreTag()
        else:
            print('Request to sentiment was not succesful: ' +
                  sentiment_response.getStatusMsg())

        # We are going to make a request to the Topics Extraction API
        print("\tGetting entities and concepts...")
        topics_req = meaningcloud.TopicsRequest(license_key,
                                                txt=text,
                                                lang='en',
                                                topicType='ec',
                                                otherparams={'txtf': 'markup'})
        topics_response = meaningcloud.TopicsResponse(topics_req.sendReq())

        # If there are no errors in the request, we extract the entities and concepts
        if topics_response.isSuccessful():
            entities_list = topics_response.getEntities()
            if entities_list:
                # we keep only the entities with a relevance of at least 100
                entities = ', '.join(
                    topics_response.getTopicForm(entity) + ' (' +
                    topics_response.getTypeLastNode(
                        topics_response.getOntoType(entity)) + ')'
                    for entity in entities_list
                    if int(topics_response.getTopicRelevance(entity)) >= 100)

            concepts_list = topics_response.getConcepts()
            if concepts_list:
                # we keep only the concepts with a relevance of at least 100
                formatted_concepts = [
                    topics_response.getTopicForm(concept)
                    for concept in concepts_list
                    if int(topics_response.getTopicRelevance(concept)) >= 100
                ]
                # dict.fromkeys drops duplicated forms, keeping first-seen order
                concepts = ', '.join(dict.fromkeys(formatted_concepts))
        else:
            print('Request to topics was not succesful: ' +
                  topics_response.getStatusMsg())

        # We are going to make a request to the Deep Categorization API
        print("\tGetting IAB 2.0 classification...")
        deepcat_response = meaningcloud.DeepCategorizationResponse(
            meaningcloud.DeepCategorizationRequest(license_key,
                                                   model='IAB_2.0_en',
                                                   txt=text,
                                                   otherparams={
                                                       'txtf': 'markup'
                                                   }).sendReq())
        if deepcat_response.isSuccessful():
            categories = deepcat_response.getCategories()
            # only the code of the first category is kept
            iab2 = (', '.join(
                deepcat_response.getCategoryCode(cat)
                for cat in categories[:1])) if categories else ''
        else:
            print('Request to Deep Categorization was not succesful: ' +
                  deepcat_response.getStatusMsg())

    except ValueError as e:
        # Report the exception type *and* its message: printing only the
        # class (what sys.exc_info()[0] yields) hides the reason of the error.
        print("\nException: " + type(e).__name__ + ": " + str(e))

    index_count += 1

    return pd.Series([polarity, entities, concepts, iab2])