コード例 #1
0
    def ner_process(self, process_type, sentence):
        """
        Instantiate the requested NER provider and run it on a sentence.

        Implements the Factory Design Pattern: ``process_type`` selects which
        provider class is constructed.

        :param process_type: one of <spacy|nltk|stanford|aws|gcloud|allen>
        :param sentence: Sentence to check for the NER
        :return: whatever the provider's ``recognise_entities`` returns, or
                 None when the provider is unknown or any step raises (the
                 error is logged)
        """
        try:
            # An if/elif chain (rather than a dict of classes) keeps name
            # resolution lazy: only the selected provider class is looked up.
            if process_type == 'gcloud':
                ner = GCloudNLP()
            elif process_type == 'spacy':
                ner = SpacyNLP()
            elif process_type == 'allen':
                ner = AllenNLP()
            elif process_type in ('nltk', 'stanford'):
                # NOTE(review): 'stanford' is served by NLTKNLP here, exactly
                # as before -- confirm that this mapping is intended.
                ner = NLTKNLP()
            elif process_type == 'aws':
                ner = AWSNLP()
            else:
                # Report the real problem instead of an UnboundLocalError
                # on `ner` further down.
                raise ValueError(
                    f"Unsupported process_type: {process_type!r}")

            # Plain equality: ('aws') is just the string 'aws', so the former
            # `in ('aws')` was a substring test that also matched '', 'a', 'ws'.
            if process_type == 'aws':
                ner.client_info = self.read_client_info(service_type='ner',
                                                        client=process_type)
            return ner.recognise_entities(sentence)

        except Exception as error:
            logger.error(error)
コード例 #2
0
    def get_service(self,
                    service_name,
                    service_type=None,
                    service_region_name=None,
                    aws_access_key_id=None,
                    aws_secret_access_key=None):
        """
        Build a boto3 client or resource for the given AWS service.

        :param service_name: AWS service name handed to boto3
        :param service_type: 'client' returns ``boto3.client(...)``; any other
                             value (including None) returns ``boto3.resource(...)``
        :param service_region_name: region, passed positionally to boto3
        :param aws_access_key_id: optional explicit access key
        :param aws_secret_access_key: optional explicit secret key
        :return: the boto3 client or resource object
        """
        # Start with no explicit credentials so the calls below also work when
        # the keys are omitted; previously `credentials` was unbound in that
        # case and the boto3 call raised. With no keys passed, boto3 resolves
        # credentials through its own configuration.
        credentials = {}

        # @TODO: To be removed
        if aws_access_key_id and aws_secret_access_key:
            credentials = {
                'aws_access_key_id': aws_access_key_id,
                'aws_secret_access_key': aws_secret_access_key,
            }

        if service_type == 'client':
            return boto3.client(service_name, service_region_name, **credentials)
        return boto3.resource(service_name, service_region_name, **credentials)
コード例 #3
0
    def recognise_entities(self, text):
        """
        Extract named entities, noun phrases and verb lemmas from a text.

        :param text: text handed to ``self.nlp``
        :return: dict with 'combined_entities' (entity label -> list of
                 entity texts) and 'metadata' ('noun_phrases' and 'verbs'
                 lists); None if processing raises (the error is logged)
        """
        try:
            combined_entities = defaultdict(list)
            doc = self.nlp(text)
            # Analyze syntax
            noun_phrases = [chunk.text for chunk in doc.noun_chunks]
            verbs = [token.lemma_ for token in doc if token.pos_ == "VERB"]
            # Group entity texts by label. A plain loop is used because the
            # work is a side effect; a comprehension would only build a
            # throwaway list of None values.
            for entity in doc.ents:
                combined_entities[entity.label_].append(entity.text)

            return {
                'combined_entities': dict(combined_entities),
                'metadata': {'noun_phrases': noun_phrases, 'verbs': verbs},
            }
        except Exception as error:
            logger.error(error)
コード例 #4
0
    def read_client_info(**kwargs):
        """
        Load ``../conf/client.json`` and optionally narrow it down.

        The path is relative to the current working directory. When the
        ``service_type`` keyword is truthy the result is narrowed to that key;
        when the ``client`` keyword is truthy it is narrowed again to that key.

        NOTE(review): the signature takes no ``self``/``cls``; presumably this
        is declared as a static method where it is defined -- confirm.

        :param kwargs: optional ``service_type`` and ``client`` selectors
        :return: the selected part of the parsed JSON; None if reading,
                 parsing or a key lookup raises (the error is logged)
        """
        try:
            with open("../conf/client.json") as config_file:
                settings = json.load(config_file)

            # Descend one level per supplied selector, always in this order.
            for selector in ('service_type', 'client'):
                chosen_key = kwargs.get(selector)
                if chosen_key:
                    settings = settings[chosen_key]

            return settings

        except Exception as error:
            logger.error(error)
コード例 #5
0
    def __init__(self, **kwargs):
        """
        Run entity recognition for one sentence and print the result.

        Recognised keyword arguments:

        * ``process_type`` -- NER provider name; 'spacy' is used when it is
          missing or falsy
        * ``sentence`` -- the sentence to analyse (mandatory)

        Logs an error and exits the process with status 1 when ``sentence``
        is not supplied. The recognition result is stored on
        ``self.entities`` and its 'combined_entities' entry is printed.
        """
        self.process_type = kwargs.get('process_type') or 'spacy'
        self.sentence = kwargs.get('sentence')

        # Check the argument itself: testing only "were any kwargs passed"
        # let a call with just process_type slip through with sentence=None.
        if self.sentence is None:
            logger.error("'sentence' param is mandatory!!!")
            sys.exit(1)

        logger.info(
            f"Recognising Entities using '{self.process_type}' . . . .")
        self.entities = self.ner_process(self.process_type, self.sentence)

        # ner_process can hand back None; guard so that the constructor does
        # not die with an AttributeError on `.get` in that case.
        combined = None
        if self.entities is not None:
            combined = self.entities.get('combined_entities')
        print("\nENTITIES: {}".format(combined))
コード例 #6
0
ファイル: aws_nlp.py プロジェクト: henin/heroka_nlp
    def recognise_entities(self, text):
        """
        Detect entities in a text through the AWS "comprehend" client.

        Region and credentials are read from ``self.client_info`` (keys
        'region', 'aws_access_key_id', 'aws_secret_access_key').

        :param text: text to analyse; sent with LanguageCode 'en'
        :return: dict with 'combined_entities' (entity 'Type' -> list of
                 'Text' values) and 'metadata' (the 'Entities' list of the
                 response); None if any step raises (the error is logged)
        """
        try:
            combined_entities = defaultdict(list)
            comprehend = super().get_service(
                service_name="comprehend",
                service_type="client",
                service_region_name=self.client_info['region'],
                aws_access_key_id=self.client_info['aws_access_key_id'],
                aws_secret_access_key=self.client_info['aws_secret_access_key'])

            # Call the service once and reuse the answer; the previous code
            # issued the identical request a second time just for 'metadata'.
            detected = comprehend.detect_entities(
                Text=text, LanguageCode='en')['Entities']
            for item in detected:
                combined_entities[item['Type']].append(item['Text'])
            return {'combined_entities': dict(combined_entities),
                    'metadata': detected}
        except Exception as error:
            logger.error(error)
コード例 #7
0
ファイル: gcloud_nlp.py プロジェクト: henin/heroka_nlp
    def sentiment(self, text):
        """
        Analyse the sentiment of a plain-text string via ``self.client``.

        :param text: content of the document to analyse
        :return: dict with the 'score' and 'magnitude' of the document
                 sentiment; None if any step raises (the error is logged)
        """
        try:
            request_document = types.Document(
                content=text, type=enums.Document.Type.PLAIN_TEXT)

            # Only the document-level sentiment of the response is used.
            analysis = self.client.analyze_sentiment(document=request_document)
            overall = analysis.document_sentiment
            return {'score': overall.score, 'magnitude': overall.magnitude}
        except Exception as error:
            logger.error(error)
コード例 #8
0
def main():
    """
    Command-line entry point: build an ``NER`` object from ``sys.argv``.

    Accepts ``<sentence>`` and an optional provider name; the provider
    defaults to "spacy". With any other number of arguments the usage line
    is logged and the process exits with status 1.

    :return: the constructed ``NER`` instance; None if an exception is
             raised along the way (the error is logged)
    """
    try:
        argument_count = len(sys.argv)
        if argument_count not in (2, 3):
            logger.info(
                "Usage: python heroka_ner.py <sentence> <spacy|aws|nltk|allen|stanford>"
            )
            sys.exit(1)

        sentence = sys.argv[1]
        process_type = sys.argv[2] if argument_count == 3 else "spacy"

        return NER(process_type=process_type, sentence=sentence)
    except Exception as error:
        logger.error(error)
コード例 #9
0
ファイル: allen_nlp.py プロジェクト: henin/heroka_nlp
    def recognise_entities(self, listofnames, entities=None):
        """
        Recognise entities through the AllenNLP demo API.

        Every item of ``listofnames`` is POSTed as one sentence. A failed
        request is retried once after a two second pause. A non-200 answer
        is logged and terminates the process with exit status 1. Any other
        error for an item is logged as a warning and that item is skipped.

        :param listofnames: iterable of sentences to send to the API
        :param entities: optional iterable of keys to pick from the result
        :return: dict mapping each processed item to a set of (tag, word)
                 pairs whose tag is not 'O'; when ``entities`` is given, a
                 dict with exactly those keys instead
        """
        # Loop-invariant, so built once.
        url = "https://demo.allennlp.org/api/named-entity-recognition/predict"
        names_entity = defaultdict(list)
        for name in listofnames:
            try:
                payload = {"sentence": name}
                try:
                    post_response = requests.post(url=url, json=payload)
                except requests.exceptions.RequestException:
                    # Retry once for request failures only; a bare `except:`
                    # would also swallow KeyboardInterrupt and SystemExit.
                    time.sleep(2)
                    post_response = requests.post(url=url, json=payload)
                if post_response.status_code != 200:
                    logger.error("Unable to hit ALLEN NLP API!!!!")
                    # Non-zero status: this is a failure, not a clean exit.
                    sys.exit(1)

                results = post_response.json()
                if results:
                    names_entity[name] = {
                        (tag, word)
                        for tag, word in zip(results['tags'], results['words'])
                        if tag != 'O'
                    }
            except Exception as error:
                logger.warning(error)
                continue

        if entities:
            # NOTE(review): names_entity is keyed by the input items, so this
            # picks entries by item, not by entity label; a key that was never
            # processed yields an empty list. Behaviour kept as is -- confirm
            # whether filtering by tag (e.g. ORG, PERSON) was intended.
            return {entity: names_entity[entity] for entity in entities}
        return dict(names_entity)
コード例 #10
0
ファイル: nltk_nlp.py プロジェクト: henin/heroka_nlp
    def recognise_entities(self, text):
        """
        Recognise entities with NLTK's tokenizer, POS tagger and NE chunker.

        :param text: Sentence
        :return: dict with 'combined_entities' (chunk label -> list of entity
                 strings) and 'metadata' (the chunk tree returned by
                 ``nltk.ne_chunk``, or None when an earlier step failed)
        """
        entities_res = defaultdict(list)
        # Pre-bind so the return below still works when tokenising, tagging
        # or chunking raises; previously that path hit an UnboundLocalError.
        chunk = None
        try:
            words = nltk.word_tokenize(text)

            pos_tags = nltk.pos_tag(words)
            chunk = nltk.ne_chunk(pos_tags)
            for ele in chunk.subtrees():
                try:
                    # Subtrees labelled 'S' are not entities; skip them.
                    if ele.label() == 'S':
                        continue
                    # Keep every token of the chunk so that a multi-word
                    # entity is returned whole rather than cut to its first
                    # word. Each leaf is indexed as (word, tag).
                    tokens = [leaf[0] for leaf in ele.leaves()]
                    if tokens:
                        entities_res[ele.label()].append(" ".join(tokens))
                except Exception:
                    # Best effort: a malformed subtree must not abort the rest.
                    continue
        except Exception as error:
            logger.error(error)

        return {"combined_entities": dict(entities_res), "metadata": chunk}
コード例 #11
0
ファイル: gcloud_nlp.py プロジェクト: henin/heroka_nlp
 def recognise_entities(self, text):
     """
     Analyse a sentence with ``self.client`` and group entity names by type.

     The grouped result is also stored on ``self.entities``.

     :param text: Sentence
     :return: dict with 'combined_entities' (entity type name -> list of
              entity names) and 'metadata' (the response object); None if
              any step raises (the error is logged)
     """
     try:
         grouped = defaultdict(list)
         document = language.types.Document(
             content=text, type=language.enums.Document.Type.PLAIN_TEXT)
         response = self.client.analyze_entities(
             document=document, encoding_type='UTF32')

         # Translate each numeric entity type into its enum name.
         for entity in response.entities:
             grouped[enums.Entity.Type(entity.type).name].append(entity.name)

         self.entities = dict(grouped)
         return {'combined_entities': dict(grouped), 'metadata': response}
     except Exception as error:
         logger.error(error)
コード例 #12
0
#
# Usage: 

#

# ======================================================================================================================


from heroka_nlp import logger

import sys

try:
    import boto3
except ImportError as ie:
    logger.error(ie)
    sys.exit(1)


class AWS:

    def __init__(self, **kwargs):

        if kwargs:
            self.service = kwargs.get('service')
            self.service_type = kwargs.get('service_type', 'resource')

    def get_service(self,
                    service_name,
                    service_type=None,
                    service_region_name=None,