Python spacyMatcher Examples, spacyMatcher.spacyMatcher Python Examples

Example #1

0

Show file

def test_ALL():
    """With an empty tag filter the matcher must return every tagged topic.

    The comparison is against a list, so both the content and the order of
    the returned entries are checked.
    """
    # (entity type, expected topic) pairs, in the order the matcher is
    # expected to report them.
    type_topic_pairs = (
        ('PERSON', 'Donald Trump'),
        ('GPE', 'USA'),
        ('ORG', 'BBC'),
        ('PERCENT', '90%'),
        ('LANGUAGE', 'English'),
        ('DATE', 'Today'),
        ('TIME', '11:00'),
        ('LOC', 'Rocky Mountains'),
        ('NORP', 'Spanish'),
        ('EVENT', 'World War II'),
        ('WORK_OF_ART', 'Bible'),
        ('MONEY', '10,000,000'),
        ('QUANTITY', '20 kilometers'),
        ('ORDINAL', 'third'),
        ('CARDINAL', 'a thousand'),
    )
    expected = [{
        'type': entity_type,
        'topic': topic
    } for entity_type, topic in type_topic_pairs]
    assert spacyMat.spacyMatcher(text, '') == expected

Example #2

0

Show file

def test_Cardinal():
    """CARDINAL: numerals not covered by another type (ordinal, quantity...)."""
    expected = [dict(type='CARDINAL', topic='a thousand')]
    found = spacyMat.spacyMatcher(text, 'CARDINAL')
    assert found == expected

Example #3

0

Show file

def test_Ordinal():
    """ORDINAL: "first", "second", etc."""
    expected = [dict(type='ORDINAL', topic='third')]
    found = spacyMat.spacyMatcher(text, 'ORDINAL')
    assert found == expected

Example #4

0

Show file

def test_Quantity():
    """QUANTITY: measurements, as of weight or distance."""
    expected = [dict(type='QUANTITY', topic='20 kilometers')]
    found = spacyMat.spacyMatcher(text, 'QUANTITY')
    assert found == expected

Example #5

0

Show file

def test_Money():
    """MONEY: monetary values, including unit."""
    expected = [dict(type='MONEY', topic='10,000,000')]
    found = spacyMat.spacyMatcher(text, 'MONEY')
    assert found == expected

Example #6

0

Show file

def test_WorkArt():
    """WORK_OF_ART: titles of books, songs, etc."""
    expected = [dict(type='WORK_OF_ART', topic='Bible')]
    found = spacyMat.spacyMatcher(text, 'WORK_OF_ART')
    assert found == expected

Example #7

0

Show file

def test_Event():
    """EVENT: named hurricanes, battles, wars, sports events, etc."""
    expected = [dict(type='EVENT', topic='World War II')]
    found = spacyMat.spacyMatcher(text, 'EVENT')
    assert found == expected

Example #8

0

Show file

def test_GPE():
    """GPE: countries, cities, states."""
    expected = [dict(type='GPE', topic='USA')]
    found = spacyMat.spacyMatcher(text, 'GPE')
    assert found == expected

Example #9

0

Show file

def test_Loc():
    """LOC: non-GPE locations, mountain ranges, bodies of water."""
    expected = [dict(type='LOC', topic='Rocky Mountains')]
    found = spacyMat.spacyMatcher(text, 'LOC')
    assert found == expected

Example #10

0

Show file

def test_Time():
    """TIME: times smaller than a day."""
    expected = [dict(type='TIME', topic='11:00')]
    found = spacyMat.spacyMatcher(text, 'TIME')
    assert found == expected

Example #11

0

Show file

def test_Date():
    """DATE: absolute or relative dates or periods."""
    expected = [dict(type='DATE', topic='Today')]
    found = spacyMat.spacyMatcher(text, 'DATE')
    assert found == expected

Example #12

0

Show file

def test_Language():
    """LANGUAGE: any named language."""
    expected = [dict(type='LANGUAGE', topic='English')]
    found = spacyMat.spacyMatcher(text, 'LANGUAGE')
    assert found == expected

Example #13

0

Show file

def test_Percent():
    """PERCENT: percentage, including the "%" sign."""
    expected = [dict(type='PERCENT', topic='90%')]
    found = spacyMat.spacyMatcher(text, 'PERCENT')
    assert found == expected

Example #14

0

Show file

def test_Org():
    """ORG: companies, agencies, institutions, etc."""
    expected = [dict(type='ORG', topic='BBC')]
    found = spacyMat.spacyMatcher(text, 'ORG')
    assert found == expected

Example #15

0

Show file

def test_Norp():
    """NORP: nationalities or religious or political groups."""
    expected = [dict(type='NORP', topic='Spanish')]
    found = spacyMat.spacyMatcher(text, 'NORP')
    assert found == expected

Example #16

0

Show file

def _error_result(result_type, message):
    """Return a ``results`` entry of *result_type* whose outcome is an error."""
    return {'type': result_type, 'outcome': {"error": message}}


def lambda_handler(event, context):
    """Analyse the article behind ``event['url']`` and return a report dict.

    Args:
        event: Mapping that must contain a ``'url'`` key, e.g.
            ``{"url": "http://bbc.co.uk"}``. ``https://`` is prepended when
            the URL carries no ``http://``/``https://`` prefix.
        context: Not used by this handler; any value is accepted.

    Returns:
        ``{"error": ...}`` when the URL is missing or fails validation.
        Otherwise a dict shaped like::

            {
              'url': 'https://www.theguardian.com/world/2020/',
              'article': {
                'header': '...', 'summary': '...', 'keywords': [...],
                'image': ..., 'topics': [{'type': 'DATE', 'topic': 'Today'}]
              },
              'results': [
                {'type': 'credibility', 'outcome': {'score': ...}},
                {'type': 'polarity',    'outcome': {'score': ...}},
                {'type': 'objectivity', 'outcome': {'score': ...}},
                {'type': 'bias',        'outcome': {'score': ...}},
              ]
            }

        Every step is best-effort: a step that fails contributes an
        ``{"error": ...}`` outcome (or an ``'error'`` article) instead of
        aborting the whole request.
    """
    # --- URL presence -----------------------------------------------------
    # try/finally guarantees the subsegment is closed on the early return too.
    xray_recorder.begin_subsegment('lambda_function: check URL')
    try:
        logger.info('LambdaFunction: Checking we have a URL...')
        url = event['url']
    except KeyError:
        logger.info('LambdaFunction: URL is not present.')
        return {"error": "No URL provided"}
    finally:
        xray_recorder.end_subsegment()

    # --- Add "https://" to the URL if no protocol is present ---------------
    logger.info('LambdaFunction: Checking URL has protocol...')
    xray_recorder.begin_subsegment('lambda_function: check URL protocol')
    try:
        if not url.startswith(('https://', 'http://')):
            url = 'https://' + url
    finally:
        xray_recorder.end_subsegment()

    # --- URL validation ---------------------------------------------------
    logger.info('LambdaFunction: Validating URL...')
    xray_recorder.begin_subsegment('lambda_function: validate URL')
    try:
        url_is_valid = bool(validators.url(url))
    finally:
        xray_recorder.end_subsegment()
    if not url_is_valid:
        logger.info('LambdaFunction: URL is not valid.')
        return {"error": "The url was bad"}

    # Response skeleton; 'article' is filled in by the sentiment step below.
    response = {"url": url, "results": []}

    # --- CredibilityScore -------------------------------------------------
    logger.info('LambdaFunction: Trying to get credibility score...')
    try:
        credibilityresult = cr.getCredibilityScore(url)
    except Exception as e:
        logger.info('LambdaFunction: Could not get Credibility Score.')
        logger.info(e)
        credibilityresult = _error_result(
            'credibility', "The credibility score was not available.")
    response['results'].append(credibilityresult)

    # --- SentimentAnalysis ------------------------------------------------
    logger.info('LambdaFunction: Trying to get sentimentAnalysis score...')
    sentanalysisresult = {}
    # Single source of truth for "the sentiment step produced usable data".
    # The later bias / topic steps key off this flag rather than re-comparing
    # a sentinel value, so a failure here can never be mistaken for success.
    has_article_text = False
    try:
        sentanalysisresult = sa.sentimentAnalysis(url)
        if sentanalysisresult['text'] == '-1':
            logger.info(
                'LambdaFunction: sentimentAnalysis returned -1, dumping:')
            logger.info(sentanalysisresult)
        else:
            # Read every required key BEFORE touching the response, so a
            # missing key cannot leave half of the scores appended.
            article = {
                'header': sentanalysisresult['header'],
                'summary': sentanalysisresult['summary'],
                'keywords': sentanalysisresult['keywords'],
                'image': sentanalysisresult['image']
            }
            polarity = sentanalysisresult['polarity']
            subjectivity = sentanalysisresult['subjectivity']
            objectivity = abs(1 - subjectivity)
            has_article_text = True
    except Exception:
        logger.info('LambdaFunction: Could not get sentimentAnalysis Score.')
        exception_type, exception_value, exception_traceback = sys.exc_info()
        traceback_string = traceback.format_exception(exception_type,
                                                      exception_value,
                                                      exception_traceback)
        err_msg = json.dumps({
            "errorType": exception_type.__name__,
            "errorMessage": str(exception_value),
            "stackTrace": traceback_string
        })
        logger.error(err_msg)

    if has_article_text:
        response['article'] = article
        response['results'].append({
            'type': 'polarity',
            'outcome': {
                "score": polarity
            }
        })
        response['results'].append({
            'type': 'objectivity',
            'outcome': {
                "score": objectivity
            }
        })
    else:
        response['article'] = {
            'error': "The article summary could not be generated"
        }
        response['results'].append(
            _error_result('polarity',
                          "The polarity score could not be calculated."))
        response['results'].append(
            _error_result('objectivity',
                          "The objectivity score could not be calculated."))

    # --- BiasScore --------------------------------------------------------
    # Without polarity/subjectivity getBiasScore is not called at all.
    logger.info('LambdaFunction: Trying to get bias score...')
    if has_article_text:
        try:
            # Inside the try so a malformed credibility result degrades to a
            # bias error instead of crashing the handler.
            credibility_outcome = credibilityresult['outcome']
            if 'error' in credibility_outcome:
                cred_input = -1  # tells getBiasScore no credibility is known
            else:
                cred_input = credibility_outcome['score']
            biasscoreresult = bs.getBiasScore(cred_input, polarity,
                                              subjectivity)
        except Exception as e:
            logger.info('LambdaFunction: Could not get Bias Score.')
            logger.info(e)
            biasscoreresult = _error_result(
                'bias', "The bias score was not available.")
        response['results'].append(biasscoreresult)
    else:
        response['results'].append(
            _error_result('bias', "The bias score was not available."))

    # --- SpacyMatcher -----------------------------------------------------
    logger.info('LambdaFunction: Trying to get Spacy Matcher...')
    if has_article_text:
        try:
            # An empty tag filter requests ALL tags.
            response['article']['topics'] = sm.spacyMatcher(
                sentanalysisresult['text'], '')
        except Exception as e:
            logger.info('LambdaFunction: Could not get Topics.')
            logger.info(e)
            response['article']['topics'] = {"error": "No topics available."}

    return response

Example #17

0

Show file

def test_Person():
    """PERSON: people, including fictional."""
    expected = [dict(type='PERSON', topic='Donald Trump')]
    found = spacyMat.spacyMatcher(text, 'PERSON')
    assert found == expected