Example #1
# last 5 tokens to the end
print(doc5[-5:])
# last word / special character
print(doc5[-1])

# load the English model
nlp_en = spacy.load('en_core_web_sm')

print('-----------------------------')
doc6 = nlp_de(u'BMW baut eine neue Fabrik in Bad Reichenhall für $6 Millionen')

# print the entities
for ent in doc6.ents:
    print(ent.text)
    print(ent.label_)
    print(spacy.explain(ent.label_))

print('-----------------------------')
doc7 = nlp_de(
    u'Autonome Fahrzeuge verlagern die Versicherungsverantwortung zum Hersteller. Der grüne, alte, rostende Mercedes fährt um die Ecke.'
)

# prints the contiguous noun-chunk strings
for chunk in doc7.noun_chunks:
    print(chunk.text)

# displaCy visualization
print('-----------------------------')
doc8 = nlp_de(u'BMW baut eine neue Fabrik in Bad Reichenhall für $6 Millionen')
displacy.serve(doc8, style='dep', options={'distance': 80})  # jupyter=True
displacy.serve(doc8, style='ent', options={'distance': 80})
Example #2
def parseFindPhraseAndIndex(document, annotation_obj, displayDeps=0):
    # def parseFindPhraseAndIndex (document,annotationList,dependencyList=0,displayDeps=0):
    ############ parses doc and finds phrases within sentences: ###################
    ############ used for context-specific tasks where collocation is important ####
    annotation_offset = 288  ###HARD-CODED OFFSET FOR RAD DOCS###
    import spacy
    import re
    document = str(document)  # unicode() in the original is Python 2 only
    nlp = spacy.load('en')
    doc = nlp(document)
    ####
    ###########THINK ABOUT MESSING WITH THIS TO FOCUS ON NOUN CHUNKS AT THIS POINT#######
    ##
    ###########show parse tree for debugging and tuning#############
    if displayDeps != 0:
        from spacy import displacy
        options = {
            'compact': True,
            'bg': '#09a3d5',
            'color': 'white',
            'font': 'Source Sans Pro'
        }
        displacy.serve(doc, style='dep', options=options)

    sents = list(doc.sents)
    termsFoundList = []  ##holds all terms found
    startIndices = []
    firstCount = 0
    from keyValue import keyValue
    pertinentSentence = [0] * len(sents)  ### tracks whether there was a key hit in a sentence and, if so, what the key(s) were
    indexed_key_value_list = [0] * len(sents)  ### holds keyValue objects indexed to the sentence in which they were found
    KVList = [0] * len(sents)
    ###flip this with sentences
    sentCount = 0
    findingDic = {}
    annotation_order = annotation_obj.access_specification
    for sent in sents:  ### broke when flipped order of sentences and everything else
        listCounter = 0
        sent_index = sent[0].idx
        full_offset = sent_index + annotation_offset

        ### TO-DO #####
        ### 1. Figure out how to parse things in spaCy to add dependencies etc...
        ### 1.1 Look into creating a simstring matcher dictionary for locations, I think this could be really helpful
        ### 1.5 Parse and search within noun chunks themselves
        ### 2. Add word/location found reference to 'pertinent index'
        ### 3. Keep location search first, but don't actually pass on annotation unless location
        ### finding levels = no location, location, super location w/ activity, location w/ activity
        ### 4. Perform searches by token rather than index

        activity_findings = []  #store all activity findings to add later
        modifier_findings = []  #store all modifiers to add later
        negation_findings = []  #store all negators to add later
        findingDic = {}  # sub-dictionary of findings, could be used to create an object all at once at the end?
        for list_name in annotation_order:
            thisList = getattr(annotation_obj, list_name)  # look up the list by name instead of building a string for eval
            thisListName = list_name
            thisFoundList = []  ###current list of found items
            if pertinentSentence[sentCount] == 1 or annotation_order[
                    list_name] == 0:
                # phrasesFound=[]   ###phrases that are found
                # startIndices.append(sent[0].idx)  ###sentence start index
                for key in thisList:
                    if thisListName == 'negation_list':  ### make sure you don't grab non-pertinent no's
                        foundPhrase = re.findall(
                            r'[^0-9a-zA-Z](' + key + r')[^0-9a-zA-Z]',
                            str(sent)
                        )  #was old note about changing this, but seems unnecessary
                    else:
                        foundPhrase = re.findall(
                            r'([a-z\-A-Z]*?' + key + '[a-zA-Z\-]*?)',
                            str(sent))
                        ### TO-DO
                        # 1. alter the above to do the opposite of the negator search (i.e. return everything up to spaces or
                        ### punctuation to capture full surgical or location words)

                    if foundPhrase:
                        ## TO-DO:
                        # 1. need to only make new objects at the appropriate time, and to handle lists of objects
                        pertinentSentence[sentCount] = 1
                        if indexed_key_value_list[
                                sentCount] == 0 and annotation_order[
                                    list_name] == 0:  #if no object here, make one
                            current_kv = keyValue(sentence=sentCount)
                            indexed_key_value_list[sentCount] = current_kv
                            # print('YOU ARE HERE*********** \n '+str(sentCount))

                        elif type(
                                indexed_key_value_list[sentCount]
                        ) != list and indexed_key_value_list[sentCount] != 0:
                            current_kv = indexed_key_value_list[sentCount]
                            indexed_key_value_list[sentCount] = current_kv

                        ### Below should never be triggered, but just in case
                        else:
                            # print "KVLIST"
                            current_kv = indexed_key_value_list[sentCount]
                            indexed_key_value_list[sentCount] = current_kv
                            # continue


#################################################

# # thisKVObject.
                        phrase_count = 0  ##counter for how many phrases have been found in a given sentence
                        for phrase in foundPhrase:
                            if thisListName == 'location_dic':
                                this_concept = thisList[
                                    key]  ##maps back to concept type
                                this_phrase = phrase  ##maps to specific location
                                this_concept_type = 'location'  ##maps to list dictionary (concept type)

                                concept_KV = copy_or_create_KV_concept(
                                    current_kv, this_concept,
                                    this_concept_type, this_phrase, sent,
                                    full_offset, sentCount)
                                if concept_KV is None:
                                    continue  # if the function just updates in place, there is no return value
                                else:
                                    indexed_key_value_list[
                                        sentCount] = concept_KV

        #################ISSUES = CALLING THIS ON EMPTY OBJECT FOR SOME REASON ##############
        #         'general_modifier_list':1,'location_dic':1,'negation_list':1,'complication_dic':0,'clinical_modifier_dic':0
                            if thisListName in ('clinical_modifier_dic', 'general_modifier_list', 'negation_list'):
                                ##** CONVERT TO FORM USED IN FUNCTION **###
                                if thisListName == 'clinical_modifier_dic':
                                    name_to_pass = '******'
                                elif thisListName == 'general_modifier_list':
                                    name_to_pass = '******'
                                elif thisListName == 'negation_list':
                                    name_to_pass = '******'
                                ###TO-DO
                                #####1.1 start with negex to try to do better with negations
                                #####1.2 use dependency parsing to get even better

                                update_key_value(current_kv, name_to_pass,
                                                 phrase, sent, full_offset)
                            elif thisListName == 'complication_dic':
                                this_concept = thisList[
                                    key]  ##maps back to concept type
                                this_phrase = phrase  ##maps to specific location
                                this_concept_type = 'complication'  ##maps to list dictionary (concept type)
                                concept_KV = copy_or_create_KV_concept(
                                    current_kv, this_concept,
                                    this_concept_type, this_phrase, sent,
                                    full_offset, sentCount)
                                # print concept_KV
                                if concept_KV is None:
                                    continue
                                else:
                                    indexed_key_value_list[
                                        sentCount] = concept_KV

                            ### Check if key exists in dictionary
                            #### DOES THIS DO ANYTHING ANYMORE?????
                            if findingDic.get(thisListName):
                                if type(findingDic[thisListName]) == list:
                                    dicList = findingDic[thisListName]
                                    dicList.append(phrase)
                                    findingDic[thisListName] = dicList
                                else:
                                    dicList = [findingDic[thisListName]]
                                    dicList.append(phrase)
                                    findingDic[thisListName] = dicList
                            else:
                                findingDic[thisListName] = phrase
                            ####MAY NOT NEED ANYMORE??????

                            phrase_count += 1
                if thisFoundList:
                    thisFoundList = removeOverlappingPhrases(thisFoundList)
                    if type(thisFoundList[0]) == list:
                        for item in thisFoundList:
                            termsFoundList.append(item)
                    else:
                        termsFoundList.append(thisFoundList)
            ################split above into separate method ##################################

            listCounter += 1
        sentCount += 1
    return termsFoundList, indexed_key_value_list
Example #3
import spacy
from spacy import displacy

# the setup was truncated in the original snippet; the model name here is an assumption
nlp = spacy.load("en_core_web_sm")
text = (
    "Joe Biden to face test over access to sensitive information as he inherits Donald Trump's secret server"
)

doc = nlp(text)

# Analyze syntax
print("Noun phrases:", [chunk.text for chunk in doc.noun_chunks])
print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])

print(type(doc.ents))
print(doc.ents)

# json_doc = doc.to_json()
# print(type(json_doc))
# print(json_doc)

ent_list = []
# Find named entities, phrases and concepts
for entity in doc.ents:
    # ent_list.append((str(entity.text), str(entity.label_)))
    # print(type(entity.label_))
    print(entity.text, entity.label_)
displacy.serve(doc, style="dep")
# print(type(ent_list[0]))
# print(ent_list)
# json_string = json.dumps(ent_list)
# print(type(json_string))
# print(json_string)
Example #4
import spacy
from spacy import displacy

text = u"""With a degree of frustration, George tried various piano teachers for some two years (circa. 1911) before finally being introduced to Charles Hambitzer by Jack Miller (circa. 1913), the pianist in the Beethoven Symphony Orchestra. Until his death in 1918, Hambitzer remained Gershwin's musical mentor and taught him conventional piano technique, introduced him to music of the European classical tradition, and encouraged him to attend orchestral concerts. Following such concerts, young Gershwin would essentially try to play, on the piano at home, the music he had heard from recall, and without sheet music. As a matter of course, Gershwin later studied with the classical composer Rubin Goldmark and avant-garde composer-theorist Henry Cowell, thus formalizing his classical music training.

In 1913, Gershwin left school at the age of 15 and found his first job as a "song plugger". His employer was Jerome H. Remick and Company, a Detroit-based publishing firm with a branch office on New York City's Tin Pan Alley, and he earned $15 a week.
"""

nlp = spacy.load('en_core_web_sm')
doc = nlp(text)
displacy.serve(doc, style='ent')
Example #5
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 18 19:27:42 2021

@author: acer
"""

###############################################################################
###############################################################################

import spacy
sp = spacy.load('en_core_web_sm')

sen = sp(u"I like to play football. I hated it in my childhood though")

for word in sen:
    print(
        f'{word.text:{12}} {word.pos_:{10}} {word.tag_:{8}} {spacy.explain(word.tag_)}'
    )
'''
Visualizing POS tags in a graphical way is extremely easy. 
The displacy module from the spacy library is used for this purpose. 
'''
from spacy import displacy

sen = sp(u"I like to play football. I hated it in my childhood though")
displacy.render(sen, style='dep', jupyter=True, options={'distance': 85})
displacy.serve(sen, style='dep', options={'distance': 120})

###############################################################################
###############################################################################


# https://spacy.io/models/pl
# 3.0.0 / pl_core_news_md / pl_core_news_lg
# components : tok2vec, morphologizer, parser, tagger, senter, ner, attribute_ruler, lemmatizer
nlp = spacy.load('pl_core_news_lg')
nlp.pipeline
nlp.pipe_names

# doc = nlp("Prognozy wskazują, że w najbliższych dniach może zostać pobity w Polsce rekord zimna. Temperatura ma spaść nawet do –40 stopni Celsjusza. Ostatni raz podobne mrozy odnotowano w naszym kraju w 1929 roku.")
# doc = nlp("Naukowcy przyglądają się lodowcowi szelfowemu Brunt na Antarktydzie od początku 2019 r. Eksperci spodziewają się, że oderwanie gigantycznej góry lodowej nastąpi w najbliższym czasie, po tym jak pojawiły się nowe pęknięcia.")
doc = nlp(
    "Przykład oznacza, że jeżeli A ALE NIE B jest fałszywe lub prawodpodobnie prawdziwe wtedy C będzie definitywnie, ale niekoniecznie większe niż suma A i B."
)
displacy.serve(doc, style="dep")  # http://localhost:5000
displacy.serve(doc, style="ent")
# POS, lemmas http://stanza.run
# POS tagger API http://clarin.pelcra.pl/tools/tagger/ | https://krnnt-f3esrhez2q-ew.a.run.app/
pddoc = display_nlp(doc)
# print([(w.text, w.pos_) for w in doc])

nouns = [t.lemma_ for t in doc if t.pos_ in ['NOUN']]

import textacy.extract  # needed for textacy.extract.matches below (call as used in the original snippet)

patterns = ["POS:ADJ POS:NOUN:+"]
spans = textacy.extract.matches(doc, patterns=patterns)
print(*[s.lemma_ for s in spans], sep='|')

for col, values in extract_nlp(doc).items():
    print(f"{col}: {values}")

# showing lemmas - the root form of the word, part of speech and token
def show_lemmas(text):
    for token in text:
        print(
            f'{token.text:{12}} {token.pos_:{6}} {token.lemma:<{22}} {token.lemma_}'
        )


show_lemmas(obama_lemma)
show_lemmas(trump_lemma)

# showing dependencies - an example of a sentence
# click html: http://127.0.0.1:5000
displacy.serve(obama_lemma, style='dep')

# click html: http://127.0.0.1:5000
displacy.serve(trump_lemma, style='dep')


# Named entity recognition
# segmenting a sentence to identify and extract entities
def entities(text):
    for ent in text.ents:
        print(ent.text + ' - ' + ent.label_ + ' - ' +
              str(spacy.explain(ent.label_)))


entities(obama_tokens)
entities(trump_tokens)
Example #8
def display_tree():
    global text
    displacy.serve(text, style="dep")
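# A minimal driver sketch for display_tree(); the imports, the model name and the
# sample sentence are assumptions, since the original snippet only shows the function.
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")  # assumed model
text = nlp("The quick brown fox jumps over the lazy dog.")  # set the module-level global read by display_tree()
display_tree()  # blocks and serves the dependency tree on http://localhost:5000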
Example #9
import spacy
from spacy import displacy

nlp = spacy.load('en_core_web_sm')
text = u"""In ancient Rome, some neighbors live in three adjacent houses. In the center is the house of Senex, who lives there with wife Domina, son Hero, and several slaves, including head slave Hysterium and the musical's main character Pseudolus. A slave belonging to Hero, Pseudolus wishes to buy, win, or steal his freedom. One of the neighboring houses is owned by Marcus Lycus, who is a buyer and seller of beautiful women; the other belongs to the ancient Erronius, who is abroad searching for his long-lost children (stolen in infancy by pirates). One day, Senex and Domina go on a trip and leave Pseudolus in charge of Hero. Hero confides in Pseudolus that he is in love with the lovely Philia, one of the courtesans in the House of Lycus (albeit still a virgin)."""
doc = nlp(text)
sentence_spans = list(doc.sents)
displacy.serve(sentence_spans, style='dep')
import spacy
from spacy import displacy

nlp = spacy.load('en_core_web_lg')

doc_dep = nlp(u'This is a sentence.')

displacy.serve(doc_dep, style='dep')

doc_ent = nlp(
    u'When Sebastian Thrun started working on self-driving cars at Google '
    u'in 2007, few people outside of the company took him seriously.')

displacy.serve(doc_ent, style='ent')
Example #11
import spacy
from spacy import displacy

dp = spacy.load("models/model-final")
analysis = dp('C\' est de cette façon que des conditions socio-économiques radicalement nouvelles ( néolibéralisme ) ont été imposées durant les années 1980 à 1990 \.')
displacy.serve(analysis, style="dep", port=8888)
Example #12
piano_class_text = ('Great Piano Academy is situated'
                    ' in Mayfair or the City of London and has'
                    ' world-class piano instructors.')

#piano_class_doc = nlp(keyvalue_text)
#about_interest_doc = nlp(about_interest_text)
about_doc = nlp(keyvalue_text)
sentences = list(about_doc.sents)

#Break down the content into sentences
#for sentence in sentences:
#  print(sentence)
#Tokenize the content from the sentences and index the tokens
#for token in about_doc:
#   print(token, token.idx, token.is_stop, token.tag_,token.pos_)
'''
displacy.serve(piano_class_doc,style='dep')
for ent in piano_class_doc.ents:
    print(ent.text, ent.start_char, ent.end_char,
          ent.label_, spacy.explain(ent.label_)) '''


def extract_full_name(nlp_doc):
    print(nlp_doc)
    pattern = [{'POS': 'PROPN'}, {'POS': 'PROPN'}]
    matcher.add('FULL_NAME', None, pattern)
    matches = matcher(nlp_doc)
    print(matches)
    for match_id, start, end in matches:
        span = nlp_doc[start:end]
        print(span.text)
        golden_heads = []
        parsing_info = []
        for title, para in d['context']:
            para_heads = [title, []]
            outputs = predictor.predict_batch_raw(para)
            for out in outputs:
                para_heads[1].append(out['predicted_heads'])
            golden_heads.append(para_heads)

            parsing_info.append([title, outputs])
        d['golden_head'] = golden_heads
        dep_dict = {'_id': d['_id'], 'parsing_info': parsing_info}
        dep_results.append(dep_dict)

    with open(args.output, 'w') as f:
        json.dump(data, f)
    with open(args.dep_output, 'wb') as f:
        pickle.dump(dep_results, f)

    # draw dep trees
    if args.draw:
        draw_idxs = random.sample(range(len(dep_results)), 5)
        draw_data = [
            get_display_form(out, 'allen') for idx in draw_idxs
            for t, outputs in dep_results[idx]['parsing_info']
            for out in outputs
        ]
        draw_idxs = random.sample(range(len(draw_data)), 10)
        draw_data = [draw_data[idx] for idx in draw_idxs]
        displacy.serve(draw_data, style='dep', manual=True)
        return wn.synset("kitchen_utensil.n.01") in self.hypernyms

    def soundsEdible(self):
        return wn.synset("food.n.01") in self.hypernyms or wn.synset(
            "food.n.02") in self.hypernyms


if __name__ == '__main__':

    ### Load spaCy's English NLP model
    #nlp = spacy.load('en')
    nlp = spacy.load('en_core_web_sm')

    ### The text we want to examine
    text = "portal"
    doc = nlp(u'Apple is looking at buying U.K. startup for $1 billion')
    ### Parse the text with spaCy
    ### Our 'document' variable now contains a parsed version of text.
    document = nlp(u"On the mantelpiece you make out a loaf of bread.")

    identifyInOnRelationships(document)

    for chunk in document.noun_chunks:
        print(chunk.text, chunk.root.text, chunk.root.dep_,
              chunk.root.head.text)
    ### print out all the named entities that were detected
    for token in document:
        print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
              token.dep)
    displacy.serve(document, style='dep', port=5000)
Example #15
import sys
import re

import spacy
from spacy import displacy

if __name__ == '__main__':
    #nlp = spacy.load("en_coref_md")
    nlp = spacy.load("en_core_web_sm")

    var1 = """
    American Airlines 'cannot' forecast costs of grounding of 737 MAX aircraft because of uncertainties
    American Airlines Group Inc. AAL, +0.03% disclosed Monday that its expects the Federal Aviation Administration's grounding of Boeing Co.'s BA, +1.02% 737 MAX aircraft to continue to cause "significant disruption" to its customers and financial costs to the airline. The company said, however, that the financial costs of the disruption "cannot be forecasted at this time," as they will depend on a number of factors, including the period of time of the grounding and the circumstances related to the reintroduction of the aircraft. American said in a filing with the SEC that its fleet included 24 Boeing MAX 8 aircraft, with an additional 76 aircraft on order. Prior to the grounding, it had been operating on average about 90 flights a day involving the grounded aircraft, with flight cancellations announced through April 24, so far. American's stock slipped 0.1% in premarket trade. It has shed 4.8% year to date, while Boeing shares have rallied 12.3%, the NYSE Arca Airline Index XAL, -0.33% has gained 4.5% and the Dow Jones Industrial Average DJIA, +0.23% has advanced 9.3%."""

    #Las Vegas Sands LVS, -0.70% will participate in the J.P. Morgan Gaming, Lodging, Restaurant & Leisure Management Access Forum in Las Vegas, NV on Thursday, March 14, 2019. Mr. Daniel Briggs, Senior Vice President Investor Relations, will participate in a discussion which is scheduled to begin at approximately 4:05 p.m. Pacific Time (7:05 p.m. Eastern Time).A webcast of the discussion may be accessed at the Investor Relations section of the company's website at www.sands.com.  About Las Vegas Sands Corp LVS, -0.70%Las Vegas Sands is the world's pre-eminent developer and operator of world-class Integrated Resorts. We deliver unrivaled economic benefits to the communities in which we operate.LVS created the meetings, incentives, convention and exhibition (MICE)-based Integrated Resort. Our industry-leading Integrated Resorts provide substantial contributions to our host communities including growth in leisure and business tourism, sustained job creation and ongoing financial opportunities for local small and medium-sized businesses.Our properties include The Venetian and The Palazzo resorts and Sands Expo in Las Vegas, Sands Bethlehem in Eastern Pennsylvania, and the iconic Marina Bay Sands in Singapore. Through majority ownership in Sands China Ltd., we have developed the largest portfolio of properties on the Cotai Strip in Macao, including The Venetian Macao, The Plaza and Four Seasons Hotel Macao, Sands Cotai Central and The Parisian Macao, as well as the Sands Macao on the Macao Peninsula.LVS is dedicated to being a good corporate citizen, anchored by the core tenets of serving people, planet and communities.  We deliver a great working environment for 50,000 team members worldwide, drive social impact through the Sands Cares charitable giving and community engagement program and lead in environmental performance through the award-winning Sands ECO360 global sustainability program. To learn more, please visit www.sands.com.Contacts:\nInvestment Community:\nDaniel Briggs\n(702) 414-1221Media:\nRon Reese\n(702) 414-3607 View original content to download multimedia:http://www.prnewswire.com/news-releases/las-vegas-sands-to-participate-in-the-2019-jp-morgan-gaming-lodging-restaurant--leisure-management-access-forum-300812797.htmlSOURCE Las Vegas SandsCopyright (C) 2019 PR Newswire. All rights reserved", "title": "Las Vegas Sands to Participate in the 2019 J.P. Morgan Gaming, Lodging, Restaurant & Leisure Management Access Forum"""

    var1 = re.sub(r', [+-]\d+\.\d+%', "", var1)
    #var1 = var1.replace("%", " percent")

    doc = nlp(var1)
    sentence_spans = list(doc.sents)
    displacy.serve(sentence_spans, style="dep")
###########################################################
#Syntactic dependency
ex3 = nlp('Sally likes Sam')
for word in ex3:
    print(word.text,word.tag_,word.pos_,word.dep_)   #word.dep_ for finding dependency

print(spacy.explain('advmod'))
print(spacy.explain('root'))
print(spacy.explain('dobj'))



##############################################################
from spacy import displacy
ex3 = nlp('Tajmahal is an iconic monument in India')
displacy.serve(ex3, style='dep')  # dependency visualization
displacy.serve(ex3, style='ent')  # named entity recognition


displacy.serve(ex3, style='ent', options={'compact': True, 'bg': 'cornflowerblue', 'font': 'Sans-Serif'})


##################################################################
#Lemmatization
docx = nlp('study studying studious studio student')
for word in docx:
    print(word.text,word.lemma_,word.pos_)


docx = nlp('walks walking walker walked good better best')
for word in docx:
    print(word.text, word.lemma_, word.pos_)
universalPatterns = [nlp(text) for text in universalQuantifiers]
phraseMatcher.add("Immeasurable Quantifiers", match_universal_quantifier, *universalPatterns)

temporalDependencies = ["eventually", "before", "when", "after", "as", "once", "earliest", "latest", "instantaneous", "simultaneous", "while", "at last"]
temporalPatterns = [nlp(text) for text in temporalDependencies]
phraseMatcher.add("Temporal Dependencies", match_temporal, *temporalPatterns)

Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
filename = askopenfilename()

f = open(filename, "r")
inputText = f.read()

doc = nlp(inputText)
matches = matcher(doc)
lowercaseDoc = nlp(inputText.lower())
phraseMatches = phraseMatcher(lowercaseDoc)

match_ents.sort(key=lambda x: x["start"])

options = {"colors": {"ADVERB": "orange", "PASSIVE VOICE": "yellow", "ADJECTIVE": "purple", "INFINITIVE": "green", "INDEFINITE ARTICLE": "blue"}}

url = 'http://0.0.0.0:5000'

webbrowser.open_new_tab(url)

# Open URL in new window, raising the window if possible.
webbrowser.open_new(url)

displacy.serve([{"text": doc.text, "ents": match_ents}], style="ent", manual=True, options = options)
Example #18
Average daily earnings for capesizes, which typically transport 170,000-180,000 tonne cargoes including iron ore and coal, lost $361 to $2,660.

The panamax index rose 11 points, or 2.1%, to 541.

However, the index has declined over 5.7% this week and registered its third consecutive week of losses.

Average daily earnings for panamaxes, which usually carry coal or grain cargoes of about 60,000 tonnes to 70,000 tonnes, increased $98 to $4,871.

Australian iron ore miners such as Rio Tinto and BHP Group, who are among China’s biggest suppliers, said they were preparing for a tropical cyclone that will hit Australia over the weekend.

Port Hedland, which is the world’s biggest export point for iron ore, and Port of Dampier will be shutting down operations due to the cyclone in Australia.

The supramax index fell 15 points to 491.
Source: Reuters (Reporting by Anjishnu Mondal in Bengaluru; Editing by Krishna Chandra Eluri)
""")
for token in doc:
    print(token.text, token.pos_, token.dep_)
print("--------------------------------")
for token in doc:
    print(
        "Text:{0}\tidx:{1}\tlemma_:{2}\tis_punct:{3}\tis_space:{4}\tshape_:{5}\tpos_:{6}\ttag_:{7}"
        .format(token.text, token.idx, token.lemma_, token.is_punct,
                token.is_space, token.shape_, token.pos_, token.tag_))
print("--------------------------------")
for sent in doc.sents:
    print(sent)

print("--------------------------------")

displacy.serve(doc, style='ent', host='localhost', page=True)
Example #19
import spacy
from spacy import displacy

nlp = spacy.load('en_core_web_sm')
with open('../UPDATED_NLP_COURSE/TextFiles/peterrabbit.txt') as f:
    doc = nlp(f.read())
list_sents = [nlp(sent.text) for sent in doc.sents]
list_ners = [doc for doc in list_sents if doc.ents]
print(len(list_ners))
displacy.serve(list_sents[0], style='ent')
Example #20
import spacy
nlp = spacy.load('en')
doc = nlp(
    u"In 2011, Google launched Google +, its fourth foray into social networking."
)
doc.user_data['title'] = "An example of an entity visualization"
from spacy import displacy
options = {
    'ents': ["ORG", "PRODUCT", "DATE"],
    'colors': {
        "ORG": "aqua",
        "PRODUCT": "aqua"
    }
}
displacy.serve(doc, style='ent', options=options)
Example #21
    def display_dependencies(self, utterance):
        doc = self.nlp(utterance)
        displacy.serve(doc, style='dep')
Example #22
import spacy
from spacy import displacy

# Load English core model
nlp = spacy.load("en_core_web_sm")

# Load policies from local file
f = open("../policy_pages_873/929nin.com.txt", "r")
doc = f.read()

# Apply NLP with the en_core model
doc = nlp(doc)

# Extract annotated sentences
sentence_spans = list(doc.sents)  # [1] <- limit sentences if necessary

# Display the sentence analyses, one per sentence, as an SVG tree served via localhost:5000
displacy.serve(sentence_spans,
               style="dep",
               options={
                   "compact": True,
                   "bg": "#fff",
                   "color": "black",
                   "font": "Palatino"
               })
# alternative serve(doc, ...) compact: False
def parse_sentence_for_dependencies(sentence) -> None:
    """Builds a visualisation for the dependency tree at http://127.0.0.1:5000"""
    return displacy.serve(sentence, style="dep")
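# A minimal usage sketch for parse_sentence_for_dependencies(); the imports, the model
# name and the sample sentence are assumptions, since the original calling context is not shown.
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")  # assumed model
sentence = nlp("Autonomous cars shift insurance liability toward manufacturers.")
parse_sentence_for_dependencies(sentence)  # blocks and serves the tree at http://127.0.0.1:5000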
Example #24
                 ' Language Processing". There is a helpline number '
                 ' available at +1-1234567891. Gus is helping organize it.'
                 ' He keeps organizing local Python meetups and several'
                 ' internal talks at his workplace. Gus is also presenting'
                 ' a talk. The talk will introduce the reader about "Use'
                 ' cases of Natural Language Processing in Fintech".'
                 ' Apart from his work, he is very passionate about music.'
                 ' Gus is learning to play the Piano. He has enrolled '
                 ' himself in the weekend batch of Great Piano Academy.'
                 ' Great Piano Academy is situated in Mayfair or the City'
                 ' of London and has world-class piano instructors.')

complete_doc = nlp(complete_text)
# Remove stop words and punctuation symbols
words = [
    token.text for token in complete_doc
    if not token.is_stop and not token.is_punct
]
word_freq = Counter(words)
# 5 commonly occurring words with their frequencies
common_words = word_freq.most_common(5)
print('common words')
print(common_words)

# Iterate over the tokens
for token in complete_doc:
    # Print the token and its part-of-speech tag
    print(token.text, "-->", token.pos_)

displacy.serve(complete_doc, style="ent")
Example #25
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")

def parse_raw_text(text):
    """
    :param text: raw text
    :return: spacy parse tree
    """
    return nlp(text)




if __name__ == "__main__":
    print(str([item.dep_ for item in parse_raw_text("she is beautiful")]))
    print(str([item.dep_ for item in parse_raw_text("he who traveled in china is granted access of the company by john")]))
    displacy.serve(parse_raw_text("he who traveled in china is granted access of the company by john"), style="dep")
Example #26
# ents example

import spacy
from spacy import displacy

nlp = spacy.load('3gm_ner_model')
text = '''«Διάσπαση του Υπουργείου Εσωτερικών, Αποκέντρωσης και Ηλεκτρονικής Διακυβέρνησης στα Υπουργεία: α) Εσωτερικών και β) Διοικητικής Μεταρρύθμισης και Ηλεκτρονικής Διακυβέρνησης, συγχώνευση των Υπουργείων Οικονομίας, Ανταγωνιστικότητας και Ναυτιλίας και Θαλάσσιων Υποθέσεων, Νήσων και Αλιείας στο Υπουργείο Ανάπτυξης, Ανταγωνιστικότητας και Ναυτιλίας και μεταφορά στον Πρωθυπουργό των Γενικών Γραμματειών Ενημέρωσης και Επικοινωνίας και στο Υπουργείο Παιδείας, Δια Βίου Μάθησης και Θρησκευμάτων της Γενικής Γραμματείας Νέας Γενιάς» (ΦΕΚ Α΄ 147)'''
doc = nlp(text)
displacy.serve(doc, style="ent")
Asian shares skidded on Tuesday after a rout in tech stocks put Wall Street to the sword, while a 
sharp drop in oil prices and political risks in Europe pushed the dollar to 16-month highs as investors dumped 
riskier assets. MSCI’s broadest index of Asia-Pacific shares outside Japan dropped 1.7 percent to a 1-1/2 
week trough, with Australian shares sinking 1.6 percent. Japan’s Nikkei dived 3.1 percent led by losses in 
electric machinery makers and suppliers of Apple’s iphone parts. Sterling fell to $1.286 after three straight 
sessions of losses took it to the lowest since Nov.1 as there were still considerable unresolved issues with the
European Union over Brexit, British Prime Minister Theresa May said on Monday.'''

import spacy

nlp = spacy.load("en_core_web_sm")
document = nlp(article)

print('Original Sentence: %s' % (article))

for element in document.ents:
    print(element.text + ' - ' + element.label_ + ' - ' +
          str(spacy.explain(element.label_)))

from spacy import displacy
sp = spacy.load('en_core_web_sm')

sen = sp(
    u'Manchester United is looking to sign Harry Kane for $90 million. David demand 100 Million Dollars'
)
displacy.render(sen, style='ent', jupyter=True)
displacy.serve(sen, style='ent')

###############################################################################
###############################################################################
Example #28
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc1 = nlp("This is a sentence.")
doc2 = nlp("This is another sentence.")
displacy.serve([doc1, doc2], style="dep")
Example #29
import spacy
from spacy import displacy

nlp = spacy.load('en')
docInput = input("Enter a sentence:  ")
doc = nlp(docInput)
for token in doc:
    print(str(token) + " " + token.dep_)
    print(str(token) + " " + token.pos_)
displacy.serve(doc, style='dep')
Example #30
def visualize(doc):
    colors = {"ORG": "green", "PERSON":"yellow"}
    options = {"colors": colors}
    displacy.serve(doc, style='ent', options=options)
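# A minimal driver sketch for visualize(); the imports, the model name and the sample
# text are assumptions, since the original snippet does not show how doc was built.
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")  # assumed model
doc = nlp("Tim Cook said Apple will open a new office in Austin.")
visualize(doc)  # serves the entity view with ORG in green and PERSON in yellow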