Example #1
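A Flask view: on POST it reads a transcript file named in the form, segments it with DeepSegment, counts which segments an operational() helper flags, and renders the totals into showgraph.html. Imports are added below as an assumption, since the original snippet omits them.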
from flask import request, render_template, redirect, url_for
from deepsegment import DeepSegment

def transcript():

    if request.method == "POST":
        transcript_path = request.form.get('transcript_path')

        with open(transcript_path) as file:
            text = file.read()

        segmenter = DeepSegment('deepsegment_eng_v1/config.json')

        segments = segmenter.segment(text)

        operational_segments = operational(segments)

        length_chat_file = len(segments)
        length_operational = len(operational_segments)
        length_non_operational = length_chat_file - length_operational

        # Call function to create a pie chart showing Operational vs Non-Operational Problems
        # draw_figure(length_operational, length_non_operational)

        return render_template("showgraph.html",
                               length_operational=length_operational,
                               length_non_operational=length_non_operational)
    else:
        return redirect(url_for("index"))
Example #2
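A small command-line script: it reads the first line of an input file, splits it into sentences with DeepSegment, punctuates each with DeepCorrect, and writes the results to an output file.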
from deepsegment import DeepSegment
from deepcorrect import DeepCorrect

def main():

    args = parse_args()

    with open(args.input, mode='r') as input_file:
        line = input_file.readline()  # note: only the first line is processed

    segmenter = DeepSegment('en')
    corrector = DeepCorrect(args.params_path, args.checkpoint_path)

    with open(args.output, mode='w') as output_file:
        for part in segmenter.segment(line):
            corrected = corrector.correct(part)
            output_file.write(corrected[0]['sequence'] + '\n')
Example #3
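An ORM-backed task (apparently Django) that builds a DeepSegment/DeepCorrect pair once, caches it, and uses it to segment and correct a Paragraph object's text before saving.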
from deepsegment import DeepSegment
from deepcorrect import DeepCorrect

# Cache the models at module level: the original tried to stash them as
# attributes on the built-in globals() function, which raises AttributeError.
_models = {}

def processing(id):
    paragraph_object = Paragraph.objects.get(id=id)
    if 'segmenter' not in _models:
        _models['segmenter'] = DeepSegment('en')
        _models['corrector'] = DeepCorrect('deep_punc/deeppunct_params_en',
                                           'deep_punc/deeppunct_checkpoint_wikipedia')
    segmenter = _models['segmenter']
    corrector = _models['corrector']

    list_of_sentences = segmenter.segment(paragraph_object.original_text)
    corrected = [corrector.correct(sentence)[0]['sequence']
                 for sentence in list_of_sentences]
    paragraph = ' '.join(corrected)
    paragraph = paragraph.replace("\\", "")
    paragraph_object.processed_text = paragraph
    paragraph_object.processing = False
    paragraph_object.save()
Example #4
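A text-generation method on a Keras language model: it predicts one word at a time from a seed text and, if requested, runs DeepSegment over the generated output.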
def predict(self, sample_text, word_length, segment, verbose):  # sample_text is the seed

    '''Predicts the next text sequences from a seed text.'''
    for _ in range(word_length):  # generate one word per iteration
        # turn the current text into a padded sequence of token ids
        token_list = self.tokenizer.texts_to_sequences([sample_text])[0]
        token_list = pad_sequences([token_list], maxlen=self.maxSequenceLen - 1,
                                   padding=self.padding_method)
        # index of the predicted next word
        predicted = self.model.predict_classes(token_list, verbose=verbose)
        output_word = ""
        for word, index in self.tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        sample_text += " " + output_word
    # the seed plus the generated text
    self.sample_text = sample_text
    if segment:
        segmenter = DeepSegment('en')
        sample_text = segmenter.segment(self.sample_text)
    else:
        print(sample_text)
        sample_text = self.sample_text
    return sample_text

"""

if __name__ == "__main__":

    video_id = sys.argv[1]

    vader_flag = 1
    #print(get_transcript())
    #get_comments_clean_and_organise()

    print("TRANSCRIPT" + "\n")
    transcript_li = get_transcript(video_id)

    segmenter = DeepSegment('en')  # assumed: segmenter is not defined in the original snippet
    segmented_text_li = segmenter.segment(" ".join(transcript_li))
    print(segmented_text_li)

    str_text = ". ".join(
        segmented_text_li
    )  #converting into a single string to easily pass to watson tone analyzer
    #print(str_text)

    print("NRC")
    sent_by_sent_transcript_li = segmented_text_li
    normalized_counts_transcript, arousal = normalized_emotion_counts(
        sent_by_sent_transcript_li, vader_flag)
    print("Normalized emotion counts of transcript: \n")
    print(normalized_counts_transcript)
    print("Arousal Score: " + str(arousal))
Example #6
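A scratch script comparing sentence splitters: NNSplit for German text, with disabled (if False:) blocks trying DeepSegment, which does not support German, and a Hugging Face tokenizer.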
    splitter = NNSplit("de")

    res = splitter.split([data])

# =============================================================================
# More advanced: DeepSegment (does not support German)
# =============================================================================
if False:
    from deepsegment import DeepSegment
    # The default language is 'en'
    segmenter = DeepSegment('de')

    with open('data/start.txt', 'r') as myfile:
        data = myfile.read()

    segmenter.segment('I am Batman i live in gotham')

# =============================================================================
# Huggingface tokenizer
# =============================================================================

if False:
    from tokenizers.implementations import ByteLevelBPETokenizer
    from tokenizers.processors import BertProcessing
    from pathlib import Path

    tokenizer = ByteLevelBPETokenizer(
        "data/german_old.json",
        "data/german_old.txt",
    )
Example #7
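Segments a raw, unpunctuated YouTube transcript (stored as a list of caption fragments) into sentences, then sets up example sentences for the ParallelDots API. The snippet is truncated at the top.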
)

"""
text = 'Team, I know that times are tough! Product '\
    'sales have been disappointing for the past three '\
    'quarters. We have a competitive product, but we '\
    'need to do a better job of selling it!'

"""

li_B= ['since 1990 the number of gun deaths', 'worldwide has reached six point five', 'million three quarters of gun deaths', 'occur in just 15 countries Latin America', 'is home to some of the worlds most', 'violent countries by murder rate El', 'Salvador Venezuela and Guatemala are the', 'top three countries for deaths caused by', 'guns per population these Latin American', 'countries are marred by corruption', 'organized crime and a dysfunctional', 'criminal justice system that further', 'fuels the problem the availability of', 'guns in the United States is another', 'concern for these countries an estimated', '200,000 guns a year that were first sold', 'in the United States are smuggled over', 'the southern border and used in violent', 'crimes in Latin America and the', 'Caribbean in the United States the', 'constitutional right to bear arms has', 'led to looser regulations and easier', 'access to firearms this contributes to', 'the 30,000 men women and children who', 'were killed with guns each year mass', 'shootings attract their headlines but in', 'fact these make up only 0.2% of gun', 'deaths 60% of gun related deaths are in', 'fact suicide', "America's suicide rate increased by 25", 'percent between 1999 and 2015 of nearly', '45,000 taking their own lives in 2015', 'alone half of these suicides were', "carried out with guns though guns aren't", 'the most common method of suicide they', 'are the most lethal other wealthy', 'countries have far lower rates of gun', 'violence in Japan if you want to own a', 'gun you must pass a written exam and a', 'shooting range test alongside a series', 'of mental health drug in criminal record', 'tests', 'it has virtually eradicated gun crime', 'after a mass shooting in 1996 Australia', 'introduced an effective buyback scheme', 'of firearms in the 20 years following', 'the bag there was an accelerated decline', 'in total gun deaths but in America the', 'House of Representatives has not voted', 'on a single measure to prevent gun', 'violence and in some states such as', 'Texas where students at public colleges', 'can now carry concealed handguns the law', 'has actually loosened easy access to', 'firearms will continue to be the main', 'driver of Americas gun debt']




segmenter = DeepSegment('en')  # assumed: the snippet is truncated and segmenter is not defined above
text_B = segmenter.segment(" ".join(li_B))

str_text_B = ". ".join(text_B)


#print(str_text_B)


str_text_C= "The journalist is twisting the facts and reporting fake news. The boy is twisting the rope on the swing. The deal was an unfortunate twist in events."

print("PARALLELDOTS")
a= "The journalist is twisting the facts and reporting fake news."
b= "The boy is twisting the rope on the swing." 
c= "The deal was an unfortunate twist in events." 

print(a)
Example #8
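A slide-generation pipeline: TextRazor extracts keywords from the input text, Bing Image Search fetches a matching image, and python-pptx assembles a slide from the DeepSegment-segmented text and the downloaded picture.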
def fn(test):

    from deepsegment import DeepSegment
    segmenter = DeepSegment('en')
    import textrazor
    textrazor.api_key = "043e170ef41a6d297a508581225bd493943f3a9f831345fb71f86d64"

    client = textrazor.TextRazor(extractors=["words", "relations"])
    #client.set_do_cleanup_HTML(True)

    response = client.analyze(test)
    l = []

    for prop in response.properties():  # avoid shadowing the built-in `property`
        for word in prop.predicate_words:
            l.append(word.lemma)
            if word.lemma == "sound":
                for property_word in prop.property_words:
                    for phrase in property_word.noun_phrases:
                        print(phrase)
                break
    l = []
    flag = False
    for sentence in response.sentences():
        print(sentence.words)
        for word in sentence.words:
            if word.lemma in ("image", "picture", "photo", "show", "see", "display"):
                k = word.lemma
                flag = True
            l.append(word.lemma)
    astring = " ".join(l)

    with open("keyword.txt", 'a') as f:
        f.write(astring + "\n")
    alist = segmenter.segment(astring)
    print(alist)

    if flag:
        s = l.index(k)
        m = l[s:]

        t = " ".join(m)
        st = ""
    else:
        t = "No image found"
        st = " ".join(l)

    text1 = st
    text2 = t

    print(t)
    response1 = client.analyze(t)

    for noun in response1.noun_phrases():
        print(noun.words)
        for word in noun.words:
            print(word.lemma)


    from requests import exceptions
    import argparse
    import requests
    import cv2
    import os
    import time

    starttime = time.time()
    

    # set your Microsoft Cognitive Services API key along with (1) the
    # maximum number of results for a given search and (2) the group size
    # for results (maximum of 50 per request)
    API_KEY = "948886a19a794c428c53fcfa2aa0325b"
    MAX_RESULTS = 1
    GROUP_SIZE = 1
    
    # set the endpoint API URL
    URL = "https://api.cognitive.microsoft.com/bing/v7.0/images/search"

    # when attempting to download images from the web both the Python
    # programming language and the requests library have a number of
    # exceptions that can be thrown so let's build a list of them now
    # so we can filter on them
    EXCEPTIONS = set([IOError, FileNotFoundError,
        exceptions.RequestException, exceptions.HTTPError,
        exceptions.ConnectionError, exceptions.Timeout])


    # store the search term in a convenience variable then set the
    # headers and search parameters
    term = t
    headers = {"Ocp-Apim-Subscription-Key" : API_KEY}
    params = {"q": term, "offset": 0, "count": GROUP_SIZE}
    
    # make the search
    print("[INFO] searching Bing API for '{}'".format(term))
    search = requests.get(URL, headers=headers, params=params)
    search.raise_for_status()
    
    # grab the results from the search, including the total number of
    # estimated results returned by the Bing API
    results = search.json()
    estNumResults = min(results["totalEstimatedMatches"], MAX_RESULTS)
    print("[INFO] {} total results for '{}'".format(estNumResults,
        term))
    
    # initialize the total number of images downloaded thus far
    total = 0


    for offset in range(0, estNumResults, GROUP_SIZE):
        # update the search parameters using the current offset, then
        # make the request to fetch the results
        print("[INFO] making request for group {}-{} of {}...".format(
            offset, offset + GROUP_SIZE, estNumResults))
        params["offset"] = offset
        search = requests.get(URL, headers=headers, params=params)
        search.raise_for_status()
        results = search.json()
        print("[INFO] saving images for group {}-{} of {}...".format(
            offset, offset + GROUP_SIZE, estNumResults))
            # loop over the results
        for v in results["value"]:
            # try to download the image
            try:
                # make a request to download the image
                print("[INFO] fetching: {}".format(v["contentUrl"]))
                r = requests.get(v["contentUrl"], timeout=30)
    
                # build the path to the output image
                ext = v["contentUrl"][v["contentUrl"].rfind("."):]
                p = os.path.sep.join([r"C:\Users\HP\Desktop\Projects\VIT Hack\SlideEZ-test", "{}{}".format(
				str(total).zfill(8), ext)])

                print("The answer is")
                print(p)
    
                # write the image to disk
                with open(p, "wb") as f:
                    f.write(r.content)
    
            # catch any errors that would prevent us from downloading the image
            except Exception as e:
                # check to see if our exception is in our list of
                # exceptions to check for
                if type(e) in EXCEPTIONS:
                    print("[INFO] skipping: {}".format(v["contentUrl"]))
                    continue
            # try to load the image from disk
            image = cv2.imread(p)

            # if the image is `None` then we could not properly load the
            # image from disk (so it should be ignored)
            if image is None:
                print("[INFO] deleting: {}".format(p))
                os.remove(p)
                continue

            # update the counter
            total += 1
    endtime = time.time() - starttime
    print("Total time taken to search for the query is")
    print(endtime)

    from pptx import Presentation
    from pptx.util import Inches, Pt 
    from pptx.enum.text import PP_ALIGN
    from PIL import Image
    from pptx.dml.color import RGBColor
    from pptx.enum.dml import MSO_THEME_COLOR

    presentation = "testppt3.pptx"
    prs = Presentation(presentation)
    if len(prs.slides)==0:
        title_slide_layout = prs.slide_layouts[0]
        slide = prs.slides.add_slide(title_slide_layout)
        background=slide.background
        fill=background.fill
        fill.gradient()
        fill.gradient_angle=40
        gradient_stops=fill.gradient_stops
        gradient_stop=gradient_stops[0]
        color=gradient_stop.color
        color.theme_color=MSO_THEME_COLOR.LIGHT_1
        title = slide.shapes.title
        subtitle = slide.placeholders[1]
        title.text = "Test"
        subtitle.text = "test"
        prs.save(presentation)
    if not flag:

        text_slide_layout = prs.slide_layouts[1]
        slide = prs.slides.add_slide(text_slide_layout)
        background = slide.background
        fill = background.fill
        fill.gradient()
        fill.gradient_angle = 40
        gradient_stops = fill.gradient_stops
        gradient_stop = gradient_stops[0]
        color = gradient_stop.color
        color.theme_color = MSO_THEME_COLOR.LIGHT_1
        title = slide.shapes.title
        blist = []
        for segment_text in alist:
            blist += segment_text.split(" ")

        # use the longest word as the slide title
        mx = 0
        slide_t = ""
        for j in blist:
            if len(j) >= mx:
                mx = len(j)
                slide_t = j.title()

        title.text = slide_t
        content = slide.shapes.placeholders[1]
        tf = content.text_frame
        for i in alist:
            para = tf.add_paragraph()
            para.text = i
            para.level = 1
        prs.save(presentation)
    else:

        image_slide_layout = prs.slide_layouts[8]
        slide = prs.slides.add_slide(image_slide_layout)
        background = slide.background
        fill = background.fill
        fill.gradient()
        fill.gradient_angle = 40
        gradient_stops = fill.gradient_stops
        gradient_stop = gradient_stops[0]
        color = gradient_stop.color
        color.theme_color = MSO_THEME_COLOR.LIGHT_1
        #title = slide.shapes.title
        #title.text="Sub2"
        content = slide.shapes.placeholders[1]
        im = Image.open(p)
        width, height = im.size
        content.height = height
        content.width = width
        content.insert_picture(p)
        content = slide.shapes.placeholders[0]
        tf = content.text_frame
        for i in alist:
            para = tf.add_paragraph()
            para.text = i
            para.level = 1
            para.alignment = PP_ALIGN.CENTER
        #left = Inches(6)
        #top = Inches(3)
        #height = Inches(2)
        #pic = slide.shapes.add_picture(p, left, top, height=height)
        prs.save(presentation)
Example #9
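A minimal wrapper method that segments a text into sentences with DeepSegment.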
def segmentsent(self, text):
    # note: this builds the DeepSegment model on every call
    segmenter = DeepSegment('en')
    result = segmenter.segment(text)
    return result
Example #10
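The simplest possible usage: instantiate DeepSegment('en') and segment a single unpunctuated string.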
from deepsegment import DeepSegment
seg = DeepSegment('en')
lt = seg.segment(
    'today i have to talk about IC 741 it has 14 pins pin number 2 is missing in the slide please note '
)
Example #11
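A micro-benchmark: after a short warmup, it times DeepSegment over 8192 copies of an example utterance at batch sizes 1, 32, and 128.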
from json import dump
from time import time
from deepsegment import DeepSegment

model = DeepSegment("en")

example = [
    "I was hungry i ordered a pizza and i went to the movies which movie did you go to i watched dark knight rises oh how was it it was a good movie yeah thought so"
]

# Warmup
for _ in range(3):
    print("Expected result:", model.segment(example, batch_size=1))

# Expected result is [['I was hungry', 'i ordered a pizza and i went to the movies', 'which movie did you go to', 'i watched dark knight rises', 'oh how was it', 'it was a good movie', 'yeah thought so']]

in_data = list(example * 8192)

for batch_size in [1, 32, 128]:

    with open(f"{batch_size}.json", "w") as f:
        dump({'data': in_data[:batch_size]}, f)

    start = time()
    results = model.segment(in_data, batch_size)
    end = time()
    print(
        f"\nBatch Size: {batch_size}  Total Time: {end - start:.2f}s for {len(in_data)} examples."
    )
Example #12
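The tail end of the transcript example: the segmented sentences are joined into a single string and sent to the IBM Watson Tone Analyzer. The opening of the li_B list is truncated.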
    'it has virtually eradicated gun crime',
    'after a mass shooting in 1996 Australia',
    'introduced an effective buyback scheme',
    'of firearms in the 20 years following',
    'the bag there was an accelerated decline',
    'in total gun deaths but in America the',
    'House of Representatives has not voted',
    'on a single measure to prevent gun',
    'violence and in some states such as',
    'Texas where students at public colleges',
    'can now carry concealed handguns the law',
    'has actually loosened easy access to',
    'firearms will continue to be the main', "driver of America's gun debt"
]

segmenter = DeepSegment('en')  # assumed: the snippet is truncated and segmenter is not defined above
segmented_li = segmenter.segment(" ".join(li_B))

text_B = ". ".join(segmented_li)

tone_analysis_B = tone_analyzer.tone(
    {
        'text': text_B
    }, content_type='application/json').get_result()

print("B: \n")
print(tone_analysis_B)
print("\n")
print(json.dumps(tone_analysis_B, indent=2))

#NRC Emotions