def punctuate(text):
    # Load the pre-trained INTERSPEECH-T-BRNN model and punctuate the input.
    p = Punctuator(
        "C:\\Users\\lm44\\Documents\\Code\\Python\\Sumit Backend\\functions\\INTERSPEECH-T-BRNN.pcl"
    )
    punctuated = p.punctuate(text)
    return punctuated
def punctuate_text(text): print("Performing Puntuation ... \n") p = Punctuator('models/punctuator1.pcl') new_text = p.punctuate(text) new_text = re.sub("[?:;,]", "", new_text) new_text = re.sub("\s\s+", " ", new_text) print("Original text is:\n") print(new_text) print("\n\n") return new_text
def correct(begin_of_path, text, language="English"):
    # text is currently a raw string
    words = text.split(" ")
    correct_words = []
    spell = SpellChecker()
    for word in words:
        correct_words.append(spell.correction(word))
    separator = ' '
    correct_text = separator.join(correct_words)
    path_to_model = os.path.join(begin_of_path, "data", "Demo-Europarl-EN.pcl")
    p = Punctuator(path_to_model)
    correct_text_with_punct = p.punctuate(correct_text)
    return correct_text_with_punct
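# A minimal usage sketch for correct(), assuming the Demo-Europarl-EN.pcl model
# lives under <base>/data/ as the function expects; the base path and sample
# sentence below are hypothetical, not taken from the original.
base = os.path.dirname(os.path.abspath(__file__))
print(correct(base, "ths is a smple sentnce without punctuation"))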
def punctuates(doo=True):
    # Read the raw transcription once, then punctuate it only when requested.
    with open("transcription.txt", "r") as f:
        text = f.read()
    if doo:
        from punctuator import Punctuator
        p = Punctuator('hel.pcl')
        punctuated = p.punctuate(text)
        print("Punctuating Done")
        return punctuated
    return text
def addPunctuation(text_file):
    # Load the pre-trained model.
    p = Punctuator('model.pcl')
    # Read unstructured text from the file.
    with open(text_file, "r") as fp:
        text = fp.read()
    # Punctuate the read text.
    sentences = p.punctuate(text)
    # Write the punctuated text into the output file.
    with open("notes.txt", "w") as otp_file:
        otp_file.write(sentences)
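# A possible call site, assuming model.pcl sits in the working directory; the
# input file name is hypothetical. Output always lands in notes.txt (hard-coded above).
addPunctuation("transcript.txt")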
def punctuate():
    global filename
    global PCL
    # Read the source text (path redacted).
    with open('****************', 'r') as t:
        source = t.read()

    timestamp = datetime.datetime.strptime(time.ctime(), "%a %b %d %H:%M:%S %Y")
    print(f'{timestamp} | Punctuating chunk')
    p = Punctuator('****************')
    punctuated = p.punctuate(source)

    timestamp = datetime.datetime.strptime(time.ctime(), "%a %b %d %H:%M:%S %Y")
    print(f'{timestamp} | Saving your file')
    # Reopen the file in write mode to save the punctuated text.
    with open('****************', 'w') as t:
        t.write(punctuated)

    timestamp = datetime.datetime.strptime(time.ctime(), "%a %b %d %H:%M:%S %Y")
    print(f'{timestamp} | Punctuation complete')
def test(request):
    video_id = "5v1B1R3lEO8"
    srt = YouTubeTranscriptApi.get_transcript(video_id)
    alltext = ""
    for item in srt:
        alltext = alltext + item['text'] + " "
    print(os.path.join(settings.BASE_DIR))
    file_ = os.path.join(settings.BASE_DIR, 'model.pcl')
    p = Punctuator(file_)
    # Punctuate only the first 5000 characters of the transcript.
    punctuated = p.punctuate(alltext[:5000])
    specialtag = punctuated.split('.')
    # newpara = ""
    # for item in specialtag:
    #     newpara = newpara + item + ".<br>"
    # specialtag = alltext
    return render(request, 'test.html', {
        'foo': specialtag,
    })
def fix_text(text_list, is_saved):
    """
    Cleans, punctuates, neural-coreferences, and sentencizes the transcript.

    :param is_saved: True if a version of the fixed text is already saved in a file
    :param text_list: A list of strings; an 'unclean' transcript
    :return: A list of tokenized sentences (every sentence is a Doc object)
    """
    file_name = 'fixed.txt'
    if is_saved:
        with open(file_name, 'r') as fixed:
            fixed_text_list = fixed.readlines()
        fixed_text_list = [text.replace('\n', '') for text in fixed_text_list]
        fixed_text_list = [nlp(sentence) for sentence in fixed_text_list]
        return fixed_text_list
    else:
        fixed_text = ' '.join(text_list)  # convert the list into one string
        fixed_text = fixed_text.replace('  ', ' ')  # remove double spaces
        print('adding punctuation; please wait a few minutes...')
        punctuator = Punctuator('Demo-Europarl-EN.pcl')
        fixed_text = punctuator.punctuate(fixed_text)
        print('removing interjections; please wait a few more minutes...')
        fixed_text_doc = remove_tokens_by_pos(nlp(fixed_text), 'INTJ')
        print('performing neural coreferencing; please wait for several more minutes...')
        neuralcoref.add_to_pipe(nlp)
        fixed_text_doc = fixed_text_doc._.coref_resolved
        print('splitting the text into sentences; please keep waiting...')
        fixed_text_list = re.split(r'\.|\?|!', fixed_text_doc)
        with open(file_name, 'w') as fixed:
            for sentence in fixed_text_list:
                fixed.write(sentence + "\n")
        fixed_text_list = [nlp(sentence) for sentence in fixed_text_list]
        return fixed_text_list
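# A hedged example of driving fix_text(), assuming `nlp` is an already-loaded
# spaCy pipeline; the transcript list below is placeholder data.
raw_transcript = ["so today we are going to", "talk about neural networks"]
sentences = fix_text(raw_transcript, is_saved=False)  # first run: punctuate and cache
sentences = fix_text(raw_transcript, is_saved=True)   # later runs reload fixed.txt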
def summarize(vid_id):
    # Pull the transcript and join it into one unpunctuated string.
    subs = YouTubeTranscriptApi.get_transcript(vid_id)
    sentences = [i['text'] for i in subs]
    text = ' '.join(sentences)

    # Restore punctuation so the text can be split into sentences.
    p = Punctuator('INTERSPEECH-T-BRNN.pcl')
    text = p.punctuate(text)

    # Build a word-frequency table, ignoring stopwords.
    stopWords = set(stopwords.words("english"))
    words = word_tokenize(text)
    freqTable = dict()
    for word in words:
        word = word.lower()
        if word in stopWords:
            continue
        if word in freqTable:
            freqTable[word] += 1
        else:
            freqTable[word] = 1

    # Score each sentence by the frequencies of the words it contains.
    sentences = sent_tokenize(text)
    sentenceValue = dict()
    for sentence in sentences:
        for word, freq in freqTable.items():
            if word in sentence.lower():
                if sentence in sentenceValue:
                    sentenceValue[sentence] += freq
                else:
                    sentenceValue[sentence] = freq

    # Keep sentences scoring well above the average.
    sumValues = 0
    for sentence in sentenceValue:
        sumValues += sentenceValue[sentence]
    average = int(sumValues / len(sentenceValue))
    summary = ''
    for sentence in sentences:
        if (sentence in sentenceValue) and (sentenceValue[sentence] > (1.2 * average)):
            summary += " " + sentence
    return summary
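# Sketch of a call, with a placeholder video ID; assumes INTERSPEECH-T-BRNN.pcl
# and the NLTK punkt/stopwords data are already downloaded.
print(summarize("VIDEO_ID"))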
def main(fileName):
    fileName, fileExt = fileName.split('.')
    print(fileName, fileExt)
    # os.system(f"ffmpeg -i C:\\Users\\Chinmay\\CSE\\pyCode\\Unysis\\UNA-Unisys-Natural-Assistant-main\\UNA-Unisys-Natural-Assistant-main\\{fileName}.{fileExt} -ab 160k -ac 2 -ar 44100 -vn C:\\Users\\Chinmay\\CSE\\pyCode\\Unysis\\UNA-Unisys-Natural-Assistant-main\\UNA-Unisys-Natural-Assistant-main\\{fileName}.wav")
    # ipFile = ffmpeg.input(fileName + fileExt)
    # opFile = ffmpeg.output(ipFile, fileName + ".wav")

    # Extract the audio track as 16-bit PCM WAV.
    clip = AudioFileClip(f"{fileName}.{fileExt}")
    clip.write_audiofile(f"{fileName}.wav", codec='pcm_s16le')
    clip.close()

    with sf.SoundFile(f'{fileName}.wav') as f:
        audio_dur = len(f) / f.samplerate

    # Transcribe the audio in 25-second chunks.
    r = sr.Recognizer()
    text = ""
    rec_dur = 25
    with sr.AudioFile(f'{fileName}.wav') as source:
        for x in range(0, int(audio_dur / rec_dur)):
            audio = r.record(source, duration=rec_dur)
            try:
                new_txt = r.recognize_google(audio)
                text = text + new_txt + " "
            except (sr.UnknownValueError, sr.RequestError):
                pass
        # Transcribe whatever remains after the full 25-second chunks.
        audio = r.record(source, duration=audio_dur % rec_dur)
        try:
            new_txt = r.recognize_google(audio)
            text = text + new_txt
        except (sr.UnknownValueError, sr.RequestError):
            pass
    print("Done")

    # Restore punctuation, then fix grammar with LanguageTool.
    p = Punctuator('Demo-Europarl-EN.pcl')
    text = p.punctuate(text)
    tool = language_tool_python.LanguageTool('en-US')
    matches = tool.check(text)
    print(len(matches))
    for lab in range(len(matches)):
        print(lab)
        print(matches[lab].ruleId, matches[lab].replacements)
    text_new = tool.correct(text)
    print(text_new)

    nltk.download('punkt')
    nltk.download('stopwords')

    # Text summarization.
    preprocessor = TextPreProcessor(NLTKTokenizer(), NLTKCleaner())
    similarity_algorithm = BM25Plus()
    ranker = TextRank()
    ir = ClassicalIR()
    model = Summarizer(preprocessor, similarity_algorithm, ranker, ir)
    summarised_content = model.summarise(text_new, reduction_ratio=0.80, preserve_order=True)
    print("\n --- Summarized Text ---\n")
    print(construct_sentences_from_ranking(summarised_content))
    with open(f"{fileName}.txt", "w+") as file:
        file.write(construct_sentences_from_ranking(summarised_content))

    # Text keyword extraction.
    preprocessor = TextPreProcessor(NLTKTokenizer(), NLTKCleaner(skip_stemming=True))
    keyword_extractor = KeywordExtractor(preprocessor, ClassicalIR())
    keywords = keyword_extractor.extract_keywords(text, count=10, raw=False)
    print("\n --- Keywords ---\n")
    print(keywords)
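# Hypothetical entry point; the input file name is a placeholder, and the audio
# track is extracted from it before transcription.
main("lecture.mp4")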
from punctuator import Punctuator

# The same raw transcript is fed to both models so their outputs can be compared.
transcript = 'Uh Now last video we went through the first five steps AGADAP So you just kind of shown here of our uh relative combat power that weve chosen to do a penetration umh how weve laid out our forces arrayed them and uh set our phases and selected our leadership Okay so now we get our battle book page were gonna do our COA statement and sketch and we can drive on with our sketch Alright so heres the page out of the battle book and uh we need to do both our sketch and our statement Now for this Im gonna go ahead and and just start with uh the sketch Now uh we know that were gonna start out in our assembly area Alright so we can go ahead and draw that up here at the top of the page and I wanna alot leave uh quite a bit of room down here to show whats gonna happen for actions on the objective Remember the COA sketch isnt to scale uh Were gonna need uh some kind of minor departure We know thats gonna happen Alright and were gonna leave from the uh assembly area and were gonna move to an ORP right So we gotta put our ORP on here uh and since thats gonna be a movement theres gonna be an axis to get us there Alright and uh thinking through this theres probably some phase line here Alright because were gonna spend phase one in the assembly area phase two moving to the ORP and then phase three is all gonna happen in the ORP so theres probably a phase line here because were gonna act differently at that point'

output_file = open('output.txt', 'w')

p = Punctuator('Demo-Europarl-EN.pcl')
output_file.write('Demo-Europarl-EN.pcl\n\n')
output_file.write(p.punctuate(transcript))
# output_file.write(p.punctuate('this is a test sentence for part 1'))

p3 = Punctuator('INTERSPEECH-T-BRNN.pcl')
output_file.write('\n\nINTERSPEECH-T-BRNN.pcl\n\n')
output_file.write(p3.punctuate(transcript))
# output_file.write(p3.punctuate('this is a test sentence for part 3'))

output_file.close()
import sys

punctuatorSelected = input(
    'Which punctuator would you like to test: \n1. Demo-Europarl-EN.pcl \n2. INTERSPEECH-T-BRNN.pcl\n\n'
)
if punctuatorSelected == '1':
    p = Punctuator('Demo-Europarl-EN.pcl')
    pText = 'Demo-Europarl-EN'
elif punctuatorSelected == '2':
    p = Punctuator('INTERSPEECH-T-BRNN.pcl')
    pText = 'INTERSPEECH-T-BRNN'
else:
    sys.exit('Unknown selection; expected 1 or 2.')
print('\nYou have selected ', pText, '\n')

textfile = sys.argv[1]
with open(textfile, 'r') as file:
    data = file.read().replace('\n', ' ')
data = data.lower()

# Drop filler words before punctuating.
fillerwords = ['uh']
datawords = data.split()
processData = [word for word in datawords if word.lower() not in fillerwords]
finalText = ' '.join(processData)

# Punctuate once and reuse the result.
punctuated = p.punctuate(finalText)
print(punctuated)
with open('output.txt', 'w') as output_file:
    output_file.write(punctuated)
def punctuate_text(text):
    p = Punctuator('models/INTERSPEECH-T-BRNN.pcl')
    print(p.punctuate(text))
def punctuate_conversation(conversation, loc):
    # `loc` is the path to a pre-trained .pcl model file.
    p = Punctuator(loc)
    punctuated_converse = p.punctuate(conversation)
    return punctuated_converse
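# A minimal call, assuming a locally downloaded Demo-Europarl-EN.pcl; both the
# model path and the sample sentence are hypothetical.
print(punctuate_conversation("hello how are you i am fine", "models/Demo-Europarl-EN.pcl"))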
textD = "61 people passed away today. I am very tired, I want to sleep. I only slept for 1 hour last night." p = Punctuator('Demo-Europarl-EN.pcl') # NLP def nlp_parser(text): parser.setup(text) # parser.display_tree() parser.extract_noun_verb() def nummod_parser(text): nparser.setup(text) nparser.get_noun_count_pairs() # Speech to text recognizer = SR.Recognizer() with SR.Microphone() as source: print("Speak:") audio = recognizer.listen(source) try: text_result = recognizer.recognize_google(audio) + "." text_result_punctuated = p.punctuate(text_result) print("You said: " + text_result_punctuated) nlp_parser(text_result_punctuated) # mmod_parser(text_result_punctuated) except SR.UnknownValueError: print("Could not understand audio") except SR.RequestError as e: print("Could not request results; {0}".format(e))
def punctuator_src_test():
    loc = "./src/AI_files/punctuator/Demo-Europarl-EN.pcl"
    p = Punctuator(loc)
    punctuated_converse = p.punctuate("some one")
    return punctuated_converse
def form_valid(self, form):
    self.object = form.save(commit=False)
    video_id = self.object.body.split('?v=')[1].split("&")[0]
    self.object.vid_id = video_id
    print('Does he have it?')
    print(self.object.vid_id)

    # If a post for this video already exists, redirect to it instead of re-scraping.
    if Post.objects.filter(vid_id=self.object.vid_id).exists():
        yo = Post.objects.filter(vid_id=self.object.vid_id)[:1]
        print(yo[0].pk)
        print('REDIRECT!')
        return HttpResponseRedirect(
            reverse('article-detail', kwargs={'pk': str(yo[0].pk), "yt": video_id}))
        # return redirect('article-detail', post.pk post.vid_id+str(yo[0].pk)+'/'+str(video_id))

    data = scrape_url('http://youtube.com/watch?v=' + video_id)
    print(data.title)
    print(data.poster)

    # Build the transcript, inserting a timestamp roughly every 30 seconds.
    srt = YouTubeTranscriptApi.get_transcript(video_id)
    totalduration = 0
    alltext = ""
    for item in srt:
        go = item['start']
        if go - totalduration > 30:
            alltext = alltext + time.strftime('%H:%M:%S', time.gmtime(item['start'])) + item['text'] + " "
            totalduration = item['start']
        else:
            alltext = alltext + item['text'] + " "

    # Turn each HH:MM:SS timestamp into a clickable seek link.
    r = re.findall(r'\d{2}:\d{2}:\d{2}', alltext)
    for item in r:
        alltext = alltext.replace(
            item,
            "<br><a class='ytlink' href='#' type='button' onclick='seek(" +
            str(get_sec(item)) + ")'>" + item + "</a> </br>")
    print(alltext)

    file_ = os.path.join(settings.BASE_DIR, 'model.pcl')
    p = Punctuator(file_)
    punctuated = p.punctuate(alltext)

    self.object.title = data.title
    self.object.title_tag = data.poster
    # totaltext = punctuated.split(".")
    # finaltext = ""
    # for item in totaltext:
    #     finaltext = finaltext + item + "." + "<br><br>"
    iframe = '''<iframe id="player" type="text/html" width="560" height="315" src="http://www.youtube.com/embed/PjDw3azfZWI?enablejsapi=1" frameborder="0"></iframe>'''
    embed = '''<iframe id="player" type="text/html" width="560" height="315" src="http://www.youtube.com/embed/''' + video_id + '''?enablejsapi=1" frameborder="0"></iframe>'''
    self.object.body = embed + "<br>" + alltext
    self.object.save()
    return HttpResponseRedirect(
        reverse('article-detail', kwargs={'pk': self.object.id, "yt": video_id}))
parts = line.split(" ") if len(parts) > 1: id = parts[0] val = " ".join(parts[1:]) return id, val with open(file, 'r') as fd: while True: line = fd.readline() if not line: break id, valstr = parse_line(line) val = json.loads(valstr) text = val.get("text") text = " ".join(text) text = text.replace("[Music]", " ").replace("\r", " ").replace("\n", " ") text = ' '.join(text.split()) sentences = sent_tokenize(text) punc_text = None punc_needed = True if len(sentences) > 2: punc_needed = False punc_text = text else: punc_text = punc.punctuate(text)
from punctuator import Punctuator
import sys

p = Punctuator('Demo-Europarl-EN.pcl')

textfile = sys.argv[1]
with open(textfile, 'r') as file:
    data = file.read().replace('\n', ' ')
data = data.lower()
print(p.punctuate(data))
import os

from punctuator import Punctuator

p = Punctuator('/home/erviewre/h3podcastbot/.punctuator/Demo-Europarl-EN.pcl')

directory = os.fsencode("/home/erviewre/h3podcastbot/raw_scripts")
file_count = len(os.listdir(directory))
progress = 1
for file in os.listdir(directory):
    print(str(progress) + "/" + str(file_count))
    progress += 1
    filename = os.fsdecode(file)
    with open('/home/erviewre/h3podcastbot/raw_scripts/' + filename, 'r') as open_file:
        data = open_file.read().replace('\n', ' ')
    with open('/home/erviewre/h3podcastbot/punctuated_scripts/' + filename, 'w') as punctuated_file:
        punctuated_file.write(p.punctuate(data))
def handlePunctuation(text):
    # Punctuator library; pre-trained models available at:
    # https://drive.google.com/drive/folders/0B7BsN5f2F1fZQnFsbzJ3TWxxMms
    p = Punctuator('Demo-Europarl-EN.pcl')
    return p.punctuate(text)
corrector = jamspell.TSpellCorrector()
corrector.LoadLangModel("../models/spellchecker_en.bin")

# Load any audio file of your choice.
speech, rate = librosa.load("../10mintest.mp3", sr=16000)
length = librosa.get_duration(y=speech, sr=16000)

# Split the audio into roughly 10-second chunks.
n_chunks = int(np.ceil(length / 10))
chunks = np.array_split(speech, n_chunks)

def transcriptor(chunks):
    string = ""
    for i in chunks:
        input_values = tokenizer(i, return_tensors='pt').input_values
        # Store logits (non-normalized predictions).
        logits = model(input_values).logits
        # Store predicted ids.
        predicted_ids = torch.argmax(logits, dim=-1)
        # Decode the audio to generate text.
        transcriptions = tokenizer.decode(predicted_ids[0])
        string += transcriptions + " "
    return string

text = transcriptor(chunks)
# print(text)
text = text.lower()
text = punctuator.punctuate(text)
text = corrector.FixFragment(text)
print(text)
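# The snippet above references `model`, `tokenizer`, and `punctuator` without
# defining them. A plausible setup sketch, assuming the Hugging Face wav2vec2
# base checkpoint and a locally downloaded .pcl model; all names and paths here
# are assumptions, not taken from the original.
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
from punctuator import Punctuator

tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
punctuator = Punctuator("../models/Demo-Europarl-EN.pcl")  # hypothetical path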
from punctuator import Punctuator
from pathlib import Path
import os

home = str(Path.home())
model_file = os.path.join(home, ".punctuator", "Demo-Europarl-EN.pcl")
print(f"model_file {model_file}")

# Load the model once and reuse it across calls.
p = Punctuator(model_file)
print("Loaded model in memory")
print(p.punctuate("some text"))
print(p.punctuate("some more text"))
print(p.punctuate("some more more text"))
print(p.punctuate("some many more text that needs punctuation"))