Example 1
def punctuationService(self, text):
    # Resolve the params and checkpoint paths relative to the working directory.
    model_path = os.path.join(os.path.abspath("punc_models"), "deeppunct_params_en")
    checkpoints = os.path.join(os.path.abspath("checkpoint"), "deeppunct_checkpoint_google_news")
    print(model_path)
    print(checkpoints)
    corrector = DeepCorrect(model_path, checkpoints)
    # correct() returns a list of dicts; the punctuated text is under 'sequence'.
    segments_list = corrector.correct(text)
    return segments_list
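Note that correct() returns a list of dicts whose 'sequence' key holds the restored text (the later examples index it as result[0]['sequence']). A minimal usage sketch; the owning class name and the input string are assumptions, not part of the original example:

# PunctuationHandler is a hypothetical class wrapping the method above.
handler = PunctuationHandler()
segments = handler.punctuationService("hey how are you doing")
print(segments[0]['sequence'])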
Example 2
def main():

    args = parse_args()

    # Read only the first line of the input file.
    with open(args.input, mode='r') as read_text_file:
        line = read_text_file.readline()

    # Load the sentence segmenter and the punctuation/case corrector.
    segmenter = DeepSegment('en')
    corrector = DeepCorrect(args.params_path, args.checkpoint_path)

    with open(args.output, mode='w') as write_text_file:
        for part in segmenter.segment(line):
            corrected = corrector.correct(part)
            write_text_file.write(corrected[0]['sequence'] + '\n')
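The example calls a parse_args() helper that is not shown. A plausible sketch based only on the attribute names used above (input, output, params_path, checkpoint_path); the flag names and help strings are assumptions:

import argparse

def parse_args():
    # Hypothetical reconstruction: only the attribute names come from the example.
    parser = argparse.ArgumentParser(description='Punctuate a line of text with DeepCorrect.')
    parser.add_argument('--input', required=True, help='file whose first line is read')
    parser.add_argument('--output', required=True, help='file to write corrected sentences to')
    parser.add_argument('--params_path', required=True, help='deeppunct params file')
    parser.add_argument('--checkpoint_path', required=True, help='deeppunct checkpoint file')
    return parser.parse_args()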
Example 3
def dataPreProcessModel():
    print("Inside dataPreProcessModel")
    global corrector
    corrector = DeepCorrect('model_params/deeppunct_params_en',
                            'model_params/deeppunct_checkpoint_google_news')
    global segmenter
    segmenter = DeepSegment('en')
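A sketch of how the globals initialized here can then be used together; the input text is illustrative:

dataPreProcessModel()
for sentence in segmenter.segment('hi there how was your day'):
    print(corrector.correct(sentence)[0]['sequence'])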
Example 4
def dataPreProcessModel():
    print("Inside dataPreProcessModel")
    global corrector
    corrector = DeepCorrect(
        '/Users/Amitgarg/Documents/SJSU/272-Ranjan/Smart-MOM/model_params/deeppunct_params_en',
        '/Users/Amitgarg/Documents/SJSU/272-Ranjan/Smart-MOM/model_params/deeppunct_checkpoint_google_news'
    )
    global segmenter
    segmenter = DeepSegment('en')
Example 5
def load_model(pretrained):
    logger.info("Loading pre-trained model...")
    model_dict = {0: 'deeppunct_checkpoint_tatoeba_cornell',
                  1: 'deeppunct_checkpoint_google_news',
                  2: 'deeppunct_checkpoint_wikipedia'}
    # Fall back to the tatoeba_cornell checkpoint for unknown keys.
    model = model_dict.get(pretrained, 'deeppunct_checkpoint_tatoeba_cornell')
    checkpoint_path = "./model_data/%s" % model
    params_path = "./model_data/deeppunct_params_en"
    corrector = DeepCorrect(params_path, checkpoint_path)
    logger.info("Loaded!")
    return corrector
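A short usage sketch for load_model(), assuming the module's logger is configured and the model files exist under ./model_data; the input sentence is illustrative:

# Load the Google News checkpoint (key 1) and punctuate one sentence.
corrector = load_model(1)
result = corrector.correct('how are you')
print(result[0]['sequence'])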
Example 6
def processing(id):
    paragraph_object = Paragraph.objects.get(id=id)
    # Cache the models as module globals so they are loaded only once per process.
    global corrector, segmenter
    if 'corrector' not in globals() or 'segmenter' not in globals():
        segmenter = DeepSegment('en')
        corrector = DeepCorrect('deep_punc/deeppunct_params_en',
                                'deep_punc/deeppunct_checkpoint_wikipedia')

    list_of_sentences = segmenter.segment(paragraph_object.original_text)
    corrected_sentences = []
    for sentence in list_of_sentences:
        corrected = corrector.correct(sentence)
        corrected_sentences.append(corrected[0]['sequence'])
    paragraph = ' '.join(corrected_sentences)
    paragraph = paragraph.replace("\\", "")
    paragraph_object.processed_text = paragraph
    paragraph_object.processing = False
    paragraph_object.save()
Example 7
    # Tail of beautify(), which the POST endpoint below calls; the earlier
    # lines of the function are not shown.
    return os.linesep.join(result)


# HTTP Server
app = flask.Flask("GrammarAPI")
CORS(app)


@app.route('/', methods=['GET'])
def home():
    return "<title>GrammarAPI</title><h1>GrammarAPI</h1><p>Please send POST request!</p>"


# Fav Endpoint
@app.route('/', methods=['POST'])
def api_main():
    return Response(beautify(request.get_data().decode("utf-8")),
                    mimetype='text/plain')


# Deep Stuff
corrector = DeepCorrect('deeppunct_params_en',
                        'deeppunct_checkpoint_google_news')
segmenter = DeepSegment('en')

# HTTP Server
print("Server Started")
port = int(os.environ.get("PORT", 5000))
http_server = WSGIServer(('0.0.0.0', port), app)
http_server.serve_forever()
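A minimal client sketch for the POST endpoint above, assuming the server is running locally on the default port 5000; it uses the requests library:

import requests

# Send raw UTF-8 text; the server replies with corrected plain text.
response = requests.post('http://localhost:5000/',
                         data='hello how are you doing'.encode('utf-8'))
print(response.text)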
Example 8
import silence_tensorflow.auto  # side-effect import: silences TensorFlow log noise

import sys
import spacy
from spacy_syllables import SpacySyllables
from collections import defaultdict, Counter
from gutenhaiku.cleaner import strip_headers
from gutenhaiku import models
from gutenhaiku import utils


# Import and load DeepCorrect with its console output suppressed.
with utils.supress_output():
    from deepcorrect import DeepCorrect

    corrector = DeepCorrect(
        models.MODEL_PATHS["params"], models.MODEL_PATHS["checkpoint"]
    )


nlp = spacy.load("en_core_web_sm")

syllables = SpacySyllables(nlp)

nlp.add_pipe(syllables, after="tagger")

IGNORE_POS = set(["SPACE", "PUNCT"])
REPLACE_CHARACTERS = str.maketrans({key: None for key in "!\"';?_-0123456789"})


def process_generator(text, progress_bar):
    CURRENT_HAIKU = defaultdict(list)
Example 9
from path import Path
from nltk.tokenize import sent_tokenize

from django.contrib.auth import get_user_model
import os

from googletrans import Translator

from deepcorrect import DeepCorrect
from lexrank import LexRank, STOPWORDS

# Run the following command in terminal to connect to redis channel
# docker run -p 6379:6379 -d redis:5

User = get_user_model()

# Initializing DeepCorrect
corrector = DeepCorrect(
    '/home/pranshu/GAMR/gamr/meetingmode/deepcorrect/deeppunct_params_en',
    '/home/pranshu/GAMR/gamr/meetingmode/deepcorrect/deeppunct_checkpoint_google_news'
)

# Initializing dataset for LexRank
print('loading dataset and initializing...')
documents = []
documents_dir = Path('/home/pranshu/GAMR/gamr/meetingmode/total')

for file_path in documents_dir.files('*.txt'):
    with file_path.open(mode='rt', encoding='latin1') as fp:
        documents.append(fp.readlines())

lxr = LexRank(documents, stopwords=STOPWORDS['en'])
print('dataset load done!')
print('server is running!')
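A sketch of combining the two models at request time, following the lexrank package's get_summary API; the transcript text is illustrative:

# Punctuate the raw transcript, split it into sentences, then rank them with LexRank.
restored = corrector.correct('the meeting covered budget planning we agreed to cut costs')[0]['sequence']
sentences = sent_tokenize(restored)
summary = lxr.get_summary(sentences, summary_size=1, threshold=0.1)
print(summary)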