Example #1
import spacy

from pdf_extractor import extract


def load_spacy_model():
    text, tokens, keywords = extract('uploads/test4.pdf')

    # Run the large English pipeline over the extracted text
    nlp = spacy.load('en_core_web_lg')
    doc = nlp(text)

    # Persist the pipeline to disk so it can be reloaded later
    nlp.to_disk('spacy_model')
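
The pipeline saved by load_spacy_model() can be restored from the same directory; a minimal sketch (the 'spacy_model' path matches the call above, the sample text is illustrative):

import spacy

# spacy.load() also accepts a directory written by Language.to_disk()
nlp = spacy.load('spacy_model')
doc = nlp('Some new text to analyse.')
print([token.text for token in doc])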
Example #2
from gensim.models import word2vec

from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

import matplotlib as mpl
from wordcloud import WordCloud, STOPWORDS
from stopwords import stop_word_list
from pdf_extractor import extract

import spacy

stop_words = stop_word_list()


text, tokens, keywords = extract('uploads/mytest.pdf')


# Filter stop words with a list comprehension; calling tokens.remove()
# while iterating over the same list skips elements and misses matches.
tokens = [word for word in tokens if word not in stop_words]

cleantext = " ".join(tokens)



nlp = spacy.load('en_core_web_sm')  # make sure to use larger model!

doc = nlp(cleantext)
list_of_lists = []
for sentence in doc.sents:
    inner_list = []
    for token in sentence:
        inner_list.append(token.text)
    list_of_lists.append(inner_list)
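
The WordCloud import above is otherwise unused in this example; a minimal sketch of how it might be applied to cleantext (the size and colour settings are illustrative assumptions):

# Render a word cloud of the cleaned text, excluding wordcloud's built-in stop words
wordcloud = WordCloud(stopwords=STOPWORDS, background_color='white',
                      width=800, height=400).generate(cleantext)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()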
Example #3
from gensim.models import word2vec

from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

import matplotlib as mpl
from wordcloud import WordCloud, STOPWORDS
from stopwords import stop_word_list
from pdf_extractor import extract

import spacy

stop_words = stop_word_list()

text, tokens, keywords = extract('uploads/test4.pdf')

# Filter stop words with a list comprehension; calling tokens.remove()
# while iterating over the same list skips elements and misses matches.
tokens = [word for word in tokens if word not in stop_words]

cleantext = " ".join(tokens)

nlp = spacy.load('en_core_web_sm')  # make sure to use larger model!

doc = nlp(cleantext)
list_of_lists = []
for sentence in doc.sents:
    inner_list = []
    for token in sentence:
        inner_list.append(token.text)
    list_of_lists.append(inner_list)
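
The gensim and TSNE imports suggest the per-sentence token lists feed a Word2Vec model; a minimal sketch of that continuation, assuming gensim 4.x (all hyperparameters are illustrative):

# Train word vectors on the tokenised sentences
model = word2vec.Word2Vec(list_of_lists, vector_size=100, window=5,
                          min_count=2, workers=4)

# Project the most frequent words to 2-D with t-SNE and plot them;
# perplexity must stay below the number of samples
words = list(model.wv.index_to_key)[:200]
vectors = model.wv[words]
coords = TSNE(n_components=2, perplexity=min(30, len(words) - 1),
              random_state=0).fit_transform(vectors)

plt.figure(figsize=(10, 10))
plt.scatter(coords[:, 0], coords[:, 1])
for word, (x, y) in zip(words, coords):
    plt.annotate(word, (x, y))
plt.show()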