Example #1
0
def removeEmphasis(sentence):
    """Split *sentence* on whitespace and run each token through ``remove_emphasis``.

    ``remove_emphasis`` is defined outside this snippet; whatever it returns
    for a token is collected as-is.

    Returns:
        A list with one processed entry per whitespace-separated token,
        in the original token order (empty for blank input).
    """
    processed_tokens = []
    for token in sentence.split():
        processed_tokens.append(remove_emphasis(token))
    return processed_tokens
Example #2
0
# Load the scraped documents from the 'Sheet1' worksheet of the workbook.
file = path.join('data', 'nsk_scrape.xlsx')
xl = pd.ExcelFile(file)
df = xl.parse('Sheet1')
df.head()

corpus = []
STOPWORDS = set(stopwords.words('greek'))

# Characters stripped from every document: the listed punctuation plus all
# digits. A separate digit-only substitution used to run first, but its
# result was immediately overwritten by this one, which already covers \d.
_STRIP_CHARS_RE = re.compile(r"[-,()/@\'?\.$%_+\d]")

# A single stemmer instance is reused for every row.
stemmer = gr_stemm.GreekStemmer()

# Preprocess the legal opinions (column 'Concultatory').
print(df.shape[0])
for text in df['Concultatory']:
    subject = _STRIP_CHARS_RE.sub('', text).split()
    # Tokens are passed through remove_emphasis and upper-cased before
    # stemming. NOTE(review): presumably the stemmer expects accent-free,
    # upper-case input -- confirm against gr_stemm.
    subject = [remove_emphasis(x).upper() for x in subject]
    # NOTE(review): the stop-word test runs on upper-cased tokens; if the
    # NLTK Greek list is lower-case/accented this filter never matches --
    # verify and normalise STOPWORDS the same way if so.
    subject = [
        stemmer.stem(word)
        for word in subject
        if word not in STOPWORDS and len(word) >= 3
    ]
    subject = " ".join(x.lower() for x in subject)
    corpus.append(subject)

# Reuse df's index so the join below stays row-aligned with the source rows.
corpus = pd.DataFrame(corpus, columns=['Concultatory'], index=df.index)

corpus.head()

# Attach the label column to the cleaned text and inspect the class balance.
result = corpus.join(df[['Status']])
result.groupby(['Status']).size()

result.head()
Example #3
0
def removeEmphasis(word):
    """Apply ``remove_emphasis`` to every whitespace-separated piece of *word*.

    The input is split on whitespace and each piece is replaced by the value
    ``remove_emphasis`` (defined outside this snippet) returns for it.

    Returns:
        A new list of the processed pieces, in their original order
        (empty when *word* contains no non-whitespace characters).
    """
    pieces = word.split()
    for position, piece in enumerate(pieces):
        pieces[position] = remove_emphasis(piece)
    return pieces