Ejemplo n.º 1
0


# In[2]:

from Remove_link import remove_link
from Remove_number import remove_number
from Remove_punctuation import remove_punctuation
from Remove_stopwords import remove_stopwords
from Replace_netword import replace_netword
from Replace_repeatwords import replace_repeatwords
from Replace_ywz import replace_ywz
from Translate_eng import translate_eng
import time
# Cleaning steps, listed in the exact order they are applied to each entry.
_cleaning_steps = (
    translate_eng,
    replace_ywz,
    replace_repeatwords,
    replace_netword,
    # remove_stopwords,  # currently disabled
    remove_punctuation,
    remove_number,
    remove_link,
)

# Rewrite data_all[0] .. data_all[1999] in place, one entry at a time.
for i in range(2000):
    for _step in _cleaning_steps:
        # Write every intermediate result back, so data_all[i] always holds
        # the output of the last step that finished.
        data_all[i] = _step(data_all[i])
    print(i)  # progress: index of the entry just processed


# In[3]:

import jieba
all_data=[]
for i in range(2000):
# coding: utf-8

# In[1]:

# Run translate_eng over neg.0.txt .. neg.99.txt and print each result.
#
# The import is done once, ahead of the loop, instead of re-executing the
# import statement on every iteration.
from Translate_eng import translate_eng

_neg_pattern = (
    "C:\\LAWHCA\\chinese-sentiment--analysis-preprocess"
    "\\neg\\neg.{}.txt"
)

for i in range(100):
    fname = _neg_pattern.format(i)
    print(fname)
    # No encoding is passed, so the platform default is used;
    # errors="ignore" silently drops any bytes that fail to decode.
    # NOTE(review): confirm the corpus encoding matches the default.
    with open(fname, "r", errors="ignore") as f:
        z = f.read()
    print(translate_eng(z))

# In[2]:

# In[1]:

# Read neg.0.txt .. neg.999.txt and collect their contents, in index order,
# in the list `neg`. Each path is printed before the file is opened.
_neg_template = (
    "C:\\LAWHCA\\chinese-sentiment--analysis-preprocess"
    "\\neg\\neg.{}.txt"
)

neg = []
for i in range(1000):
    fname = _neg_template.format(i)
    print(fname)
    # Platform-default encoding; bytes that cannot be decoded are dropped.
    with open(fname, mode="r", errors="ignore") as f:
        contents = f.read()
        neg.append(contents)

# In[2]:

pos = []
for i in range(1000):
    fname = "C:\\LAWHCA\\chinese-sentiment--analysis-preprocess\\pos\\pos." + str(
        i) + ".txt"