# In[2]: from Remove_link import remove_link from Remove_number import remove_number from Remove_punctuation import remove_punctuation from Remove_stopwords import remove_stopwords from Replace_netword import replace_netword from Replace_repeatwords import replace_repeatwords from Replace_ywz import replace_ywz from Translate_eng import translate_eng import time for i in range(2000): data_all[i]=translate_eng(data_all[i]) data_all[i]=replace_ywz(data_all[i]) data_all[i]=replace_repeatwords(data_all[i]) data_all[i]=replace_netword(data_all[i]) #data_all[i]=remove_stopwords(data_all[i]) data_all[i]=remove_punctuation(data_all[i]) data_all[i]=remove_number(data_all[i]) data_all[i]=remove_link(data_all[i]) print(i) # In[3]: import jieba all_data=[] for i in range(2000):
# coding: utf-8

# In[1]:
# Sanity check: print the English translation of the first 100
# negative-review files.
# Hoisted out of the loop: importing per iteration is loop-invariant work.
from Translate_eng import translate_eng

for i in range(100):
    # f-string produces the exact same path string as the original
    # str-concatenation (absolute Windows path, hard-coded in the notebook).
    fname = f"C:\\LAWHCA\\chinese-sentiment--analysis-preprocess\\neg\\neg.{i}.txt"
    print(fname)
    # errors="ignore" silently drops undecodable bytes in these review files.
    with open(fname, "r", errors="ignore") as f:
        z = f.read()
    print(translate_eng(z))


# In[2]:
# In[1]:
# Load the 1000 negative reviews into memory as raw strings.
neg = []
for i in range(1000):
    fname = f"C:\\LAWHCA\\chinese-sentiment--analysis-preprocess\\neg\\neg.{i}.txt"
    print(fname)
    with open(fname, "r", errors="ignore") as f:
        neg.append(f.read())


# In[2]:
# Load the 1000 positive reviews.
pos = []
for i in range(1000):
    fname = f"C:\\LAWHCA\\chinese-sentiment--analysis-preprocess\\pos\\pos.{i}.txt"
    # NOTE(review): the rest of this loop body (presumably the matching
    # open/append, mirroring the `neg` loop) is truncated in this chunk —
    # not visible here, so it is not reproduced.