def vsm_update(read_directory1, read_directory2, write_directory1, write_directory2):
    '''
    Remove all-zero rows from every VSM file, together with the matching
    rows of its companion file.

    The number of files N is the total file count found by walking
    read_directory1 (sub-directories included). Files "1.txt" .. "N.txt"
    are then read from both input directories and the filtered results are
    written under the same names to the two output directories.

    A VSM row is kept when the sum of its whitespace-separated integer
    values is greater than zero; the companion line at the same index is
    kept along with it.

    :param read_directory1: directory holding the VSM files (one row of
        integers per line)
    :param read_directory2: directory holding the companion files, read
        line by line and indexed in step with the VSM rows
    :param write_directory1: output directory for the filtered VSM files
    :param write_directory2: output directory for the filtered companion files
    :raises IndexError: if a companion file has fewer lines than a kept VSM row index
    :raises ValueError: if a VSM value cannot be parsed by int()
    '''
    # Built-in sum always yields an int. np.sum([]) would yield the float
    # 0.0 when os.walk finds nothing, and range() rejects floats.
    file_number = sum(len(files) for _root, _dirs, files in os.walk(read_directory1))

    for i in range(file_number):
        file_name = str(i + 1) + '.txt'

        # Context managers close the handles even if reading fails.
        with open(read_directory1 + '/' + file_name) as f1:
            each_weibo_vsm = f1.readlines()
        with open(read_directory2 + '/' + file_name) as f2:
            phst = f2.readlines()

        update_vsm = []
        update_phst = []
        for j, vsm_line in enumerate(each_weibo_vsm):
            # Drop all-zero rows (blank lines sum to zero and are dropped too).
            if sum(int(x) for x in vsm_line.split()) > 0:
                update_vsm.append(vsm_line)
                update_phst.append(phst[j])

        # quick_write_list_to_text2 is a project helper defined outside this
        # block; the lines passed to it still carry their original newlines.
        quick_write_list_to_text2(update_vsm, write_directory1 + '/' + file_name)
        quick_write_list_to_text2(update_phst, write_directory2 + '/' + file_name)

    print("VSM Update Complete!!!")
def vsm_update(read_directory1, read_directory2, write_directory1, write_directory2):
    '''
    Remove all-zero rows from every VSM file, together with the matching
    id/time records of its companion file.

    The number of files N is the total file count found by walking
    read_directory1 (sub-directories included). Files "1.txt" .. "N.txt"
    are then read from both input directories and the filtered results are
    written under the same names to the two output directories.

    A VSM row is kept when the sum of its whitespace-separated integer
    values is greater than zero; the companion record at the same index is
    re-joined with single spaces and kept along with it.

    :param read_directory1: directory holding the VSM files (one row of
        integers per line)
    :param read_directory2: directory holding the companion id/time files,
        loaded through get_text_to_complex_list2
    :param write_directory1: output directory for the filtered VSM files
    :param write_directory2: output directory for the filtered id/time files
    :raises IndexError: if fewer companion records were loaded than a kept VSM row index
    :raises ValueError: if a VSM value cannot be parsed by int()
    '''
    # Built-in sum always yields an int. np.sum([]) would yield the float
    # 0.0 when os.walk finds nothing, and range() rejects floats.
    file_number = sum(len(files) for _root, _dirs, files in os.walk(read_directory1))

    for i in range(file_number):
        file_name = str(i + 1) + '.txt'

        # Context manager closes the handle even if reading fails.
        with open(read_directory1 + '/' + file_name) as f1:
            each_weibo_vsm = f1.readlines()

        # get_text_to_complex_list2 is a project helper that fills id_time in
        # place. NOTE(review): presumably each entry is a list of string
        # fields selected by the (0, 2) arguments -- confirm against the helper.
        id_time = []
        get_text_to_complex_list2(id_time, read_directory2 + '/' + file_name, 0, 2)

        update_vsm = []
        update_id_time = []
        for j, vsm_line in enumerate(each_weibo_vsm):
            # Drop all-zero rows (blank lines sum to zero and are dropped too).
            if sum(int(x) for x in vsm_line.split()) > 0:
                update_vsm.append(vsm_line)
                update_id_time.append(" ".join(id_time[j]))

        # The two outputs deliberately go through different project writers,
        # as in the original: VSM lines still carry their newlines, while the
        # re-joined id/time records do not.
        quick_write_list_to_text2(update_vsm, write_directory1 + '/' + file_name)
        quick_write_list_to_text(update_id_time, write_directory2 + '/' + file_name)

    print("VSM Update Complete!!!")
'''
@author: ZhuJiahui506
'''
import os

import numpy as np

from TextToolkit import quick_write_list_to_text2


if __name__ == '__main__':
    # Concatenate the lines of the files "1 (1).op" .. "1 (N).op" found in
    # read_directory into one output file, where N is the total number of
    # files found by walking read_directory (sub-directories included).
    read_directory = 'D:/Local/FTP/gsod_2013'
    write_filename = 'D:/Local/FTP/2013_all.op'

    # int(...) because np.sum of an empty list is the float 0.0, which
    # range() rejects.
    file_number = int(np.sum(
        [len(files) for _root, _dirs, files in os.walk(read_directory)]))

    result = []
    for i in range(file_number):
        # Context manager closes the handle even if reading fails.
        with open(read_directory + '/1 (' + str(i + 1) + ').op') as f1:
            result.extend(f1.readlines())

    # quick_write_list_to_text2 is a project helper from TextToolkit; the
    # lines passed to it still carry their original newlines.
    quick_write_list_to_text2(result, write_filename)

    print("Complete")
'''
@author: ZhuJiahui506
'''
import os

import numpy as np

from TextToolkit import quick_write_list_to_text2


if __name__ == '__main__':
    # Merge the numbered input files "1 (1).op" .. "1 (N).op" from
    # read_directory into a single output file. N is the total number of
    # files found by walking read_directory (sub-directories included).
    read_directory = 'D:/Local/FTP/gsod_2013'
    write_filename = 'D:/Local/FTP/2013_all.op'

    # Cast to int: np.sum over an empty list returns the float 0.0, and
    # range() only accepts integers.
    file_number = int(np.sum(
        [len(files) for _root, _dirs, files in os.walk(read_directory)]))

    result = []
    for i in range(file_number):
        # "with" guarantees the file is closed even when readlines() raises.
        with open(read_directory + '/1 (' + str(i + 1) + ').op') as f1:
            result.extend(f1.readlines())

    # quick_write_list_to_text2 comes from the project's TextToolkit module;
    # the collected lines keep their original line endings.
    quick_write_list_to_text2(result, write_filename)

    print("Complete")