def vsm_update(read_directory1, read_directory2, write_directory1, write_directory2):
    '''
    Remove the all-zero rows from every VSM file and drop the matching
    lines from the companion file so both stay aligned line by line.

    Files are addressed as ``1.txt``, ``2.txt``, ... inside each directory.
    The number of files processed is the total number of files found by
    walking ``read_directory1`` (sub-directories included).

    :param read_directory1: directory of VSM files; each line holds
        whitespace-separated integers
    :param read_directory2: directory of companion files; line j is kept
        exactly when row j of the VSM file is kept
    :param write_directory1: directory receiving the filtered VSM files
    :param write_directory2: directory receiving the filtered companion files
    :raises IndexError: if a companion file has fewer lines than a kept row index
    :raises ValueError: if a VSM token cannot be parsed as an integer
    '''
    # Built-in sum keeps the count an int even when the directory is empty;
    # np.sum([]) yields the float 0.0, which range() rejects with TypeError.
    file_number = sum(len(files) for _, _, files in os.walk(read_directory1))

    for i in range(file_number):
        file_name = '/' + str(i + 1) + '.txt'

        # Context managers close the handles even if reading fails.
        with open(read_directory1 + file_name) as f1:
            each_weibo_vsm = f1.readlines()

        with open(read_directory2 + file_name) as f2:
            phst = f2.readlines()

        update_vsm = []
        update_phst = []
        for j, vsm_line in enumerate(each_weibo_vsm):
            # Keep only rows with a positive total. This equals "not all
            # zero" as long as the entries are non-negative counts.
            if sum(int(x) for x in vsm_line.split()) > 0:
                update_vsm.append(vsm_line)
                update_phst.append(phst[j])

        quick_write_list_to_text2(update_vsm, write_directory1 + file_name)
        quick_write_list_to_text2(update_phst, write_directory2 + file_name)

    # Parenthesized form prints the same text on Python 2 and Python 3.
    print("VSM Update Complete!!!")
# Example #2
# 0
def vsm_update(read_directory1, read_directory2, write_directory1,
               write_directory2):
    '''
    Remove the all-zero rows from every VSM file and drop the matching
    id/time records so both outputs stay aligned line by line.

    Files are addressed as ``1.txt``, ``2.txt``, ... inside each directory.
    The number of files processed is the total number of files found by
    walking ``read_directory1`` (sub-directories included).

    :param read_directory1: directory of VSM files; each line holds
        whitespace-separated integers
    :param read_directory2: directory of id/time files, loaded through
        ``get_text_to_complex_list2``
    :param write_directory1: directory receiving the filtered VSM files
    :param write_directory2: directory receiving the filtered id/time files
    :raises IndexError: if fewer id/time records than kept row indices exist
    :raises ValueError: if a VSM token cannot be parsed as an integer
    '''
    # Built-in sum keeps the count an int even when the directory is empty;
    # np.sum([]) yields the float 0.0, which range() rejects with TypeError.
    file_number = sum(
        len(files) for _, _, files in os.walk(read_directory1))

    for i in range(file_number):
        file_name = '/' + str(i + 1) + '.txt'

        # Context manager closes the handle even if reading fails.
        with open(read_directory1 + file_name) as f1:
            each_weibo_vsm = f1.readlines()

        # The helper fills id_time in place. Presumably one list of string
        # fields per input line (it is joined with spaces below) -- confirm
        # the meaning of the 0, 2 arguments against TextToolkit.
        id_time = []
        get_text_to_complex_list2(id_time, read_directory2 + file_name, 0, 2)

        update_vsm = []
        update_id_time = []
        for j, vsm_line in enumerate(each_weibo_vsm):
            # Keep only rows with a positive total. This equals "not all
            # zero" as long as the entries are non-negative counts.
            if sum(int(x) for x in vsm_line.split()) > 0:
                update_vsm.append(vsm_line)
                update_id_time.append(" ".join(id_time[j]))

        quick_write_list_to_text2(update_vsm, write_directory1 + file_name)
        quick_write_list_to_text(update_id_time, write_directory2 + file_name)

    # Parenthesized form prints the same text on Python 2 and Python 3.
    print("VSM Update Complete!!!")
'''
@author: ZhuJiahui506
'''
import os
import numpy as np
from TextToolkit import quick_write_list_to_text2

if __name__ == '__main__':
    # Concatenate every '1 (k).op' file found under read_directory, in
    # numeric order of k, into a single output file.
    read_directory = 'D:/Local/FTP/gsod_2013'
    write_filename = 'D:/Local/FTP/2013_all.op'

    # Built-in sum keeps the count an int even when the directory is empty;
    # np.sum([]) yields the float 0.0, which range() rejects with TypeError.
    file_number = sum(
        len(files) for _, _, files in os.walk(read_directory))

    result = []
    for i in range(file_number):
        # Context manager closes the handle even if reading fails.
        with open(read_directory + '/1 (' + str(i + 1) + ').op') as f1:
            result.extend(f1.readlines())

    quick_write_list_to_text2(result, write_filename)
    # Parenthesized form prints the same text on Python 2 and Python 3.
    print("Complete")
'''
@author: ZhuJiahui506
'''
import os
import numpy as np
from TextToolkit import quick_write_list_to_text2

if __name__ == '__main__':
    # Merge all '1 (k).op' files from read_directory into one output file,
    # visiting them in increasing k.
    read_directory = 'D:/Local/FTP/gsod_2013'
    write_filename = 'D:/Local/FTP/2013_all.op'

    # Count with the built-in sum so an empty directory gives the int 0;
    # np.sum([]) gives the float 0.0 and range() would raise TypeError.
    file_number = sum(len(files) for _, _, files in os.walk(read_directory))

    result = []
    for i in range(file_number):
        # The with-block guarantees the file is closed if readlines() fails.
        with open(read_directory + '/1 (' + str(i + 1) + ').op') as f1:
            result.extend(f1.readlines())

    quick_write_list_to_text2(result, write_filename)
    # Parenthesized form prints the same text on Python 2 and Python 3.
    print("Complete")