def word_count(fn, p1, p2, f_size):
    """Count character frequencies for one byte segment of a large file.

    The worker handles every line whose first byte lies in ``[p1, p2)``.
    When ``p1`` is non-zero the boundary may fall in the middle of a line
    (or of a multi-byte character), so the rest of that line is skipped and
    counting starts at the next line.

    Args:
        fn: Path of the file to read.
        p1: Byte offset at which this segment starts.
        p2: Byte offset at which this segment ends.
        f_size: Size of the whole file in bytes (forwarded to the progress
            display only).

    Returns:
        collections.Counter mapping each non-whitespace character to its
        number of occurrences in the segment. The same counter is also put
        on the module-level queue ``q``.
    """
    c = Counter()
    with open(fn, 'rb') as f:
        if p1:
            # Step back one byte so that a segment starting exactly on a
            # line start keeps that line; readline() then consumes the rest
            # of the partial line and, unlike a read(1) loop, stops at EOF.
            f.seek(p1 - 1)
            f.readline()
        start = time.time()
        pos = f.tell()
        # Check the position *before* reading: if the skip already moved past
        # p2, no line starts inside this segment and reading one more would
        # count a line that the following segment also counts.
        while pos < p2:
            raw = f.readline()
            if not raw:  # EOF reached before p2 (p2 beyond the file end)
                break
            # Whitespace characters are not counted.
            c.update(re.sub(r'\s+', '', raw.decode(CODING)))
            pos = f.tell()
            if p1 == 0:  # only the worker of the first segment shows progress
                processbar(pos, p2, fn, f_size, start)
    q.put(c)
    return c
Ejemplo n.º 2
0
 def count_single(self, from_file, f_size):
     """Count word frequencies of a whole file in a single process.

     Each line is read as raw bytes, handed to ``self.parse`` and the result
     is merged into ``self._c``. The progress display is refreshed after
     every line, using the current file offset against ``f_size``.
     """
     began = time.time()
     with open(from_file, mode='rb') as stream:
         for raw_line in stream:
             self._c.update(self.parse(raw_line))
             offset = stream.tell()
             processbar(offset, f_size, from_file, f_size, began)
Ejemplo n.º 3
0
 def count_single(self, from_file, f_size):
     """Read the file in one process and accumulate its word frequencies.

     Lines are read in binary mode; ``self.parse`` turns each one into counts
     that are added to ``self._c``. After each line the progress display is
     updated with the byte offset reached so far.
     """
     t0 = time.time()
     with open(from_file, "rb") as handle:
         for chunk in handle:
             self._c.update(self.parse(chunk))
             reached = handle.tell()
             processbar(reached, f_size, from_file, f_size, t0)
def counter_single(from_file, f_size):
    """Count character frequencies of a whole file in a single process.

    Args:
        from_file: Path of the file to read.
        f_size: Size of the file in bytes, used for the progress display.

    Returns:
        collections.Counter of every non-whitespace character in the file.
    """
    began = time.time()
    totals = Counter()
    with open(from_file, 'rb') as stream:
        for raw_line in stream:
            text = raw_line.decode(CODING)
            # Counter.update on a string counts its characters directly;
            # whitespace is stripped first so it is not counted.
            totals.update(re.sub(r'\s+', '', text))
            processbar(stream.tell(), f_size, from_file, f_size, began)
    return totals
Ejemplo n.º 5
0
 def count_multi(self, fn, p1, p2, f_size):
     """Count word frequencies for one byte segment of a large file.

     The worker handles every line whose first byte lies in ``[p1, p2)``.
     When ``p1`` is non-zero the boundary may fall in the middle of a line
     (or of a multi-byte character), so the rest of that line is skipped and
     counting starts at the next line.

     Args:
         fn: Path of the file to read.
         p1: Byte offset at which this segment starts.
         p2: Byte offset at which this segment ends.
         f_size: Size of the whole file in bytes (forwarded to the progress
             display only).

     Returns:
         collections.Counter with the merged results of ``self.parse`` for
         every line of the segment.
     """
     c = Counter()
     with open(fn, 'rb') as f:
         if p1:
             # Step back one byte so that a segment starting exactly on a
             # line start keeps that line; readline() then consumes the rest
             # of the partial line and, unlike a read(1) loop, stops at EOF.
             f.seek(p1 - 1)
             f.readline()
         start = time.time()
         pos = f.tell()
         # Check the position *before* reading: if the skip already moved
         # past p2, no line starts inside this segment and reading one more
         # would count a line that the following segment also counts.
         while pos < p2:
             line = f.readline()
             if not line:  # EOF reached before p2 (p2 beyond the file end)
                 break
             c.update(self.parse(line))
             pos = f.tell()
             if p1 == 0:  # only the worker of the first segment shows progress
                 processbar(pos, p2, fn, f_size, start)
     return c
0
                             transforms.Lambda(lambda crops: torch.stack(
                                 [dataset_transform(crop) for crop in crops]))
                         ]))

# Evaluation loader: fixed sample order, 8 loader workers. The batch size is
# divided by crop_num -- presumably because every dataset item expands into
# crop_num crops (TODO confirm against the dataset transform).
test_loader = DataLoader(
    testset,
    batch_size=batch_size // crop_num,
    shuffle=False,
    num_workers=8,
)

# Result containers; presumably filled by the evaluation loop that follows.
ans_dict = dict()
output_pred, output_name = [], []

with torch.no_grad():
    for batch_idx, blob in enumerate(test_loader):
        processbar(batch_idx + 1, len(testset) * crop_num, end='\n')

        # run
        img = blob['img'].cuda()
        img = img.view(-1, *img.size()[2:])
        real_len = len(img) // crop_num
        res = net(img)
        img = None
        for i in range(real_len):
            a = res[i * crop_num:(i + 1) * crop_num]
            # res[i] = a.mean(dim=0)
            res[i] = a.max(dim=0)[0]
            a = None
        res = res[:real_len]
        pred = evalute(res)
        name = blob['name']