from collections import Counter
import re
import time

# CODING (the file encoding), processbar() and the result queue `q` are
# expected to be defined elsewhere in the module.


def word_count(fn, p1, p2, f_size):
    """Read one segment of a large file and count word frequencies.

    Args:
        fn: name of the file to read
        p1: start offset of the file segment
        p2: end offset of the file segment
        f_size: size of the file, in bytes

    Returns:
        collections.Counter with the frequency counts for the segment
        handled by this process.
    """
    c = Counter()
    with open(fn, 'rb') as f:
        if p1:
            # To avoid cutting a character in half, skip the rest of the line
            # that contains the segment boundary and start from the next line.
            f.seek(p1 - 1)
            while b'\n' not in f.read(1):
                pass
        start = time.time()
        while 1:
            line = f.readline().decode(CODING)
            c.update(Counter(re.sub(r'\s+', '', line)))  # whitespace is not counted
            pos = f.tell()
            if p1 == 0:  # only the first segment reports progress
                processbar(pos, p2, fn, f_size, start)
            if pos >= p2:
                q.put(c)
                return c
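
# The driver that launches word_count() in several processes is not shown in
# this listing.  The sketch below is a hypothetical dispatcher, not the
# repository's actual code: it assumes the 'fork' start method so that the
# module-level queue `q` created here is inherited by the worker processes.
import multiprocessing as mp
import os


def run_multiprocess(fn, workers=4):
    """Hypothetical driver: split the file into byte ranges, merge the Counters."""
    global q
    q = mp.Queue()                       # word_count() puts its Counter here
    f_size = os.path.getsize(fn)
    step = f_size // workers
    # Byte offsets for each segment; the last segment runs to the end of file.
    bounds = [(i * step, (i + 1) * step if i < workers - 1 else f_size)
              for i in range(workers)]
    procs = [mp.Process(target=word_count, args=(fn, p1, p2, f_size))
             for p1, p2 in bounds]
    for p in procs:
        p.start()
    total = Counter()
    for _ in procs:
        total.update(q.get())            # one partial Counter per worker
    for p in procs:
        p.join()
    return total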
def count_single(self, from_file, f_size):
    """Read the whole file in a single process and count word frequencies."""
    start = time.time()
    with open(from_file, 'rb') as f:
        for line in f:
            self._c.update(self.parse(line))
            processbar(f.tell(), f_size, from_file, f_size, start)
def counter_single(from_file, f_size):
    """Read the whole file in a single process and count word frequencies."""
    c = Counter()
    start = time.time()
    with open(from_file, 'rb') as f:
        for line in f:
            c.update(Counter(re.sub(r'\s+', '', line.decode(CODING))))
            processbar(f.tell(), f_size, from_file, f_size, start)
    return c
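
# A possible call site for counter_single(); the file path and the top-10
# printout are illustrative only, and CODING is assumed to be configured at
# module level (e.g. 'utf-8').
if __name__ == '__main__':
    import os
    path = 'data.txt'                          # hypothetical input file
    result = counter_single(path, os.path.getsize(path))
    print(result.most_common(10))              # ten most frequent entries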
def count_multi(self, fn, p1, p2, f_size):
    """Count word frequencies for the file segment between offsets p1 and p2."""
    c = Counter()
    with open(fn, 'rb') as f:
        if p1:
            # To avoid cutting a character in half, skip the rest of the line
            # that contains the segment boundary and start from the next line.
            f.seek(p1 - 1)
            while b'\n' not in f.read(1):
                pass
        start = time.time()
        while 1:
            line = f.readline()
            c.update(self.parse(line))
            pos = f.tell()
            if p1 == 0:  # only the first segment reports progress
                processbar(pos, p2, fn, f_size, start)
            if pos >= p2:
                return c
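
# count_multi() returns its Counter instead of pushing to a queue, so it pairs
# naturally with a process pool.  This driver is a sketch, not part of the
# repository: it assumes the enclosing object (`counter` below) is picklable so
# its bound method can be shipped to worker processes.
from multiprocessing import Pool


def run_pool(counter, fn, workers=4):
    """Hypothetical driver: hand each byte range to count_multi via a Pool."""
    f_size = os.path.getsize(fn)
    step = f_size // workers
    jobs = [(fn, i * step,
             (i + 1) * step if i < workers - 1 else f_size, f_size)
            for i in range(workers)]
    with Pool(workers) as pool:
        parts = pool.starmap(counter.count_multi, jobs)
    total = Counter()
    for part in parts:
        total.update(part)
    return total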
    transforms.Lambda(lambda crops: torch.stack(
        [dataset_transform(crop) for crop in crops]))
]))
test_loader = DataLoader(testset, batch_size=batch_size // crop_num,
                         shuffle=False, num_workers=8)

ans_dict = {}
output_pred = []
output_name = []
with torch.no_grad():
    for batch_idx, blob in enumerate(test_loader):
        processbar(batch_idx + 1, len(testset) * crop_num, end='\n')
        # run the model on all crops of this batch
        img = blob['img'].cuda()
        img = img.view(-1, *img.size()[2:])   # fold the crop dim into the batch dim
        real_len = len(img) // crop_num
        res = net(img)
        img = None
        for i in range(real_len):
            # aggregate the crop_num predictions belonging to image i
            a = res[i * crop_num:(i + 1) * crop_num]
            # res[i] = a.mean(dim=0)
            res[i] = a.max(dim=0)[0]
            a = None
        res = res[:real_len]
        pred = evalute(res)
        name = blob['name']
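
# The per-sample loop above max-pools the crop_num predictions that belong to
# each image.  The helper below is a hypothetical, vectorized equivalent (a
# sketch, not code from the repository); it assumes `res` has shape
# [real_len * crop_num, num_classes].
def maxpool_crops(res, real_len, crop_num):
    """Collapse per-crop logits into one prediction per image by max-pooling."""
    return res.view(real_len, crop_num, -1).max(dim=1)[0]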