def run(self):
    write_information('is running', self.thread_id)
    fe = Fetcher(self.thread_id, self.conf, self.md)
    an = Analyzer(self.thread_id, self.conf, self.md)
    #
    idx = 0
    nums = len(self.report_list)
    while idx < nums:
        report_id = self.report_list[idx]  # current report id
        idx += 1
        # download, unzip, and extract the crash stack
        file_info = self.file_info_dict.get(report_id)  # file info lookup; None if missing
        fe_ret = fe.process(report_id, file_info)
        # if not fe_ret:  # then check whether this step succeeded
        #     continue
        # analyze and classify (server mode only)
        an_ret = False  # analysis result; only meaningful in server mode
        if not self.conf.client_mode:
            an_ret = an.process_server(report_id)
            # if not an_ret:  # then check whether this step succeeded
            #     continue
        if self.remove_after_use:
            # clean up this step's temporary files first
            fe.remove_temps()
        if fe_ret:  # step one succeeded
            if self.conf.client_mode or \
                    (not self.conf.client_mode and an_ret):
                # client mode needs no step two; server mode also requires step two to succeed
                self.num_succeed += 1
def parse_webpage(url_base, start_idx, end_idx):
    # download the webpage
    try:
        vd = IOHelper.VisualizeDownload(url_base)
        page_info = vd.go()
    except Exception as e:
        write_information("failed to get web page!")
        return []
    # decode to utf-8
    page_info = page_info.decode('utf-8')
    # print(page_info)
    # find all report names
    p_name = re.compile(r'>(error_report_([\d]*).zip)<')
    id_list = p_name.findall(page_info)  # currently unsorted
    # print(id_list)
    if not id_list:  # guard against an empty listing
        write_information('no report files found on server')
        return []
    id_list = sorted(id_list, key=lambda x: int(x[1]))
    write_information(
        'totally <%d> files found on server, ranging from %s to %s' %
        (len(id_list), id_list[0][1], id_list[-1][1]))
    # create the file list, filtered by the requested index range
    new_id_list = []
    for report in id_list:
        idx = int(report[1])
        if start_idx >= 0 and idx < start_idx:
            continue
        if end_idx >= 0 and idx > end_idx:
            continue
        new_id_list.append(idx)
    return new_id_list
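# A minimal sketch of what the report-name pattern in parse_webpage matches.
# The HTML snippet below is hypothetical; real server listings may differ.
def _demo_report_name_regex():
    sample = ('<a href="http://example/error_report_6.zip">error_report_6.zip</a>'
              '<a href="http://example/error_report_12.zip">error_report_12.zip</a>')
    p_name = re.compile(r'>(error_report_([\d]*).zip)<')
    # findall returns (filename, report_id) tuples
    return p_name.findall(sample)
    # -> [('error_report_6.zip', '6'), ('error_report_12.zip', '12')]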
def try_download(self, filename, zip_path):
    #
    ret = True
    if not os.path.exists(zip_path):
        url = self.url_base + filename
        write_information('[Download&Save]:\t%s' % filename, self.thread_id)
        try:
            # connect to the file URL
            req = urllib.request.urlopen(url)
            # receive the data
            data = req.read()
            # write it to file
            with open(zip_path, 'wb') as f:
                f.write(data)
        except Exception as e:
            # exception during download or save
            write_information('[Download&Save]:error\t%s ' % filename,
                              self.thread_id)
            if os.path.exists(zip_path):
                self.remove_file(
                    zip_path
                )  # if no dirty file is stored, status is acceptable
            ret = False
    return ret
def get_classified_file(self):
    report_list = []
    dfn = IOHelper.DumpFileName(self.conf)
    for root, dirs, files in os.walk(self.conf.classified_folder):
        for file in files:
            if file.startswith(
                    self.conf.folder_prefix) and file.endswith(".zip"):
                report_list.append(dfn.get_report_id(file.split('.')[0]))
    write_information('totally %d classified files' % len(report_list))
    return sorted(report_list)
def try_analyze(self, filename, zip_path, unzip_path, dmp_path, xml_path):
    #
    if os.path.exists(xml_path):
        return True
    if not os.path.exists(unzip_path):
        return True
    write_information("[load dump] %s" % dmp_path, self.thread_id)
    f1 = ctypes.c_char_p(dmp_path.encode('gb2312'))
    # symbol (PDB) path; assumed to be stored on the instance
    f2 = ctypes.c_char_p(self.pdb_path.encode('gb2312'))
    info_buf = ctypes.create_string_buffer(128000)
    # load dump info through the DLL
    # ret = load_dump(f1, f2, info_buf)
    ret = load_dump2(self.thread_id, f1, f2, info_buf)
    if 0 == ret:
        write_information("[load dump]: succeed!", self.thread_id)
        try:
            # convert the info into python XML format
            info = info_buf.value.decode('utf-8')
            # print(info)
            dumpXML = ET.ElementTree(ET.fromstring(info))
            dumpXML.write(xml_path, 'utf-8')
            return True
        except Exception as e:
            write_information("[parse XML]: failed!", self.thread_id)
    write_information("[load dump]: failed!", self.thread_id)
    return False
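# A minimal sketch of how load_dump2 could be bound from the dump-loading DLL
# via ctypes. The DLL path attribute (conf.dumpload_dll, see fetch_analyze) is
# from this codebase; the export's exact signature is inferred from the call
# site above and is an assumption.
def _bind_load_dump2(dll_path):
    dll = ctypes.CDLL(dll_path)
    fn = dll.load_dump2
    # assumed signature: (thread_id, dump_path, pdb_path, out_buffer) -> int
    fn.argtypes = [ctypes.c_int, ctypes.c_char_p, ctypes.c_char_p,
                   ctypes.c_char_p]
    fn.restype = ctypes.c_int
    return fn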
def remove_file(self, file_path):
    """
    File-removal helper with logging and exception handling.
    :param file_path:
    :return:
    """
    try:
        if os.path.exists(file_path):
            if os.path.isfile(file_path):
                os.remove(file_path)
            else:
                shutil.rmtree(file_path)
        return True
    except os.error as e:
        self.status_valid = False
        write_information("remove failed! %s" % file_path, self.thread_id)
        return False
def try_unzip(self, filename, zip_path, unzip_path):
    #
    ret = True
    if os.path.exists(zip_path) and not os.path.exists(unzip_path):
        try:
            with zipfile.ZipFile(zip_path, 'r') as z:
                if self.dump_name in z.namelist():
                    write_information("[unzip]\t%s " % filename,
                                      self.thread_id)
                    z.extract(self.dump_name, unzip_path)
        except Exception as e:
            # unzip exception, delete files
            write_information('[unzip]: error\t%s ' % zip_path,
                              self.thread_id)
            if os.path.exists(zip_path):
                self.remove_file(zip_path)  # drop the corrupt archive
            if os.path.exists(unzip_path):
                self.remove_file(unzip_path)
            ret = False
    return ret
def go(self, thread_num, beg_idx, end_idx, enquire_webpage=False,
       ignore_classified=False, remove_after_use=False):
    #
    self.get_file_list(thread_num, beg_idx, end_idx, enquire_webpage,
                       ignore_classified)
    # JobAssigner.delete_files(self.zip_dir, self.report_list, self.conf)
    write_information('totally [%d] files to proceed, thread number %d' %
                      (len(self.report_list), self.thread_num))
    # distribute reports round-robin across worker threads
    thread_file_list = [[] for i in range(self.thread_num)]
    for idx in range(len(self.report_list)):
        thread_file_list[idx % self.thread_num].append(
            self.report_list[idx])
    threads = []
    for i in range(self.thread_num):
        jh = JobHandler()
        jh.set_param(i, thread_file_list[i], self.file_info_dict, self.conf,
                     self.md, remove_after_use=remove_after_use)
        threads.append(jh)
        # print(thread_file_list[i])
    for i in range(self.thread_num):
        threads[i].start()
    for i in range(self.thread_num):
        threads[i].join()
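# A small, self-contained illustration of the round-robin split used in go().
# The report IDs and thread count here are made up for demonstration.
def _demo_round_robin(report_list=(1, 2, 3, 4, 5), thread_num=2):
    thread_file_list = [[] for _ in range(thread_num)]
    for idx in range(len(report_list)):
        thread_file_list[idx % thread_num].append(report_list[idx])
    return thread_file_list
    # -> [[1, 3, 5], [2, 4]]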
def try_modifytime(self, filename, zip_path, f_datetime):
    # no timestamp supplied
    if not f_datetime:
        write_information('[Modify time]: error\t%s ' % filename,
                          self.thread_id)
        return False
    ret = True
    try:
        write_information('[Modify time]:\t%s ' % filename, self.thread_id)
        # convert to UTC
        utc_offset = datetime.datetime.utcnow() - datetime.datetime.now()
        f_datetime = f_datetime + utc_offset
        f_datetime = f_datetime.replace(tzinfo=datetime.timezone.utc)
        # open the file
        winfile = win32file.CreateFile(
            zip_path, win32con.GENERIC_WRITE,
            win32con.FILE_SHARE_READ | win32con.FILE_SHARE_WRITE
            | win32con.FILE_SHARE_DELETE, None, win32con.OPEN_EXISTING,
            win32con.FILE_ATTRIBUTE_NORMAL, None)
        # update the file's time attributes (creation, access, write)
        win32file.SetFileTime(winfile, f_datetime, f_datetime, f_datetime)
        # close the handle
        winfile.Close()
    except Exception as e:
        # exception while modifying the file time
        write_information('[Modify time]: error\t%s ' % filename,
                          self.thread_id)
        if os.path.exists(zip_path):
            self.remove_file(
                zip_path
            )  # if no dirty file is stored, status is acceptable
        ret = False
    return ret
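# A quick sketch of the local-to-UTC conversion used in try_modifytime: the
# listing timestamps appear to be naive local times, so they are shifted by
# the current UTC offset and tagged as UTC. The example value is illustrative.
def _demo_to_utc(f_datetime):
    utc_offset = datetime.datetime.utcnow() - datetime.datetime.now()
    return (f_datetime + utc_offset).replace(tzinfo=datetime.timezone.utc)
    # e.g. on a UTC+8 machine, 2017-05-18 17:26 local -> 2017-05-18 09:26+00:00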
def fetch_analyze(conf, md, repeat_num=-1, interval=5,
                  ignore_classified=False, remove_after_use=False):
    #
    # if not prepare_log(conf):
    #     write_information('prepare log failed!')
    #     return
    if not prepare_dll(conf):
        write_information('prepare dll failed!')
        write_information('dll path: %s' % conf.dumpload_dll)
        return
    if not prepare_symbol(conf):
        write_information('prepare symbol failed!')
        write_information('symbol path: %s' % conf.symbol_folder)
        return
    js = JobAssigner(conf, md)
    write_information('system started...')
    count = 0
    while True:
        if 0 < repeat_num and repeat_num <= count:
            break
        if count > 99990:
            count = 0
        write_information("<The %s time>" % count)
        count += 1
        # clean up stale temporary folders left by previous runs
        if remove_after_use:
            dc = IOHelper.DirCleaner(conf)
            # dc.clean_zip()
            dc.clean_unzip()
        js.go(conf.thread_num, conf.idx_beg, conf.idx_end,
              enquire_webpage=conf.retrieve_webpage,
              ignore_classified=ignore_classified,
              remove_after_use=remove_after_use)
        time.sleep(interval)
    return js.report_list
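# A hypothetical driver for fetch_analyze, assuming conf and md objects are
# built elsewhere in this project (the call below is illustrative only):
#
#     report_list = fetch_analyze(conf, md, repeat_num=1, interval=5,
#                                 remove_after_use=True)
#
# repeat_num=1 runs a single fetch/analyze pass instead of looping forever.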
def parse_webpage2(url_base, start_idx, end_idx):
    # download the page
    try:
        vd = IOHelper.VisualizeDownload(url_base)
        page_info = vd.go()
    except Exception as e:
        write_information("failed to get web page!")
        return [], {}
    # save the raw page
    with open('page', 'wb') as f:
        f.write(page_info)
    # decode to utf-8
    page_info = page_info.decode('utf-8')
    # print(page_info)
    # extract the error-report file list, e.g.:
    # <br>
    # 2017/5/18 17:26 888805
    # <a href="http://222.73.55.231/BugTrap/reports/swcSelf8.9.3.4687/error_report_6.zip">
    # error_report_6.zip
    # </a>
    pat = re.compile(
        r'<br>'                        # opening tag
        r'([0-9/ :]*?)'                # date, time and size (group 1)
        r'<a href=".*?">'              # URL
        r'(error_report_([\d]*).zip)'  # file name (group 2), report id (group 3)
        r'</a>', re.IGNORECASE)        # closing tag
    file_info_dict = {}  # report_id -> (time, size)
    res = pat.findall(page_info)
    for item in res:
        if len(item) < 3:  # expect at least three captured groups
            continue
        # file info, file name, report id
        file_info, file_name, report_id = item
        # e.g. "2017/5/18 17:26 888805" -> date, time, file size
        f_date_str, f_time_str, f_size_str = file_info.split()
        f_date_time_str = f_date_str + " " + f_time_str  # join date and time
        f_date_time = datetime.datetime.strptime(
            f_date_time_str,
            "%Y/%m/%d %H:%M")  # listing timestamp (server-local time)
        # print(f_date_time)
        # print(type(item), file_info, file_name, report_id)
        report_id = int(report_id)  # report id
        f_size = int(f_size_str)  # file size
        file_info_dict[report_id] = (f_date_time, f_size)
    id_list = sorted(file_info_dict.keys())
    if not id_list:  # guard against an empty listing
        write_information('no report files found on server')
        return [], {}
    write_information(
        'totally <%d> files found on server, ranging from %s to %s' %
        (len(id_list), id_list[0], id_list[-1]))
    # create the file list, filtered by the requested index range
    new_id_list = []
    for report_id in id_list:
        idx = int(report_id)
        if start_idx >= 0 and idx < start_idx:
            continue
        if end_idx >= 0 and idx > end_idx:
            continue
        new_id_list.append(idx)
    return new_id_list, file_info_dict
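# A minimal check of the pattern in parse_webpage2 against the snippet quoted
# in its comment; that quoted snippet is the only assumed input.
def _demo_report_info_regex():
    sample = ('<br>   2017/5/18 17:26       888805 '
              '<a href="http://222.73.55.231/BugTrap/reports/'
              'swcSelf8.9.3.4687/error_report_6.zip">error_report_6.zip</a>')
    pat = re.compile(
        r'<br>([0-9/ :]*?)<a href=".*?">(error_report_([\d]*).zip)</a>',
        re.IGNORECASE)
    return pat.findall(sample)
    # -> [('   2017/5/18 17:26       888805 ', 'error_report_6.zip', '6')]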