Example 1
    def run(self):
        write_information('is running', self.thread_id)
        fe = Fetcher(self.thread_id, self.conf, self.md)
        an = Analyzer(self.thread_id, self.conf, self.md)

        # walk the report list assigned to this thread
        idx = 0
        nums = len(self.report_list)
        while idx < nums:
            report_id = self.report_list[idx]  # current report ID
            idx += 1

            # download, unzip, and pull out the crash stack
            file_info = self.file_info_dict.get(report_id)  # file info lookup; None if absent
            fe_ret = fe.process(report_id, file_info)
            # if not fe_ret:                     # then bail out on failure
            #     continue

            # analyze and classify (server mode only)
            if not self.conf.client_mode:
                an_ret = an.process_server(report_id)

                # if not an_ret:                 # then bail out on failure
                #     continue

            if self.remove_after_use:  # clean up this step's temporary files first
                fe.remove_temps()

            if fe_ret:  # the first step succeeded
                if self.conf.client_mode or \
                        (not self.conf.client_mode and an_ret):
                    # client mode has no second step; server mode also
                    # needs the second step to succeed
                    self.num_succeed += 1
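The success bookkeeping at the bottom of the loop collapses to a single boolean; a minimal equivalent sketch, using the same names as in the snippet above:

    # equivalent success check: client mode needs only the fetch step,
    # server mode needs both steps
    if fe_ret and (self.conf.client_mode or an_ret):
        self.num_succeed += 1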
Example 2
    def parse_webpage(url_base, start_idx, end_idx):
        # download webpages
        try:
            vd = IOHelper.VisualizeDownload(url_base)
            page_info = vd.go()
        except Exception as e:
            write_information("failed to get web page!")
            return []

        # decode to utf-8
        page_info = page_info.decode('utf-8')
        # print(page_info)

        # find all report names
        p_name = re.compile(r'>(error_report_(\d+)\.zip)<')
        id_list = p_name.findall(page_info)  # (filename, id) tuples, unsorted
        # print(id_list)
        id_list = sorted(id_list, key=lambda x: int(x[1]))
        if not id_list:
            write_information('no report files found on server')
            return []
        write_information(
            'totally <%d> files found on server, ranging from %s to %s' %
            (len(id_list), id_list[0][1], id_list[-1][1]))

        # create file list
        new_id_list = []
        for report in id_list:
            idx = int(report[1])
            if start_idx >= 0 and idx < start_idx:
                continue
            if end_idx >= 0 and idx > end_idx:
                continue
            new_id_list.append(idx)
        return new_id_list
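A quick sketch of what the regular expression extracts, run against an illustrative fragment (the string below is made up; the real input is the server's directory listing):

    import re

    sample = '>error_report_12.zip< ... >error_report_3.zip<'  # illustrative only
    p_name = re.compile(r'>(error_report_(\d+)\.zip)<')
    matches = p_name.findall(sample)
    # findall returns (filename, id) tuples, in page order
    print(matches)  # [('error_report_12.zip', '12'), ('error_report_3.zip', '3')]
    print(sorted(matches, key=lambda x: int(x[1])))  # sorted by numeric ID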
Example 3
    def try_download(self, filename, zip_path):
        # skip the download if the archive is already on disk
        ret = True
        if not os.path.exists(zip_path):
            url = self.url_base + filename
            write_information('[Download&Save]:\t%s' % filename,
                              self.thread_id)
            try:
                # connect to the file URL and read the payload
                with urllib.request.urlopen(url) as req:
                    data = req.read()

                # write it to disk
                with open(zip_path, 'wb') as f:
                    f.write(data)

            except Exception as e:
                # the download or the save failed; clean up
                write_information('[Download&Save]:error\t%s ' % filename,
                                  self.thread_id)
                if os.path.exists(zip_path):
                    self.remove_file(
                        zip_path
                    )  # if no dirty file is stored, status is acceptable
                ret = False
        return ret
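The same download-then-write pattern as a standalone sketch with a timeout added; the function name and defaults are illustrative, not part of the original module:

    import os
    import urllib.request

    def download_once(url, dest_path, timeout=30):
        # fetch url into dest_path; never leave a partial file behind
        if os.path.exists(dest_path):
            return True
        try:
            with urllib.request.urlopen(url, timeout=timeout) as req:
                data = req.read()
            with open(dest_path, 'wb') as f:
                f.write(data)
            return True
        except Exception:
            if os.path.exists(dest_path):
                os.remove(dest_path)
            return False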
Example 4
    def get_classified_file(self):
        # collect the IDs of reports that have already been classified
        report_list = []
        dfn = IOHelper.DumpFileName(self.conf)
        for root, dirs, files in os.walk(self.conf.classified_folder):
            for file in files:
                if (file.startswith(self.conf.folder_prefix)
                        and file.endswith(".zip")):
                    report_list.append(dfn.get_report_id(file.split('.')[0]))

        write_information('totally %d classified files' % len(report_list))
        return sorted(report_list)
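For comparison, the same scan written with pathlib; this is an illustrative alternative, assuming report names carry a single '.zip' suffix so that Path.stem matches file.split('.')[0] above:

    from pathlib import Path

    def classified_ids(folder, prefix, dfn):
        # recursive scan for zip files carrying the classified-report prefix
        ids = [dfn.get_report_id(p.stem)
               for p in Path(folder).rglob('*.zip')
               if p.name.startswith(prefix)]
        return sorted(ids)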
Example 5
    def try_analyze(self, filename, zip_path, unzip_path, dmp_path, xml_path):
        # skip reports that are already analyzed or were never extracted
        if os.path.exists(xml_path):
            return True  # analysis output already exists
        if not os.path.exists(unzip_path):
            return True  # nothing was extracted for this report

        write_information("[load dump] %s" % dmp_path, self.thread_id)

        # encode the paths for the native DLL (gb2312 matches the Windows code page);
        # pdb_path, the symbol file path, is defined outside this excerpt
        f1 = ctypes.c_char_p(dmp_path.encode('gb2312'))
        f2 = ctypes.c_char_p(pdb_path.encode('gb2312'))
        info_buf = ctypes.create_string_buffer(128000)  # output buffer for the dump info

        # load dump info through DLL
        # ret = load_dump(f1, f2, info_buf)
        ret = load_dump2(self.thread_id, f1, f2, info_buf)
        if ret == 0:  # the DLL signals success with 0
            write_information("[load dump]: succeed!", self.thread_id)

            try:
                # convert info into python xml format
                info = info_buf.value.decode('utf-8')
                # print(info)
                dumpXML = ET.ElementTree(ET.fromstring(info))
                dumpXML.write(xml_path, 'utf-8')
                return True
            except Exception as e:
                write_information("[parse XML]: failed!", self.thread_id)

        write_information("[load dump]: failed!", self.thread_id)
        return False
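A minimal sketch of how load_dump2 might be bound with ctypes; the DLL name and the prototype are assumptions inferred from the call site (int thread ID, two path strings, one output buffer, int return code), not a documented API:

    import ctypes

    dll = ctypes.WinDLL('DumpLoad.dll')  # placeholder name; see conf.dumpload_dll
    load_dump2 = dll.load_dump2
    load_dump2.argtypes = [ctypes.c_int, ctypes.c_char_p,
                           ctypes.c_char_p, ctypes.c_char_p]
    load_dump2.restype = ctypes.c_int  # 0 signals success, per the check above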
Example 6
    def remove_file(self, file_path):
        """
        Helper that deletes a file or directory tree, with logging and
        exception handling.
        :param file_path: path of the file or directory to delete
        :return: True on success, False on failure
        """
        try:
            if os.path.exists(file_path):
                if os.path.isfile(file_path):
                    os.remove(file_path)
                else:
                    shutil.rmtree(file_path)
            return True
        except OSError as e:
            self.status_valid = False
            write_information("remove failed! %s" % file_path, self.thread_id)
            return False
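The file-versus-directory branch matters because os.remove refuses directories; a quick self-contained demonstration on temporary paths:

    import os
    import shutil
    import tempfile

    d = tempfile.mkdtemp()
    f = os.path.join(d, 'x.txt')
    open(f, 'w').close()
    os.remove(f)      # plain file: os.remove
    shutil.rmtree(d)  # directory tree: shutil.rmtree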
Example 7
    def try_unzip(self, filename, zip_path, unzip_path):
        # extract the dump from the archive unless it was already unzipped
        ret = True
        if os.path.exists(zip_path) and not os.path.exists(unzip_path):
            try:
                with zipfile.ZipFile(zip_path, 'r') as z:
                    if self.dump_name in z.namelist():
                        write_information("[unzip]\t%s " % filename,
                                          self.thread_id)
                        z.extract(self.dump_name, unzip_path)
            except Exception as e:
                # unzip failed: delete both the archive and the partial output
                write_information('[unzip]: error\t%s ' % zip_path,
                                  self.thread_id)
                if os.path.exists(zip_path):
                    self.remove_file(zip_path)
                if os.path.exists(unzip_path):
                    self.remove_file(unzip_path)
                ret = False
        return ret
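What the zipfile calls do in isolation; the archive and member names below are placeholders standing in for self.dump_name and the report paths:

    import zipfile

    # build a tiny archive, then mirror the namelist/extract calls above
    with zipfile.ZipFile('demo.zip', 'w') as z:
        z.writestr('crash.dmp', b'')  # 'crash.dmp' stands in for self.dump_name
    with zipfile.ZipFile('demo.zip', 'r') as z:
        print(z.namelist())  # members stored in the archive
        if 'crash.dmp' in z.namelist():
            z.extract('crash.dmp', 'unzipped/demo')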
Example 8
    def go(self,
           thread_num,
           beg_idx,
           end_idx,
           enquire_webpage=False,
           ignore_classified=False,
           remove_after_use=False):
        # build the list of report IDs to process
        self.get_file_list(thread_num, beg_idx, end_idx, enquire_webpage,
                           ignore_classified)
        # JobAssigner.delete_files(self.zip_dir, self.report_list, self.conf)

        write_information('totally [%d] files to process, thread number %d' %
                          (len(self.report_list), self.thread_num))

        # distribute the reports across threads round-robin
        thread_file_list = [[] for _ in range(self.thread_num)]
        for idx in range(len(self.report_list)):
            thread_file_list[idx % self.thread_num].append(
                self.report_list[idx])

        threads = []
        for i in range(self.thread_num):
            jh = JobHandler()
            jh.set_param(i,
                         thread_file_list[i],
                         self.file_info_dict,
                         self.conf,
                         self.md,
                         remove_after_use=remove_after_use)
            threads.append(jh)

            # print(thread_file_list[i])

        for i in range(self.thread_num):
            threads[i].start()

        for i in range(self.thread_num):
            threads[i].join()
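The round-robin split in isolation, with illustrative values:

    report_list = [101, 102, 103, 104, 105]
    thread_num = 2
    buckets = [[] for _ in range(thread_num)]
    for idx, report_id in enumerate(report_list):
        buckets[idx % thread_num].append(report_id)
    print(buckets)  # [[101, 103, 105], [102, 104]]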
Example 9
    def try_modifytime(self, filename, zip_path, f_datetime):
        # no timestamp was supplied
        if not f_datetime:
            write_information('[Modify time]: error\t%s ' % filename,
                              self.thread_id)
            return False

        ret = True
        try:
            write_information('[Modify time]:\t%s ' % filename, self.thread_id)

            # convert local time to UTC
            utc_offset = datetime.datetime.utcnow() - datetime.datetime.now()
            f_datetime = f_datetime + utc_offset
            f_datetime = f_datetime.replace(tzinfo=datetime.timezone.utc)

            # open the file with write access to its attributes
            winfile = win32file.CreateFile(
                zip_path, win32con.GENERIC_WRITE, win32con.FILE_SHARE_READ
                | win32con.FILE_SHARE_WRITE | win32con.FILE_SHARE_DELETE, None,
                win32con.OPEN_EXISTING, win32con.FILE_ATTRIBUTE_NORMAL, None)

            # set the creation, access, and write times
            win32file.SetFileTime(winfile, f_datetime, f_datetime, f_datetime)

            # close the handle
            winfile.close()
        except Exception as e:
            # setting the file times failed
            write_information('[Modify time]: error\t%s ' % filename,
                              self.thread_id)
            if os.path.exists(zip_path):
                self.remove_file(
                    zip_path
                )  # if no dirty file is stored, status is acceptable
            ret = False
        return ret
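For reference, the portable stdlib route covers only access and modification times; creation time needs the Win32 call above. A minimal sketch (the function name is illustrative):

    import datetime
    import os

    def set_mtime(path, dt):
        # dt is a naive local datetime; os.utime expects POSIX timestamps
        ts = dt.timestamp()
        os.utime(path, (ts, ts))  # (atime, mtime); creation time is untouched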
Example 10
def fetch_analyze(conf,
                  md,
                  repeat_num=-1,
                  interval=5,
                  ignore_classified=False,
                  remove_after_use=False):
    # if not prepare_log(conf):
    #     write_information('prepare log failed!')
    #     return

    if not prepare_dll(conf):
        write_information('prepare dll failed!')
        write_information('dll path: %s' % conf.dumpload_dll)
        return

    if not prepare_symbol(conf):
        write_information('prepare symbol failed!')
        write_information('symbol path: %s' % conf.symbol_folder)
        return

    js = JobAssigner(conf, md)

    write_information('system started...')
    count = 0
    while True:
        if 0 < repeat_num <= count:
            break
        if count > 99990:  # keep the round counter bounded
            count = 0
        write_information('<round %d>' % count)
        count += 1

        # clean up stale temporary folders left over from earlier runs
        if remove_after_use:
            dc = IOHelper.DirCleaner(conf)
            # dc.clean_zip()
            dc.clean_unzip()

        js.go(conf.thread_num,
              conf.idx_beg,
              conf.idx_end,
              enquire_webpage=conf.retrieve_webpage,
              ignore_classified=ignore_classified,
              remove_after_use=remove_after_use)
        time.sleep(interval)

    return js.report_list
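A typical invocation might look like the following; conf and md come from the surrounding project, so the constructor names below are placeholders, not the project's real classes:

    # hypothetical driver: Config and MetaData stand in for whatever
    # the real project uses to build conf and md
    conf = Config()
    md = MetaData()
    reports = fetch_analyze(conf, md, repeat_num=1, interval=5,
                            remove_after_use=True)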
Example 11
    def parse_webpage2(url_base, start_idx, end_idx):
        # download the page
        try:
            vd = IOHelper.VisualizeDownload(url_base)
            page_info = vd.go()
        except Exception as e:
            write_information("failed to get web page!")
            return [], {}

        # save the raw page locally for debugging
        with open('page', 'wb') as f:
            f.write(page_info)

        # decode to utf-8
        page_info = page_info.decode('utf-8')
        # print(page_info)

        # extract the error-report file list; a sample listing entry:
        # <br>
        # 2017/5/18 17:26 888805
        # <a href="http://222.73.55.231/BugTrap/reports/swcSelf8.9.3.4687/error_report_6.zip">
        # error_report_6.zip
        # </a>
        pat = re.compile(
            r'<br>'  # opening tag
            r'([0-9/ :]*?)'  # date, time, and size (findall tuple index 0)
            r'<a href=".*?">'  # URL
            r'(error_report_(\d+)\.zip)'  # file name (index 1), report ID (index 2)
            r'</a>',  # closing tag
            re.IGNORECASE)

        file_info_dict = {}  # report_id -> (time, size)
        res = pat.findall(page_info)
        for item in res:
            if len(item) < 3:  # defensive: expect three captures per match
                continue

            # (file info, file name, report ID)
            file_info, file_name, report_id = item

            # 2017/5/18     17:26   888805
            #   date        time    filesize
            f_date_str, f_time_str, f_size_str = file_info.split()

            f_date_time_str = f_date_str + " " + f_time_str  # join date and time
            f_date_time = datetime.datetime.strptime(
                f_date_time_str, "%Y/%m/%d %H:%M")  # parse as a local timestamp
            # print(f_date_time)
            # print(type(item), file_info, file_name, report_id)

            report_id = int(report_id)  # report ID
            f_size = int(f_size_str)  # file size in bytes
            file_info_dict[report_id] = (f_date_time, f_size)

        id_list = sorted(file_info_dict.keys())
        if not id_list:
            write_information('no report files found on server')
            return [], {}
        write_information(
            'totally <%d> files found on server, ranging from %s to %s' %
            (len(id_list), id_list[0], id_list[-1]))

        # create file list
        new_id_list = []
        for report_id in id_list:
            idx = int(report_id)
            if start_idx >= 0 and idx < start_idx:
                continue
            if end_idx >= 0 and idx > end_idx:
                continue
            new_id_list.append(idx)
        return new_id_list, file_info_dict
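Putting the pattern and the timestamp parsing together on the sample entry quoted in the comment above (the URL here is a placeholder):

    import datetime
    import re

    sample = ('<br>2017/5/18 17:26 888805 '
              '<a href="http://example/error_report_6.zip">'
              'error_report_6.zip</a>')  # mirrors the sample entry above
    pat = re.compile(r'<br>([0-9/ :]*?)<a href=".*?">(error_report_(\d+)\.zip)</a>',
                     re.IGNORECASE)
    info, name, rid = pat.findall(sample)[0]
    f_date, f_time, f_size = info.split()
    dt = datetime.datetime.strptime(f_date + ' ' + f_time, '%Y/%m/%d %H:%M')
    print(int(rid), dt, int(f_size))  # 6 2017-05-18 17:26:00 888805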