def sleep_if_not_empty(self):
    """Pause before the next pass if the previous pass counted any work.

    Returns immediately while ``self.count_`` is falsy. Otherwise calls
    ``flush_log()``, sleeps for a random 80-100 second interval and resets
    ``self.count_`` to 0.
    """
    if not self.count_:
        return
    flush_log()
    # Delay drawn uniformly from [80, 100] seconds.
    pause = random.uniform(80, 100)
    printDebug('finished one pass, sleeping for %f...' % pause)
    time.sleep(pause)
    printDebug('begining next pass')
    self.count_ = 0
def do_resource(self, resource_urls, http_request):
    """Start a download for every resource URL that is not yet saved locally.

    For each URL the text after the last '/' is taken as the local file
    name. URLs whose file already exists in the current directory are
    skipped. URLs for which ``construct_request`` yields no request are
    logged and skipped instead of being handed to the downloader.

    resource_urls -- iterable of URL strings found on the page
    http_request  -- request of the page the URLs came from; used to
                     resolve relative URLs
    """
    for resource_url in resource_urls:
        # rsplit never raises: a URL without any '/' is used whole as the name.
        file_name = resource_url.rsplit('/', 1)[-1]
        if os.path.isfile(file_name):
            continue
        http_request_new = self.construct_request(resource_url, http_request)
        if http_request_new is None:
            # construct_request returns None for URLs it cannot handle;
            # there is nothing to download in that case.
            printDebug('PageDelegate::do_img: skipping img %s, no request could be built' % resource_url)
            continue
        image_saver = ImageSaver.ImageSaver('./', self.http_request_)
        printDebug('PageDelegate::do_img: handling img %s, new request: (%s)' % (resource_url, http_request_new))
        HttpFetchProcess.newDownloader(image_saver).download(http_request_new)
def hitEnd(self):
    """Finish the current record and reset the per-record state.

    When a composed name is set, ``(composedName_, lines_)`` is stored in
    ``IOMapping_`` under the key ``totalCount_ - 1``. When no name is set
    but ``backTraceHit_`` and ``lines_`` are both truthy, the collected
    lines are logged. Afterwards ``composedName_``, ``currentAllocation_``
    and ``lines_`` are reset.
    """
    name = self.composedName_
    if name:
        self.IOMapping_[self.totalCount_ - 1] = (name, self.lines_)
    elif self.backTraceHit_ and self.lines_:
        # No semantic matched these lines; this is only logged
        # (raising WrongSemanticError here is disabled).
        printDebug('no effectic semantic found' + str(self.lines_))
    self.composedName_ = None
    self.currentAllocation_ = 0
    self.lines_ = []
def hitEnd(self):
    """Flush the record being collected, then clear the collection state.

    With a composed name present, the pair ``(composedName_, lines_)`` is
    written to ``IOMapping_[totalCount_ - 1]``. Without one, the lines are
    logged if ``backTraceHit_`` is truthy and ``lines_`` is non-empty.
    In every case ``composedName_`` becomes None, ``currentAllocation_``
    becomes 0 and ``lines_`` becomes a new empty list.
    """
    composed = self.composedName_
    if not composed:
        if self.backTraceHit_ and self.lines_:
            # Reported but not treated as an error
            # (the WrongSemanticError raise is disabled).
            printDebug('no effectic semantic found' + str(self.lines_))
    else:
        slot = self.totalCount_ - 1
        self.IOMapping_[slot] = (composed, self.lines_)
    self.composedName_ = None
    self.currentAllocation_ = 0
    self.lines_ = []
def construct_request(self, url, http_request):
    """Build an HttpRequest for *url*, resolved against *http_request*.

    Handled forms:
      * ``//host/path``   -- treated as ``http://host/path``
      * ``/path``         -- same host as *http_request*
      * ``relative/path`` -- appended to the directory part of
                             ``http_request.path``
      * ``http://host[/path]`` -- host and path split out of the URL; a
                             URL without a path is requested as ``/``

    Returns None (after logging) for any other scheme. The cookie jar of
    *http_request* is passed on to every request that is built.
    """
    if url.startswith('//'):
        # Scheme-relative URL: this code only speaks plain http.
        url = 'http:' + url
    if url.startswith('/'):
        return HttpRequest(http_request.host, url, jar = http_request.jar)
    # Raw string: '\w' in a normal literal is an invalid escape sequence.
    if not re.match(r'^\w+://', url):
        base_path = http_request.path
        # rfind gives -1 when the base path has no '/', leaving an empty
        # directory; fall back to the root instead of raising ValueError.
        directory = base_path[:base_path.rfind('/') + 1] or '/'
        return HttpRequest(http_request.host, directory + url, jar = http_request.jar)
    if not url.startswith('http://'):
        printDebug('PageDelegateBase::construct_request: url not starts with http://: %s' % url)
        return None
    # partition tolerates a host-only URL such as 'http://example.com',
    # on which the previous index('/') lookup raised ValueError.
    host, slash, rest = url[len('http://'):].partition('/')
    return HttpRequest(host, slash + rest if slash else '/', jar = http_request.jar)
def main():
    """Run the fetch loop described by the configuration map.

    Reads the configuration via get_args_read(); it must contain "host"
    and "path" (otherwise an error is printed and the process exits with
    status 1). Optional keys: "max_parallel_pages" (default 1) and
    "page_limit" (default 0, meaning no limit is set on the main request).
    Cookies are loaded from ./cookies.txt. The outer loop never returns.

    NOTE(review): the nesting below was reconstructed from a source in
    which this function was collapsed onto one line -- confirm the
    placement of the ``break`` statements against the original file.
    """
    configure = get_args_read()
    if not 'host' in configure:
        printError('There should be "host" field in your map')
        sys.exit(1)
    if not 'path' in configure:
        printError('There should be "path" field in your map')
        sys.exit(1)
    if 'max_parallel_pages' in configure:
        MAX_PARALLEL_PAGES = configure['max_parallel_pages']
    else:
        MAX_PARALLEL_PAGES = 1
    if 'page_limit' in configure:
        page_limit_count = configure['page_limit']
    else:
        page_limit_count = 0
    cj = cookielib.MozillaCookieJar()
    cj.load('./cookies.txt')
    HttpFetchProcess.start()
    # The whole configuration map doubles as the dictionary handed to the
    # main page request.
    main_dict = configure
    main_request = PageDelegate.HttpRequest(configure['host'], configure['path'], jar = cj)
    main_page_request = Page.PageRequest(main_request, main_dict)
    # Queue of follow-up requests; passed to Page.do_page on every call
    # (presumably do_page appends to it -- verify against Page).
    new_page_requests = []
    sleeper = SleepForClass()
    while True:
        # One batch: at most MAX_PARALLEL_PAGES pages are started before
        # the pending fetches are drained below.
        for i in range(MAX_PARALLEL_PAGES):
            main_page_delegate = PageDelegate.PageDelegate()
            if not new_page_requests:
                # Queue is empty: only the first slot of a batch restarts
                # from the main page, after the sleeper's optional pause.
                if i == 0:
                    page_request = main_page_request
                    if page_limit_count != 0:
                        page_request.set_limit(Page.PageLimit(page_limit_count))
                    sleeper.sleep_if_not_empty()
                    Page.do_page(page_request, main_page_delegate, new_page_requests)
                    sleeper.inc_count()
                break
            else:
                page_request = new_page_requests.pop(0)
                page_limit = page_request.get_limit()
                if page_limit:
                    if page_limit.is_out():
                        # Limit exhausted: drop everything still queued
                        # and end this batch.
                        new_page_requests = []
                        break
                    page_limit.dec()
                Page.do_page(page_request, main_page_delegate, new_page_requests)
                sleeper.inc_count()
        # Keep calling HttpFetchProcess.next() until it returns a falsy value.
        while HttpFetchProcess.next():
            pass
        printDebug('<!------------------------------ count = ' + str(sleeper.count_))
        sleeper.check()
def parse(g, f):
    """Read heap records from the binary stream *f* until it is exhausted.

    Each record starts with four little-endian unsigned 32-bit words:
    address, length, backtrace length and data attributes. A positive
    backtrace length with none of the ``special_magic`` bits set is
    followed by that many 32-bit backtrace words; any other value is kept
    as the record's ``special`` marker. If the attributes carry
    ``DATA_ATTR_USER_CONTENT`` and the length is positive, that many bytes
    of user content follow.

    Every record becomes a HeapElement that is passed to ``g.addElement``
    and collected in the returned list. Reading stops at the first missing
    or short header. Raises ParseError when a backtrace word or the user
    content is truncated.
    """
    wordStruct = struct.Struct("<L")
    headerStruct = struct.Struct("<LLLL")
    elements = []
    while True:
        header = f.read(16)
        if not header or len(header) != 16:
            break
        addr, addrLen, backtraceLen, dataAttrib = headerStruct.unpack(header)
        backtraces = None
        special = 0
        if backtraceLen > 0 and (backtraceLen & special_magic) == 0:
            backtraces = []
            for _ in range(backtraceLen):
                word = f.read(4)
                if not word or len(word) != 4:
                    raise ParseError()
                backtraces.append(wordStruct.unpack(word)[0])
        else:
            # Thread data or global variable: the length field is a marker.
            special = backtraceLen
            if special:
                template = ("thread:{0:08x}-{1:08x} special = {2:08x}"
                            if special == thread_data
                            else "global:{0:08x}-{1:08x} special = {2:08x}")
                printDebug(template.format(addr, addr+addrLen, special))
        userContent = None
        if (dataAttrib & DATA_ATTR_USER_CONTENT) != 0 and addrLen > 0:
            userContent = f.read(addrLen)
            if not userContent or len(userContent) != addrLen:
                printError("{0:08x}, {1}, {2}".format(addr, len(userContent), addrLen))
                raise ParseError()
        element = HeapElement(addr, addrLen, backtraces, userContent)
        if special:
            element.special = special
        element.dataAttrib = dataAttrib
        g.addElement(element)
        elements.append(element)
    return elements
def handleBuf(self,buf):
    """Decode a memory-info buffer and pass each entry to printEntry.

    The buffer starts with four little-endian signed 32-bit integers:
    overallSize, infoSize, totalMemory and backtraceSize. Entries of
    infoSize bytes follow, up to overallSize bytes after the header.

    The walk is also bounded by len(buf). printEntry returns len(buf)
    when an entry does not fit, so on a truncated buffer the offset could
    never reach the size announced in the header and the loop would never
    end.

    Raises struct.error if buf is shorter than the 16-byte header.
    """
    printDebug("buf length is {0}".format(len(buf)))
    headerSize = 4 * 4
    overallSize,infoSize,totalMemory,backtraceSize = struct.unpack_from("<iiii",buf,0)
    printDebug("overallSize = {0};infoSize = {1},totalMemory = {2},backtraceSize = {3}".format(overallSize,infoSize,totalMemory,backtraceSize))
    # Never walk past the real end of the buffer, whatever the header says.
    meminfoSize = min(overallSize + headerSize,len(buf))
    offset = headerSize
    while offset < meminfoSize:
        offset = self.printEntry(buf,offset,infoSize)
def printEntry(self,buf,offset,infoSize):
    """Decode the entry at *offset* and forward it to genInfo.

    An entry consists of little-endian unsigned 32-bit words: size,
    allocation count, then backtrace words up to offset + infoSize. The
    SIZE_FLAG_ZYGOTE_CHILD bits are cleared from the size.

    Returns the offset just past the words that were read, or len(buf)
    (after logging) when fewer than infoSize bytes remain.
    """
    if len(buf) - offset < infoSize:
        printDebug("the buffer is too small! exit!")
        return len(buf)
    endOffset = offset + infoSize
    size,allocations = struct.unpack_from("<II",buf,offset)
    # Clearing the flag bits is a no-op when they are not set.
    size &= ~SIZE_FLAG_ZYGOTE_CHILD
    offset += 8
    backtraces = []
    while offset < endOffset:
        backtraces.append(struct.unpack_from("<I",buf,offset)[0])
        offset += 4
    self.genInfo(size,allocations,backtraces)
    return offset
def printEntry(self, buf, offset, infoSize):
    """Read one infoSize-byte entry from *buf* and report it via genInfo.

    Layout (little-endian unsigned 32-bit words): size, allocations,
    followed by backtrace words until offset + infoSize is reached.
    SIZE_FLAG_ZYGOTE_CHILD is masked out of the size before reporting.

    Returns the position after the last word read. When the buffer holds
    fewer than infoSize bytes from *offset*, logs that and returns
    len(buf) instead.
    """
    endOffset = offset + infoSize
    if endOffset > len(buf):
        printDebug("the buffer is too small! exit!")
        return len(buf)
    readWord = struct.Struct("<I").unpack_from
    cursor = offset
    rawSize = readWord(buf, cursor)[0]
    cursor += 4
    size = rawSize & ~SIZE_FLAG_ZYGOTE_CHILD if rawSize & SIZE_FLAG_ZYGOTE_CHILD else rawSize
    allocations = readWord(buf, cursor)[0]
    cursor += 4
    backtraces = []
    while cursor < endOffset:
        backtraces.append(readWord(buf, cursor)[0])
        cursor += 4
    self.genInfo(size, allocations, backtraces)
    return cursor
def parseObjectName(self,filePath):
    """Resolve a "path:line" location to an object name.

    The text after the last ':' is the 1-based line number and the text
    before it the file to read. That line is passed to
    ``self.typeParser_.parseObjectName`` and its result returned.

    Returns None when there is no ':' separator, the line number is not
    an integer (the location is logged), the line number is not positive
    or lies past the end of the file, or reading/parsing fails.
    """
    path,separator,lineText = filePath.rpartition(':')
    if not separator:
        return None
    try:
        lineNum = int(lineText)
    except ValueError:
        printDebug(filePath)
        return None
    if lineNum <= 0:
        # No line can match; skip opening and scanning the file.
        return None
    try:
        with open(path) as f:
            for number,text in enumerate(f,1):
                if number == lineNum:
                    return self.typeParser_.parseObjectName(text)
    except Exception:
        # Best effort: an unreadable file or a parser failure just means
        # "no name". KeyboardInterrupt and SystemExit still propagate.
        return None
    return None
def handleBuf(self, buf):
    """Parse the header of *buf* and feed every entry to printEntry.

    Header: four little-endian signed 32-bit integers (overallSize,
    infoSize, totalMemory, backtraceSize). Entries follow immediately and
    are consumed one printEntry call at a time until overallSize bytes
    past the header have been covered.

    The loop additionally stops at len(buf). For an entry that does not
    fit, printEntry returns len(buf); without this bound a truncated
    buffer would keep the loop condition true forever.

    Raises struct.error if buf is shorter than the 16-byte header.
    """
    printDebug("buf length is {0}".format(len(buf)))
    headerSize = 4 * 4
    overallSize, infoSize, totalMemory, backtraceSize = struct.unpack_from(
        "<iiii", buf, 0)
    printDebug(
        "overallSize = {0};infoSize = {1},totalMemory = {2},backtraceSize = {3}"
        .format(overallSize, infoSize, totalMemory, backtraceSize))
    # Trust the header only as far as the buffer actually reaches.
    meminfoSize = min(overallSize + headerSize, len(buf))
    offset = headerSize
    while offset < meminfoSize:
        offset = self.printEntry(buf, offset, infoSize)
def parseObjectName(self, filePath):
    """Look up the object name for a location given as "path:line".

    *filePath* is split at its last ':' into a file path and a 1-based
    line number. The addressed line is handed to
    ``self.typeParser_.parseObjectName`` and that result is returned.

    Returns None if the separator is missing, the line number is not an
    integer (the location is logged), the number is zero or negative, the
    file has fewer lines, or the file cannot be read / the line cannot be
    parsed.
    """
    path, separator, lineText = filePath.rpartition(':')
    if not separator:
        return None
    try:
        lineNum = int(lineText)
    except ValueError:
        printDebug(filePath)
        return None
    if lineNum <= 0:
        # Line numbers start at 1, so nothing can match; avoid the file I/O.
        return None
    try:
        with open(path) as f:
            for number, text in enumerate(f, 1):
                if number == lineNum:
                    return self.typeParser_.parseObjectName(text)
    except Exception:
        # Deliberately best effort: any read or parse failure yields None.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit out.
        return None
    return None
def callBack(line):
    """Return the object name enclosed by the semantic's left/right patterns.

    Scans *line* for ``semantic[0]``. Directly after each hit
    ``self.objectRe`` must match, and directly after that ``semantic[1]``
    must match. The text matched by ``self.objectRe`` in the first such
    triple is returned, except that the name 'T' (a template parameter)
    yields None. Returns None when no triple is found.
    """
    lineLength = len(line)
    leftStart = 0
    # '<' rather than '!=': after an empty left match at the very end of
    # the line the next start is len(line) + 1, which '!=' never stops on.
    while leftStart < lineLength:
        leftMatch = semantic[0].search(line,leftStart)
        if not leftMatch:
            break
        # An empty left match must still consume one character, otherwise
        # the scan would not advance.
        nextStart = leftMatch.end()
        if leftMatch.start() == leftMatch.end():
            nextStart += 1
        middleMatch = self.objectRe.match(line,nextStart)
        if middleMatch and semantic[1].match(line,middleMatch.end()):
            foundedName = middleMatch.group()
            if foundedName == 'T':
                # this is a template!
                return None
            printDebug("found named:{0}---{1}".format(foundedName,line))
            return foundedName
        leftStart = nextStart
    return None
def callBack(line):
    """Find the object name sitting between the semantic's two patterns.

    For every occurrence of ``semantic[0]`` in *line*, ``self.objectRe``
    is matched right behind it and ``semantic[1]`` right behind that. The
    first position where all three match decides the result: the text
    matched by ``self.objectRe``, or None if that text is 'T' (a template
    parameter). Returns None if the line contains no such sequence.
    """
    lineLength = len(line)
    leftStart = 0
    # Stop once the start passes the end of the line. Comparing with '!='
    # looped forever when an empty left match at end-of-line pushed the
    # start to len(line) + 1.
    while leftStart < lineLength:
        leftMatch = semantic[0].search(line, leftStart)
        if not leftMatch:
            break
        # Step over one character after an empty left match so that the
        # scan always makes progress.
        nextStart = leftMatch.end()
        if leftMatch.start() == leftMatch.end():
            nextStart += 1
        middleMatch = self.objectRe.match(line, nextStart)
        if middleMatch and semantic[1].match(line, middleMatch.end()):
            foundedName = middleMatch.group()
            if foundedName == 'T':
                # this is a template!
                return None
            printDebug("found named:{0}---{1}".format(foundedName, line))
            return foundedName
        leftStart = nextStart
    return None
def fail_to_get(self, http_request, err):
    """Log that fetching *http_request* failed with *err*."""
    details = (str(http_request), err)
    printDebug('PageDelegate::fail_to_get: http_request: %s, err: %s' % details)
def fail_to_get(self, http_request, errstring):
    """Log the request that could not be fetched together with *errstring*."""
    request_text = str(http_request)
    message = 'fails to get http_request: %s, errstring: %s' % (request_text, errstring)
    printDebug(message)
def do_get_url(http_request, page_delegate, callback):
    """Log the call and hand the request to the delegate.

    Returns whatever ``page_delegate.get_from_url(http_request, callback)``
    returns.
    """
    printDebug('Page::do_get_url')
    outcome = page_delegate.get_from_url(http_request, callback)
    return outcome
def callBack(line):
    """Return ``semanticTuple[2]`` if ``semantic`` is found in *line*, else None."""
    if not semantic.search(line):
        return None
    name = semanticTuple[2]
    printDebug("found named:{0}---{1}".format(name,line))
    return name
def callBack(line):
    """Report the fixed name of this semantic when its pattern occurs in *line*.

    Returns ``semanticTuple[2]`` (after logging it) when
    ``semantic.search(line)`` finds a match, otherwise None.
    """
    found = semantic.search(line)
    if not found:
        return None
    fixedName = semanticTuple[2]
    printDebug("found named:{0}---{1}".format(fixedName, line))
    return fixedName