def handle(self):
    """Receive one XML request from the connected client.

    Reads from ``self.request`` until the accumulated data ends with
    ``</root>``, the peer closes the connection, or a hard socket error
    occurs. The received pieces are collected in ``datapieces``.

    ``self.re_title`` and ``self.googlechecker`` are (re)created on every
    call. NOTE(review): ``self.request`` / ``self.client_address`` look like
    the attributes of a SocketServer request handler -- confirm.
    """
    # Local imports: this block newly needs both names and the file's
    # top-level import list is not visible from here.
    import errno
    import socket

    self.re_title = re.compile(
        "<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>",
        re.I)
    self.googlechecker = checker()

    end_marker = '</root>'

    # receive data
    print("%s %s" % ("From:", self.client_address))
    datapieces = []
    datapiece = self.request.recv(10240)
    datapieces.append(datapiece)
    print("%s %s" % ('first get ', len(datapiece)))

    # `tail` always holds enough trailing bytes to recognise the end marker,
    # even when the marker is split across several small pieces.
    tail = datapiece
    self.request.setblocking(0)
    while len(datapiece) > 0 and not tail.endswith(end_marker):
        try:
            datapiece = self.request.recv(10240)
        except socket.error as e:
            if e.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
                # Nothing available yet on the non-blocking socket: wait
                # a little and poll again.
                time.sleep(0.5)
                continue
            # Any other socket error will not go away by retrying; stop
            # receiving instead of looping forever.
            print(e)
            break
        datapieces.append(datapiece)
        tail = (tail + datapiece)[-len(end_marker):]
def handle(self):
    """Receive one XML request from the connected client.

    Reads from ``self.request`` until the accumulated data ends with
    ``</root>``, the peer closes the connection, or a hard socket error
    occurs. The received pieces are collected in ``datapieces``.

    ``self.re_title`` and ``self.googlechecker`` are (re)created on every
    call. NOTE(review): ``self.request`` / ``self.client_address`` look like
    the attributes of a SocketServer request handler -- confirm.
    """
    # Local imports: this block newly needs both names and the file's
    # top-level import list is not visible from here.
    import errno
    import socket

    self.re_title = re.compile(
        "<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>",
        re.I)
    self.googlechecker = checker()

    end_marker = '</root>'

    # receive data
    print("%s %s" % ("From:", self.client_address))
    datapieces = []
    datapiece = self.request.recv(10240)
    datapieces.append(datapiece)
    print("%s %s" % ('first get ', len(datapiece)))

    # `tail` always holds enough trailing bytes to recognise the end marker,
    # even when the marker is split across several small pieces.
    tail = datapiece
    self.request.setblocking(0)
    while len(datapiece) > 0 and not tail.endswith(end_marker):
        try:
            datapiece = self.request.recv(10240)
        except socket.error as e:
            if e.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
                # Nothing available yet on the non-blocking socket: wait
                # a little and poll again.
                time.sleep(0.5)
                continue
            # Any other socket error will not go away by retrying; stop
            # receiving instead of looping forever.
            print(e)
            break
        datapieces.append(datapiece)
        tail = (tail + datapiece)[-len(end_marker):]
def start(self):
    """Run a blocking TCP server on port 55555 that answers title checks.

    Clients are served one at a time. For each connection the server reads
    chunks of up to 4096 bytes until one chunk contains at least two
    ``<title>...</title>`` elements, passes the first two titles to
    ``checker.isInSamePage`` and sends ``str(result)`` back. Chunks with
    fewer than two titles are reported and discarded (they are not
    accumulated). The client socket is always closed exactly once.

    Nothing is sent when no result could be computed (peer closed early or
    an error occurred); the client then simply sees the connection close.
    This method never returns normally.
    """
    host = ''
    port = 55555
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind((host, port))
    s.listen(1)
    googlechecker = checker()
    re_title = re.compile("<title>([^\\x00]+?)</title>", re.I)
    while 1:
        clientsock = None
        try:
            clientsock, clientaddr = s.accept()
            print("%s %s" % ("Got connection from", clientsock.getpeername()))
            while 1:
                data = clientsock.recv(4096)
                if not data:
                    # Peer closed the connection; without this check recv()
                    # would keep returning '' and the loop would spin.
                    break
                matchs = re_title.findall(data)
                if len(matchs) < 2:
                    print("%s %s" % ("ERROR NOTENOUGH TITLE:", data))
                    continue
                r = googlechecker.isInSamePage(matchs[0], matchs[1])
                print("%s %s" % ('Result is:', r))
                # Reply only once a result for *this* request exists, so an
                # unbound or stale value can never be sent.
                clientsock.sendall(str(r))
                break
        except Exception as e:
            # Best effort: report the problem and keep serving new clients.
            print(e)
        finally:
            if clientsock is not None:
                clientsock.close()
def start(self):
    """Run a blocking TCP server on port 55555 that answers title checks.

    Clients are served one at a time. For each connection the server reads
    chunks of up to 4096 bytes until one chunk contains at least two
    ``<title>...</title>`` elements, passes the first two titles to
    ``checker.isInSamePage`` and sends ``str(result)`` back. Chunks with
    fewer than two titles are reported and discarded (they are not
    accumulated). The client socket is always closed exactly once.

    Nothing is sent when no result could be computed (peer closed early or
    an error occurred); the client then simply sees the connection close.
    This method never returns normally.
    """
    host = ""
    port = 55555
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind((host, port))
    s.listen(1)
    googlechecker = checker()
    re_title = re.compile("<title>([^\\x00]+?)</title>", re.I)
    while 1:
        clientsock = None
        try:
            clientsock, clientaddr = s.accept()
            print("%s %s" % ("Got connection from", clientsock.getpeername()))
            while 1:
                data = clientsock.recv(4096)
                if not data:
                    # Peer closed the connection; without this check recv()
                    # would keep returning '' and the loop would spin.
                    break
                matchs = re_title.findall(data)
                if len(matchs) < 2:
                    print("%s %s" % ("ERROR NOTENOUGH TITLE:", data))
                    continue
                r = googlechecker.isInSamePage(matchs[0], matchs[1])
                print("%s %s" % ("Result is:", r))
                # Reply only once a result for *this* request exists, so an
                # unbound or stale value can never be sent.
                clientsock.sendall(str(r))
                break
        except Exception as e:
            # Best effort: report the problem and keep serving new clients.
            print(e)
        finally:
            if clientsock is not None:
                clientsock.close()
def start(self):
    """Run a blocking TCP server on ``(self.host, self.port)``.

    Clients are served one at a time. For each connection a single
    ``recv`` is performed; every ``<title1>...</title1><title2>...</title2>``
    pair found in the received data is logged and the list of
    ``(title1, title2)`` tuples is passed to ``checker.isInSamePageMulti``.
    ``str(result)`` followed by a newline is sent back, then the client
    socket is closed exactly once.

    Nothing is sent when no result could be computed (empty read or an
    error); the client then simply sees the connection close.
    This method never returns normally.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind((self.host, self.port))
    s.listen(1)
    googlechecker = checker()
    re_title = re.compile(
        "<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>",
        re.I)
    while 1:
        clientsock = None
        try:
            clientsock, clientaddr = s.accept()
            print("%s %s" % ("Got connection from", clientsock.getpeername()))
            data = clientsock.recv(409600000)
            if data:
                # [('t1', 't2'), ('b1', 'b2')]
                matchs = re_title.findall(data)
                print('- XML -----------------------------------------')
                print(data)
                print('- XML -----------------------------------------')
                print('- Receive title pair --------------------------')
                for paircount, (title1, title2) in enumerate(matchs):
                    print("%s: %s\n\t%s" % (paircount, title1, title2))
                print('< Receive title pair --------------------------')
                r = googlechecker.isInSamePageMulti(matchs)
                print("%s %s" % ('Result is:', str(r)))
                # sendall() keeps writing until the whole reply is out;
                # plain send() may transmit only part of it. Replying here
                # (not in a finally) guarantees `r` belongs to this request.
                clientsock.sendall(str(r) + "\n")
        except Exception as e:
            # Best effort: report the problem and keep serving new clients.
            print(e)
        finally:
            if clientsock is not None:
                clientsock.close()
def start(self):
    """Run a blocking TCP server on ``(self.host, self.port)``.

    Clients are served one at a time. For each connection a single
    ``recv`` is performed; every ``<title1>...</title1><title2>...</title2>``
    pair found in the received data is logged and the list of
    ``(title1, title2)`` tuples is passed to ``checker.isInSamePageMulti``.
    ``str(result)`` followed by a newline is sent back, then the client
    socket is closed exactly once.

    Nothing is sent when no result could be computed (empty read or an
    error); the client then simply sees the connection close.
    This method never returns normally.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.bind((self.host, self.port))
    s.listen(1)
    googlechecker = checker()
    re_title = re.compile(
        "<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>",
        re.I)
    while 1:
        clientsock = None
        try:
            clientsock, clientaddr = s.accept()
            print("%s %s" % ("Got connection from", clientsock.getpeername()))
            data = clientsock.recv(409600000)
            if data:
                # [('t1', 't2'), ('b1', 'b2')]
                matchs = re_title.findall(data)
                print('- XML -----------------------------------------')
                print(data)
                print('- XML -----------------------------------------')
                print('- Receive title pair --------------------------')
                for paircount, (title1, title2) in enumerate(matchs):
                    print("%s: %s\n\t%s" % (paircount, title1, title2))
                print('< Receive title pair --------------------------')
                r = googlechecker.isInSamePageMulti(matchs)
                print("%s %s" % ('Result is:', str(r)))
                # sendall() keeps writing until the whole reply is out;
                # plain send() may transmit only part of it. Replying here
                # (not in a finally) guarantees `r` belongs to this request.
                clientsock.sendall(str(r) + "\n")
        except Exception as e:
            # Best effort: report the problem and keep serving new clients.
            print(e)
        finally:
            if clientsock is not None:
                clientsock.close()
def __init__(self):
    """Prepare the title-pair regex and create the checker instance."""
    # One <title1>...</title1> element followed, after optional whitespace,
    # by one <title2>...</title2> element. Each group lazily captures one or
    # more non-NUL characters; matching is case-insensitive.
    title_pair_pattern = (
        "<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>"
    )
    self.re_title = re.compile(title_pair_pattern, re.I)
    self.googlechecker = checker()
def __init__(self):
    """Set the thread limit and create the checker instance."""
    # NOTE(review): presumably an upper bound on worker threads; the code
    # that reads max_threads is not visible here -- confirm.
    self.max_threads = 2

    self.google_checker = checker()
def __init__(self):
    """Prepare the title-pair regex and create the checker instance."""
    # One <title1>...</title1> element followed, after optional whitespace,
    # by one <title2>...</title2> element. Each group lazily captures one or
    # more non-NUL characters; matching is case-insensitive.
    title_pair_pattern = (
        "<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>"
    )
    self.re_title = re.compile(title_pair_pattern, re.I)
    self.googlechecker = checker()
def __init__(self):
    """Wire up the collaborators this object works with.

    Stores the ``Settings`` instance returned by ``Settings.getInstance()``,
    a ``GoogleResultParser``, an ``HtmlRetriever`` configured from the
    settings' ``use_proxy`` value, and a ``checker``.
    """
    self.settings = Settings.getInstance()
    self.parsegoogle = GoogleResultParser()

    # The retriever receives the proxy switch taken from the settings.
    use_proxy = self.settings.use_proxy
    self.htmlRetriever = HtmlRetriever(use_proxy)

    self.checker = checker()
def __init__(self):
    """Wire up the collaborators this object works with.

    Stores the ``Settings`` instance returned by ``Settings.getInstance()``,
    a ``GoogleResultParser``, an ``HtmlRetriever`` configured from the
    settings' ``use_proxy`` value, and a ``checker``.
    """
    self.settings = Settings.getInstance()
    self.parsegoogle = GoogleResultParser()

    # The retriever receives the proxy switch taken from the settings.
    use_proxy = self.settings.use_proxy
    self.htmlRetriever = HtmlRetriever(use_proxy)

    self.checker = checker()