def handle(self):
			"""Read a client request from ``self.request`` piece by piece.

			Pieces of up to 10240 bytes are appended to ``datapieces`` until a
			recv returns an empty string or the most recent data ends with
			'</root>'.

			NOTE(review): ``datapieces``, ``self.re_title`` and
			``self.googlechecker`` are not consumed in the visible lines; the
			method presumably continues beyond this excerpt.
			"""
			# Regex capturing a <title1>...</title1><title2>...</title2> pair
			# (case-insensitive); checker() is defined elsewhere.
			self.re_title = re.compile("<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>", re.I)
			self.googlechecker = checker();

			# receive data
			print "From:", self.client_address
			datapieces = []
			# First read happens before the socket is switched with setblocking(0).
			datapiece = self.request.recv(10240)
			datapieces.append(datapiece)
			print 'first get ', len(datapiece)

			# Previous piece, kept so an end marker split across two reads is found.
			piece_last = ''

			self.request.setblocking(0)
			while(len(datapiece) > 0):
				try:
					#print 'second get ', len(datapiece)
					datapiece = self.request.recv(10240)
					datapieces.append(datapiece)
				except Exception, e:
					# Any failure is printed and retried after 0.5 s; datapiece
					# keeps its previous value, so the check below re-examines it.
					# NOTE(review): a persistent error would loop here forever.
					print e
					time.sleep(0.5)
					#break
				# Stop once the tail of the last two pieces is the closing root tag.
				last2pieces = ''.join((piece_last, datapiece))
				if  last2pieces.endswith('</root>'):
					break
				piece_last = datapiece
        def handle(self):
            """Read a client request from ``self.request`` piece by piece.

            Pieces of up to 10240 bytes are collected in ``datapieces`` until
            a recv returns an empty string or the most recent data ends with
            '</root>'.

            NOTE(review): ``datapieces``, ``self.re_title`` and
            ``self.googlechecker`` are not used in the visible lines; the
            method presumably continues beyond this excerpt.
            """
            # Case-insensitive regex for a <title1>/<title2> pair; checker()
            # is defined elsewhere.
            self.re_title = re.compile(
                "<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>",
                re.I)
            self.googlechecker = checker()

            # receive data
            print "From:", self.client_address
            datapieces = []
            # First read is issued before setblocking(0) is applied.
            datapiece = self.request.recv(10240)
            datapieces.append(datapiece)
            print 'first get ', len(datapiece)

            # Previous piece, so a '</root>' split across two reads is detected.
            piece_last = ''

            self.request.setblocking(0)
            while (len(datapiece) > 0):
                try:
                    #print 'second get ', len(datapiece)
                    datapiece = self.request.recv(10240)
                    datapieces.append(datapiece)
                except Exception, e:
                    # Every exception is printed and followed by a 0.5 s pause;
                    # datapiece is left unchanged and checked again below.
                    # NOTE(review): a persistent error would spin here forever.
                    print e
                    time.sleep(0.5)
                    #break
                # End of message: the joined tail ends with the closing root tag.
                last2pieces = ''.join((piece_last, datapiece))
                if last2pieces.endswith('</root>'):
                    break
                piece_last = datapiece
Exemple #3
0
    def start(self):
        """Run a TCP server on port 55555 that compares two titles.

        For each accepted connection, up to 4096 bytes are read, every
        <title>...</title> match is extracted, and the first two matches are
        passed to ``googlechecker.isInSamePage``; ``str(result)`` is sent back.

        NOTE(review): the outer ``try:`` has no ``except``/``finally`` in the
        visible lines, so this excerpt appears to be cut off.
        """
        # Empty host: bind on all local interfaces.
        host = ''
        port = 55555

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((host, port))
        s.listen(1)
        googlechecker = checker()

        re_title = re.compile("<title>([^\\x00]+?)</title>", re.I)
        while 1:
            try:
                clientsock, clientaddr = s.accept()
                print "Got connection from", clientsock.getpeername()
                while 1:
                    try:
                        data = clientsock.recv(4096)
                        #			if not len(data):
                        #				break
                        matchs = re.findall(re_title, data)
                        if len(matchs) < 2:
                            print "ERROR NOTENOUGH TITLE:", data
                            # The finally clause below still runs on continue.
                            continue

                        r = googlechecker.isInSamePage(matchs[0], matchs[1])
                        print 'Result is:', r

                    except Exception, e:
                        print e
                    finally:
                        # NOTE(review): r is only bound after a successful
                        # isInSamePage call; on the error/continue paths of a
                        # first iteration str(r) raises NameError here.
                        # NOTE(review): the socket is closed on every pass,
                        # yet the inner loop has no break and keeps iterating.
                        clientsock.sendall(str(r))
                        clientsock.close()
                # The inner `while 1` has no break, so it only exits via an
                # exception, which skips this line.
                clientsock.close()
Exemple #4
0
    def start(self):
        """Run a TCP server on port 55555 that compares two titles.

        For each accepted connection, up to 4096 bytes are read, every
        <title>...</title> match is extracted, and the first two matches are
        passed to ``googlechecker.isInSamePage``; ``str(result)`` is sent back.

        NOTE(review): the outer ``try:`` has no ``except``/``finally`` in the
        visible lines, so this excerpt appears to be cut off.
        """
        # Empty host: bind on all local interfaces.
        host = ""
        port = 55555

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((host, port))
        s.listen(1)
        googlechecker = checker()

        re_title = re.compile("<title>([^\\x00]+?)</title>", re.I)
        while 1:
            try:
                clientsock, clientaddr = s.accept()
                print "Got connection from", clientsock.getpeername()
                while 1:
                    try:
                        data = clientsock.recv(4096)
                        # 			if not len(data):
                        # 				break
                        matchs = re.findall(re_title, data)
                        if len(matchs) < 2:
                            print "ERROR NOTENOUGH TITLE:", data
                            # The finally clause below still runs on continue.
                            continue

                        r = googlechecker.isInSamePage(matchs[0], matchs[1])
                        print "Result is:", r

                    except Exception, e:
                        print e
                    finally:
                        # NOTE(review): r is only bound after a successful
                        # isInSamePage call; on the error/continue paths of a
                        # first iteration str(r) raises NameError here.
                        # NOTE(review): the socket is closed on every pass,
                        # yet the inner loop has no break and keeps iterating.
                        clientsock.sendall(str(r))
                        clientsock.close()
                # The inner `while 1` has no break, so it only exits via an
                # exception, which skips this line.
                clientsock.close()
Exemple #5
0
    def start(self):
        """Run a TCP server on (self.host, self.port) that checks title pairs.

        For each accepted connection the received data is scanned for
        <title1>...</title1><title2>...</title2> pairs; the list of
        (title1, title2) tuples is passed to
        ``googlechecker.isInSamePageMulti`` and ``str(result)`` plus a newline
        is sent back to the client.

        NOTE(review): the outer ``try:`` has no ``except``/``finally`` in the
        visible lines, so this excerpt appears to be cut off.
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((self.host, self.port))
        s.listen(1)
        googlechecker = checker()

        re_title = re.compile(
            "<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>",
            re.I)
        while 1:
            try:
                clientsock, clientaddr = s.accept()
                print "Got connection from", clientsock.getpeername()
                while 1:
                    try:
                        # NOTE(review): a single recv with a very large limit;
                        # presumably meant to fetch the whole request at once,
                        # but one recv may return only part of it -- confirm.
                        data = clientsock.recv(409600000)
                        if not len(data):
                            # The finally clause below still runs on break.
                            break

                        matchs = re.findall(re_title, data)

                        # Debug dump of the raw payload and the parsed pairs.
                        print '- XML -----------------------------------------'
                        print data
                        print '- XML -----------------------------------------'
                        paircount = 0
                        print '- Receive title pair --------------------------'
                        for title1, title2 in matchs:
                            print "%s: %s\n\t%s" % (paircount, title1, title2)
                            paircount += 1
                        print '< Receive title pair --------------------------'

                        #						if len(matchs) < 2:
                        #							print "ERROR NOTENOUGH TITLE:", data
                        #							continue

                        # [('t1', 't2'), ('b1', 'b2')]
                        r = googlechecker.isInSamePageMulti(matchs)
                        print 'Result is:', str(r)

                    except Exception, e:
                        print e
                    finally:
                        # NOTE(review): r is only bound after a successful
                        # isInSamePageMulti call; if the first recv is empty
                        # or fails, str(r) raises NameError here.
                        # NOTE(review): the socket is closed on every pass
                        # while the inner loop may iterate again.
                        clientsock.send(str(r))
                        clientsock.send("\n")
                        clientsock.close()
                clientsock.close()
	def start(self):
		"""Run a TCP server on (self.host, self.port) that checks title pairs.

		For each accepted connection the received data is scanned for
		<title1>...</title1><title2>...</title2> pairs; the list of
		(title1, title2) tuples is passed to
		``googlechecker.isInSamePageMulti`` and ``str(result)`` plus a newline
		is sent back to the client.

		NOTE(review): the outer ``try:`` has no ``except``/``finally`` in the
		visible lines, so this excerpt appears to be cut off.
		"""
		s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
		s.bind((self.host, self.port))
		s.listen(1)
		googlechecker = checker();

		re_title = re.compile("<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>", re.I)
		while 1:
			try:
				clientsock, clientaddr = s.accept()
				print "Got connection from", clientsock.getpeername()
				while 1:
					try:
						# NOTE(review): a single recv with a very large limit;
						# presumably meant to fetch the whole request at once,
						# but one recv may return only part of it -- confirm.
						data = clientsock.recv(409600000)
						if not len(data):
							# The finally clause below still runs on break.
							break

						matchs = re.findall(re_title, data)

						# Debug dump of the raw payload and the parsed pairs.
						print '- XML -----------------------------------------'
						print data
						print '- XML -----------------------------------------'
						paircount = 0;
						print '- Receive title pair --------------------------'
						for title1, title2 in matchs:
							print "%s: %s\n\t%s" % (paircount, title1, title2)
							paircount += 1
						print '< Receive title pair --------------------------'

#						if len(matchs) < 2:
#							print "ERROR NOTENOUGH TITLE:", data
#							continue

						# [('t1', 't2'), ('b1', 'b2')]
						r = googlechecker.isInSamePageMulti(matchs)
						print 'Result is:', str(r)

					except Exception, e:
						print e
					finally:
						# NOTE(review): r is only bound after a successful
						# isInSamePageMulti call; if the first recv is empty
						# or fails, str(r) raises NameError here.
						# NOTE(review): the socket is closed on every pass
						# while the inner loop may iterate again.
						clientsock.send(str(r))
						clientsock.send("\n")
						clientsock.close()
				clientsock.close()
	def __init__(self):
		"""Set up the title-pair regex and the checker instance."""
		# Matches one <title1>...</title1> immediately followed (whitespace
		# allowed) by <title2>...</title2>, case-insensitively.
		title_pair_pattern = "<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>"
		self.re_title = re.compile(title_pair_pattern, re.I)
		self.googlechecker = checker()
	def __init__(self):
		"""Store the thread limit and create the checker instance."""
		thread_limit = 2
		self.max_threads = thread_limit
		# checker is defined elsewhere in the project.
		self.google_checker = checker()
 def __init__(self):
     """Set up the title-pair regex and the checker instance."""
     # Matches one <title1>...</title1> immediately followed (whitespace
     # allowed) by <title2>...</title2>, case-insensitively.
     title_pair_pattern = (
         "<title1>([^\\x00]+?)</title1>\\s*<title2>([^\\x00]+?)</title2>")
     self.re_title = re.compile(title_pair_pattern, re.I)
     self.googlechecker = checker()
	def __init__(self):
		"""Create the collaborators: settings, result parser, HTML retriever, checker."""
		shared_settings = Settings.getInstance()
		self.settings = shared_settings
		self.parsegoogle = GoogleResultParser()
		# The retriever is configured from the settings' use_proxy value.
		proxy_flag = shared_settings.use_proxy
		self.htmlRetriever = HtmlRetriever(proxy_flag)
		self.checker = checker()
Exemple #11
0
 def __init__(self):
     """Create the collaborators: settings, result parser, HTML retriever, checker."""
     shared_settings = Settings.getInstance()
     self.settings = shared_settings
     self.parsegoogle = GoogleResultParser()
     # The retriever is configured from the settings' use_proxy value.
     proxy_flag = shared_settings.use_proxy
     self.htmlRetriever = HtmlRetriever(proxy_flag)
     self.checker = checker()