Ejemplo n.º 1
0
class UrlBloom:
    """Bloom-filter wrapper used to detect elements that were already seen.

    Attributes:
        is_full: set to True once the underlying filter has refused an
            insertion; from then on `add` becomes a no-op.
        filter: the underlying BloomFilter (or the object returned by the
            backup loader).
        bomb: backup helper, only present when CONFIG['BACKUP'] == 1.
    """

    def __init__(self, _capacity=1000000, _error_rate=0.00001):
        """Build the filter, going through the backup helper when enabled.

        Args:
            _capacity: capacity passed to BloomFilter.
            _error_rate: error rate passed to BloomFilter.
        """
        self.is_full = False
        backup_enabled = CONFIG.get('BACKUP', 0) == 1
        if not backup_enabled:
            self.filter = BloomFilter(capacity=_capacity, error_rate=_error_rate)
            return

        # Backup mode: try to load a previously stored filter first.
        backup_path = CONFIG['TMP_DIR'] + CONFIG['BLOOM_FILE']
        self.bomb = TimeBomb(backup_path)
        restored = self.bomb.load()
        if restored is None:
            # Nothing could be loaded, so start from an empty filter.
            restored = BloomFilter(capacity=_capacity, error_rate=_error_rate)
        self.filter = restored
        # Hand the live filter to the backup helper.
        # NOTE(review): presumably this schedules periodic dumps - confirm
        # against TimeBomb.
        self.bomb.dump(self.filter)

    def add(self, links):
        """Insert every element of `links` into the filter.

        Does nothing once the filter has been marked full. Elements inserted
        before the filter filled up stay inserted.
        """
        if not self.is_full:
            try:
                for link in links:
                    self.filter.add(link)
            except IndexError:
                # The filter raises IndexError when it is at capacity.
                self.is_full = True

    def clean(self, links):
        """Return the elements of `links` the filter does not report as present.

        The relative order of `links` is kept.
        """
        return [link for link in links if link not in self.filter]
Ejemplo n.º 2
0
 def __init__(self, _capacity=1000000, _error_rate=0.00001):
     """Build the filter, going through the backup helper when enabled.

     Args:
         _capacity: capacity passed to BloomFilter.
         _error_rate: error rate passed to BloomFilter.
     """
     self.is_full = False
     backup_enabled = CONFIG.get('BACKUP', 0) == 1
     if not backup_enabled:
         self.filter = BloomFilter(capacity=_capacity, error_rate=_error_rate)
         return

     # Backup mode: try to load a previously stored filter first.
     backup_path = CONFIG['TMP_DIR'] + CONFIG['BLOOM_FILE']
     self.bomb = TimeBomb(backup_path)
     restored = self.bomb.load()
     if restored is None:
         # Nothing could be loaded, so start from an empty filter.
         restored = BloomFilter(capacity=_capacity, error_rate=_error_rate)
     self.filter = restored
     # Hand the live filter to the backup helper.
     # NOTE(review): presumably this schedules periodic dumps - confirm
     # against TimeBomb.
     self.bomb.dump(self.filter)
Ejemplo n.º 3
0
 def __init__(self):
     """Open the listening socket and prepare the message queue."""
     super(SQueue, self).__init__()

     # Non-blocking TCP listener with address reuse enabled.
     listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     self.server = listener
     listener.setblocking(0)
     listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
     address = (CONFIG['HOST'], CONFIG['PORT'])
     listener.bind(address)
     listener.listen(10)

     # Sockets we expect to read from; starts with the listener only.
     self.inputs = [listener]
     # Sockets we expect to write to.
     self.outputs = []

     # Outgoing message queue, loaded through the backup helper when
     # CONFIG['BACKUP'] == 1 and created empty otherwise.
     backup_enabled = CONFIG.get('BACKUP', 0) == 1
     if backup_enabled:
         queue_path = CONFIG['TMP_DIR'] + CONFIG['QUEUE_FILE']
         # NOTE(review): the meaning of the second argument is not visible
         # here - confirm against TimeBomb.
         self.bomb = TimeBomb(queue_path, True)
         restored = self.bomb.q_load()
         self.message_queues = restored
         self.bomb.q_dump(restored)
     else:
         self.message_queues = Queue.Queue()

     self.helper = CommonHelper()
Ejemplo n.º 4
0
class SQueue(object):
    """ queue server to listen request """
    def __init__(self):
        super(SQueue, self).__init__()
        #create a socket
        self.server = socket.socket(socket.AF_INET,socket.SOCK_STREAM)
        self.server.setblocking(0)
        #set option reused
        self.server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR  , 1)
        self.server.bind((CONFIG['HOST'], CONFIG['PORT']))
        self.server.listen(10)
        #sockets from which we except to read
        self.inputs = [self.server]
        #sockets from which we expect to write
        self.outputs = []
        #Outgoing message queues
        # determine if open backup queue data by time
        if CONFIG.get('BACKUP', 0) == 1:
            self.bomb = TimeBomb(CONFIG['TMP_DIR'] + CONFIG['QUEUE_FILE'], True)
            self.message_queues = self.bomb.q_load()
            self.bomb.q_dump(self.message_queues)
        else:
            self.message_queues = Queue.Queue()
        self.helper = CommonHelper()

    def start(self):
        while self.inputs:
            if CONFIG.get('BACKUP', 0) == 1 and (not self.bomb.is_sleep):
                time.sleep(10)
            print "waiting for next event"
            readable , writable , exceptional = select.select(self.inputs, self.outputs, self.inputs, CONFIG['TIME_OUT'])
            for s in readable :
                if s is self.server:
                    # A "readable" socket is ready to accept a connection
                    connection, client_address = s.accept()
                    print "    connection from ", client_address
                    connection.setblocking(0)
                    self.inputs.append(connection)
                else:
                    data = s.recv(CONFIG['RECV_BUFFER'])
                    if data:
                        tag, data, depth = self.helper.server_unpack(data)
                        if tag == TAG_GET:
                        # Add output channel for response
                            # print " received a request for url"
                            if s not in self.outputs :
                                self.outputs.append(s)
                        elif tag == TAG_PUT:
                            print " received " , data , "from ",s.getpeername()
                            for ele in data:
                                self.message_queues.put([ele,depth])
                            s.send('done')
                    else:
                        #Interpret empty result as closed connection
                        print "  closing", client_address
                        if s in self.outputs :
                            self.outputs.remove(s)
                        self.inputs.remove(s)
                        s.close()
            for s in writable:
                try:
                    next_msg = self.message_queues.get_nowait()
                except Queue.Empty:
                    print " " , s.getpeername() , 'queue empty'
                    s.send('wait')
                else:
                    print " sending " , next_msg , " to ", s.getpeername()
                    s.send(self.helper.pack(next_msg))
                self.outputs.remove(s)
            for s in exceptional:
                print " exception condition on ", s.getpeername()
                #stop listening for input on the connection
                self.inputs.remove(s)
                if s in self.outputs:
                    self.outputs.remove(s)
                s.close()