Example #1
class CMSSWMonCollector(Service):
    """                                                                                                                                                                                            
    Class definition of the dashboard CMSSWMonCollector agent.                                                                                                                                     
    """
    _logger = logging.getLogger("dashboard.collector.CMSSWMonCollector")

    def __init__(self, name, configFile):
        """
        Initializer for the object.
        """
        Service.__init__(self, name, configFile)

        # Hourly purge
        self.PURGE_INTERVAL = 3600

        # DB Table where to store the messages
        self.transfers_db_table = self.param('transfersDBTable')
        self.transfers_db_table_rejected = self.param('rejectedDBTable')
        # Maximum number of messages in the buffer when making a bulk insert 
        self.buffer_size = int(self.param('bufferSize'))

        self.id = self.param('id')
        self.dbsection = self.param('dbsection')  

        self._next_purge = time.time() + self.PURGE_INTERVAL

        # Try to open the local queue
        try:
            self.localQueue = DQS(path=self.param('localQueue'))
        except Exception as e:
            self._logger.error("connection to the local queue failed: %s" % e)

    def run(self):
        """
        Main function of the service. While it is running it inserts messages 
        from the messaging server into the database.
        """
        while self.status() is not None:
            (names, bodies) = ([], [])
            msgCount = 0
            for name in self.localQueue:
                if self.localQueue.lock(name):
                    msg = self.localQueue.get_message(name)
                    self.decode_message(msg, bodies)
                    names.append(name)
                    msgCount += 1

                    # Stop once the buffer is full
                    if msgCount >= self.buffer_size:
                        break

            (successes, failures, elapsed_time, bulk) = self.insert_messages(names, bodies)
            self._logger.info(
                "%d messages to insert for %s, %d succeeded and %d failed in %d ms (bulk = %s)"
                % (msgCount, self.id, successes, failures, elapsed_time, str(bulk))
            )
            )

            self.purge() 

            # Avoid busy-looping when the queue did not fill the buffer
            if msgCount != self.buffer_size:
                time.sleep(5)
    
    
    def JSON_format(self, message):
        """
        Decodes a JSON-formatted message into a Python dictionary.
        """
        # Strip a trailing ASCII End-of-Transmission character, if present
        if chr(4) in message:
            return json.loads(message.split(chr(4))[0])
        return json.loads(message)
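    # Example (hypothetical input): a payload terminated by an ASCII EOT byte
    # decodes the same as a clean payload:
    #   JSON_format('{"a": 1}' + chr(4))  ->  {'a': 1}
    #   JSON_format('{"a": 1}')           ->  {'a': 1}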
    
    
    def delete_messages(self, names):
        """
        Removes the given messages from the local queue.
        """
        for name in names:
            self.localQueue.remove(name)

    def purge(self):
        """
        Purges stale intermediate elements from the local queue, at most
        once per PURGE_INTERVAL.
        """
        if time.time() < self._next_purge:
            return
        self.localQueue.purge(60, 60)
        self._next_purge = time.time() + self.PURGE_INTERVAL

    def validate_length(self, bodies):
        """
        Ensures the first message carries the largest key set, swapping it
        into position 0 if needed.
        """
        key_counts = [len(x.keys()) for x in bodies]
        longest = max(key_counts)
        if key_counts[0] < longest:
            idx = key_counts.index(longest)
            bodies[0], bodies[idx] = bodies[idx], bodies[0]
            missing = [x for x in bodies[0].keys() if x not in bodies[idx].keys()]
            self._logger.warning("swapped message positions 0 and %s. Missing keys %s" % (idx, missing))
        return bodies
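    # Example (hypothetical input): with bodies = [{'a': 1}, {'a': 1, 'b': 2}]
    # the two dicts are swapped, so the message carrying the extra key 'b'
    # comes first; presumably the bulk insert derives its column list from
    # the first message.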
    
    def insert_messages(self, names, bodies):
        """
        Inserts the decoded messages into the database, falling back to
        one-by-one inserts if the bulk insert fails. Returns a tuple
        (successes, failures, elapsed_ms, is_bulk).
        """
        start = time.time()
        successes, failures, is_bulk = 0, 0, True

        (ctx, dao) = (None, None)
        try:
            # Get a site DAO to work with
            ctx = DAOContext.getDAOContext(section=self.dbsection) 
            dao = DAOFactory.getDAOFactory().getDAOObject(ctx, 'xrootd', 'XRootDDAO')
            
            # Try to make a bulk insert 
            if len(bodies) > 0:
                try:
                    bodies = self.validate_length(bodies)
                    dao.insertMessages(bodies, self.transfers_db_table)
                    successes = len(bodies)
                except Exception as msg:
                    is_bulk = False
                    self._logger.warning("couldn't feed all the data: %s" % msg)
                    self._logger.warning("failed to insert %s messages. Inserting messages one by one" % len(bodies))

                    # Try to insert the messages one by one if any exception
                    for body in bodies: 
                        try:
                            dao.insertMessages(body, self.transfers_db_table)
                            successes += 1
                        except Exception as msg:
                            failures += 1

                            # Try to insert the malformed message in a table without any constraint
                            if self.transfers_db_table_rejected is not None:
                                try:
                                    body['exception'] = str(msg)
                                    dao.insertMessages(body, self.transfers_db_table_rejected)
                                except Exception:
                                    self._logger.warning("couldn't feed the rejected message: %s" % msg)

            ctx.commit()
            self.delete_messages(names)

        except Exception as msg:
            # TODO: handle database problems (a downtime, for instance) more gracefully
            self._logger.error("%s" % msg)
            if ctx is not None:
                ctx.destroy()
            raise
        end = time.time()
        ms = 1000 * (end - start)
        return (successes, failures, int(ms), is_bulk)

    def decode_message(self, message, bodies):
        """
        Decodes a queue message into a Python dictionary and appends it
        to the bodies list.
        """
        try:
            body = message.get_body()
            body = body.replace(', ,', ',')
            # nan/-nan are not valid JSON; map them to null
            body = body.replace(':-nan,', ':null,').replace(':nan,', ':null,')
            msgDict = self.JSON_format(body)
            try:
                if msgDict['fallback'] is True:
                    msgDict['fallback'] = '1'
                else:
                    msgDict['fallback'] = '0'
            except KeyError:
                msgDict['fallback'] = '-'

            # convert seconds since the Epoch to datetime
            msgDict['start_date'] = datetime.utcfromtimestamp(int(msgDict['start_time']))
            msgDict['end_date'] = datetime.utcfromtimestamp(int(msgDict['end_time']))
        
            bodies.append(msgDict)

        except ValueError as msg:
            self._logger.warning("could not JSON-decode the message: %s" % message)
            self._logger.error(msg)
        except Exception as msg:
            self._logger.warning("Exception: %s" % msg)
Example #2
class ConsumerQueue(StatSig, Process):
    """
       Class represents spawned worker process that will periodically check and
       consume local cache/directory queue. It will initialize associated
       Publisher that will be used to dispatch consumed messages and will also
       spawn a Purger thread that will clean the local cache and keep it with
       the sound data.
    """
    def __init__(self, events, worker=None):
        Process.__init__(self)
        self.shared = Shared(worker=worker)
        super(ConsumerQueue, self).__init__(worker=worker)
        self.name = worker
        self.events = events
        self.sess_consumed = 0

        self.seenmsgs = set()
        self.inmemq = deque()
        self.setup()
        self.purger = Purger(self.events, worker=worker)

    def cleanup(self):
        self.unlock_dirq_msgs(self.seenmsgs)

    def setup(self):
        self.dirq = DQS(path=self.shared.queue['directory'])

        numloop = None
        if (self.shared.topic['bulk'] == 1
                or self.shared.topic['bulk'] >= self.shared.queue['rate']):
            numloop = 1
        elif self.shared.queue['rate'] > self.shared.topic['bulk']:
            numloop = int(self.shared.queue['rate'] /
                          self.shared.topic['bulk'])
        self.pubnumloop = numloop

        self.shared.runtime.update(inmemq=self.inmemq,
                                   pubnumloop=self.pubnumloop,
                                   dirq=self.dirq,
                                   filepublisher=False)
        self.publisher = self.shared.runtime['publisher'](self.events,
                                                          worker=self.name)

    def run(self):
        termev = self.events['term-' + self.name]
        usr1ev = self.events['usr1-' + self.name]
        periodev = self.events['period-' + self.name]
        lck = self.events['lck-' + self.name]
        evgup = self.events['giveup-' + self.name]

        while True:
            try:
                if termev.is_set():
                    self.shared.log.info('Process {0} received SIGTERM'.format(
                        self.name))
                    lck.acquire(True)
                    self.stats()
                    self.publisher.stats()
                    self.cleanup()
                    lck.release()
                    termev.clear()
                    raise SystemExit(0)

                if usr1ev.is_set():
                    self.shared.log.info('Process {0} received SIGUSR1'.format(
                        self.name))
                    lck.acquire(True)
                    self.stats()
                    self.publisher.stats()
                    lck.release()
                    usr1ev.clear()

                if periodev.is_set():
                    self.stat_reset()
                    self.publisher.stat_reset()
                    periodev.clear()

                nmsgs_consume = 1 if self.shared.topic['bulk'] == 1 \
                                else max(self.shared.topic['bulk'], self.shared.queue['rate'])
                if self.consume_dirq_msgs(nmsgs_consume):
                    ret, published = self.publisher.write()
                    if ret:
                        self.remove_dirq_msgs()
                    elif published:
                        self.shared.log.error('{0} {1} giving up'.format(
                            self.__class__.__name__, self.name))
                        self.stats()
                        self.publisher.stats()
                        self.remove_dirq_msgs(published)
                        self.unlock_dirq_msgs(
                            set(e[0]
                                for e in self.inmemq).difference(published))
                        evgup.set()
                        raise SystemExit(0)
                    else:
                        self.shared.log.error('{0} {1} giving up'.format(
                            self.__class__.__name__, self.name))
                        self.stats()
                        self.publisher.stats()
                        self.unlock_dirq_msgs()
                        evgup.set()
                        raise SystemExit(0)

                time.sleep(1 / self.shared.queue['rate'])

            except KeyboardInterrupt:
                self.cleanup()
                raise SystemExit(0)

    def _increm_intervalcounters(self, num):
        now = int(time.time())
        counter = self.shared.statint[self.name]['consumed']
        counter[now] = num + counter.get(now, 0)
        self.shared.statint[self.name]['consumed_periodic'] += num

    def consume_dirq_msgs(self, num=0):
        def _inmemq_append(elem):
            self.inmemq.append(elem)
            self._increm_intervalcounters(1)
            self.sess_consumed += 1
            if num and self.sess_consumed == num:
                self.sess_consumed = 0
                self.seenmsgs.clear()
                return True

        try:
            for name in self.dirq:
                # drop undersized (likely corrupt or empty) elements
                if os.stat(self.shared.queue['directory'] + name).st_size < 8:
                    os.unlink(self.shared.queue['directory'] + name)
                if name in self.seenmsgs:
                    continue
                self.seenmsgs.update([name])
                already_lckd = os.path.exists(self.dirq.get_path(name))
                if not already_lckd and self.dirq.lock(name):
                    if _inmemq_append((name, self.dirq.get_message(name))):
                        return True
                elif already_lckd:
                    if _inmemq_append((name, self.dirq.get_message(name))):
                        return True

        except Exception as e:
            self.shared.log.error(e)

        return False

    def unlock_dirq_msgs(self, msgs=None):
        try:
            msgl = msgs if msgs else self.inmemq
            for m in msgl:
                msg = m[0] if not isinstance(m, str) else m
                if os.path.exists('{0}/{1}'.format(self.dirq.path, msg)):
                    self.dirq.unlock(msg)
            self.inmemq.clear()
        except (OSError, IOError) as e:
            self.shared.log.error(e)

    def remove_dirq_msgs(self, msgs=None):
        try:
            msgl = msgs if msgs else self.inmemq
            for m in msgl:
                msg = m[0] if not isinstance(m, str) else m
                if os.path.exists('{0}/{1}'.format(self.dirq.path, msg)):
                    self.dirq.remove(msg)
            self.inmemq.clear()
        except (OSError, IOError) as e:
            self.shared.log.error(e)
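
The bulk/rate arithmetic in setup() and run() above is subtle; the standalone sketch below reproduces the same two computations with hypothetical example values (pub_num_loop and msgs_to_consume are illustrative names, not part of the original class).

def pub_num_loop(bulk, rate):
    # publishes needed to drain one consume round (mirrors setup())
    if bulk == 1 or bulk >= rate:
        return 1
    return int(rate / bulk)

def msgs_to_consume(bulk, rate):
    # messages one consume round should collect (mirrors run())
    return 1 if bulk == 1 else max(bulk, rate)

assert pub_num_loop(bulk=1, rate=10) == 1      # single-message publishing
assert pub_num_loop(bulk=100, rate=10) == 1    # one bulk publish per round
assert pub_num_loop(bulk=10, rate=100) == 10   # ten bulk publishes per round
assert msgs_to_consume(bulk=10, rate=100) == 100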