Beispiel #1
0
class UmcWriter(object):
    """Base for umcpush writers: holds the writer definition and its params.

    ``self.params`` starts from the values found under
    ``common.umcpush.writer-params`` and is then overridden by the
    ``writer-params`` element of this writer's own definition.
    """

    def __init__(self, config, writerDef):
        """Store the configuration and resolve the effective writer params.

        config    -- configuration object providing ``value`` and
                     ``value_element``
        writerDef -- this writer's definition; must contain ``writer-id``
        """
        self.config = config
        self.writerDef = writerDef
        self.writer_id = writerDef["writer-id"]

        # defaults shared by all writers
        common = "common.umcpush.writer-params"
        lookup = self.config.value
        self.params = Map(
            delay_writes=lookup(common + ".delay-writes", 0.2),
            delay_runs=lookup(common + ".delay-runs", 10),
            connection_retry_count=lookup(
                common + ".connection-retry-count", 5),
            connection_retry_interval=lookup(
                common + ".connection-retry-interval", 10),
            write_interval=lookup(common + ".write-interval", 0),
        )

        # writer specific overrides of the common params
        overrides = self.param("writer-params")
        if overrides is None:
            return

        for name, value in overrides.items():
            attr = name.replace("-", "_")
            if self.params.get(attr) is None:
                # not a common param; a subclass may read it on its own
                continue
            self.params[attr] = value

    def param(self, param_name, default=None):
        """Return ``param_name`` from this writer's definition or ``default``."""
        return self.config.value_element(self.writerDef, param_name, default)

    def read_umcdef(self, umc_id, umcconf):
        """Return this writer's entry from the ``writers`` list of ``umcconf``.

        The result is a Map with ``enabled`` and ``writerDef``. When the umc
        configuration does not list this writer, the result is
        ``enabled=False`` with ``writerDef=None``.
        """
        for candidate in self.config.value_element(umcconf, "writers", []):
            if candidate["writer-id"] != self.writer_id:
                continue
            is_enabled = self.config.value_element(candidate, "enabled", True)
            return Map(enabled=is_enabled, writerDef=candidate)

        # this umc instance has no definition for this writer
        return Map(enabled=False, writerDef=None)
Beispiel #2
0
 def run_task(self, GlobalContext, tdef):
     """Refresh ``log_stats`` of every enabled umc definition.

     For each enabled entry of ``GlobalContext.umcdefs`` the instance log
     directory is scanned while holding ``ud.lock``:

     * files named ``<instance>_<digits/dashes>.log.<1-9>`` are counted in
       ``backlog_total`` and in ``backlog_group_<n>``;
     * a file named ``<instance>[_<digits/dashes>].error.out`` sets
       ``errorlog_size`` and ``errorlog_mtime`` (mtime is kept at 0 for an
       empty file).

     A missing log directory is reported with a warning and leaves the
     statistics at their initial values. ``tdef`` is not used here.
     Always returns True.
     """
     if GlobalContext.umcdefs is None:
         return True

     for ud in GlobalContext.umcdefs:
         if not ud.enabled:
             continue

         ud.lock.acquire()
         try:
             log_stats = Map(backlog_total=0, errorlog_mtime=0,
                             errorlog_size=0, errorlog_tail=[])
             log_dir = get_umc_instance_log_dir(ud.umc_instanceid, GlobalContext)

             if os.path.isdir(log_dir):
                 # The instance id and the dots are literals, so they are
                 # escaped; the patterns are compiled once per instance
                 # instead of once per file.
                 instance_re = re.escape("{0}".format(ud.umc_instanceid))
                 # there is a maximum of 9 groups (1-9)
                 backlog_re = re.compile(
                     r"^" + instance_re + r"_[0-9\-]+\.log\.([1-9])$")
                 errorlog_re = re.compile(
                     r"^" + instance_re + r"(_[0-9\-]+)?\.error\.out$")

                 for entry in scandir(log_dir):
                     fname = entry.name

                     # log file waiting to be consumed
                     backlog_match = backlog_re.match(fname)
                     if backlog_match:
                         group_key = "backlog_group_%s" % backlog_match.group(1)
                         seen = log_stats.get(group_key)
                         log_stats[group_key] = 1 if seen is None else seen + 1
                         log_stats.backlog_total += 1

                     # error log
                     if errorlog_re.match(fname):
                         stat = os.stat(log_dir + "/" + fname)
                         log_stats.errorlog_size = stat.st_size
                         if log_stats.errorlog_size > 0:
                             log_stats.errorlog_mtime = stat.st_mtime
                         else:
                             log_stats.errorlog_mtime = 0
                         # reading the tail of the error log takes too much
                         # time, so errorlog_tail is left empty
             else:
                 Msg.warn_msg("Directory %s does not exist!"%log_dir)

             # publish the new statistics
             ud.log_stats = log_stats
         finally:
             ud.lock.release()

     return True
Beispiel #3
0
class UmcReader:
    """Reads umc CSV log files and turns their rows into data points.

    ``self.params`` starts from ``common.umcpush.reader-params`` and is then
    overridden by the ``reader-params`` of the writer identified by
    ``writer_id``.
    """

    def __init__(self, config, writer_id):
        """Resolve the effective reader params for ``writer_id``.

        config    -- configuration object providing ``value`` and
                     ``value_element``
        writer_id -- id of the writer whose ``reader-params`` override the
                     common ones
        """
        self.config = config

        # read common reader's params
        base_key = "common.umcpush.reader-params"
        self.params = Map(
            max_batchsize_rows=self.config.value(
                base_key + ".max-batchsize-rows", 50),
            max_batchsize_files=self.config.value(
                base_key + ".max-batchsize-files", 300),
            log_file_group=self.config.value(base_key + ".log-file-group", 1),
            # a missing list is treated as empty instead of failing on None
            common_tags=(self.config.value(base_key + ".common-tags", "")
                         or "").split(','),
            common_fields=(self.config.value(base_key + ".common-fields", "")
                           or "").split(','),
            default_timefield=self.config.value(
                base_key + ".default-timefield", "datetime"),
            default_timeformat=self.config.value(
                base_key + ".default-timeformat", "%Y-%m-%d %H:%M:%S"),
            tzoffset=utils.float_ex(
                self.config.value(base_key + ".tzoffset", 0), 0))

        # update any value that may be overridden in writer's specific parameters
        writers = config.value("common.umcpush.writers") or []
        for writer in writers:
            if writer["writer-id"] != writer_id:
                continue

            # a writer is allowed to have no reader params at all
            rparams = self.config.value_element(writer, "reader-params", None)
            if rparams is None:
                continue

            for k, v in rparams.items():
                k = k.replace("-", "_")
                # Known params are those present in the common set; compare
                # with None so that falsy values (e.g. tzoffset 0) can still
                # be overridden.
                if self.params.get(k) is None:
                    Msg.warn_msg(
                        "The reader param %s is invalid in %s" %
                        (k, "reader-params of writer %s" % writer_id))
                    continue
                # keep the list shape of the common tags/fields
                if k in ("common_tags", "common_fields") and hasattr(v, "split"):
                    v = v.split(',')
                self.params[k] = v

    def _split_cols(self, value):
        """Return the non-blank, stripped column names found in ``value``.

        ``value`` may be a comma separated string, an iterable of strings or
        None.
        """
        if value is None:
            return []
        items = value.split(',') if hasattr(value, "split") else value
        stripped = (x.strip() for x in items)
        return [x for x in stripped if x != '']

    # *** reads and checks umc definition for a specific umc id
    def read_umcdef(self, umc_id, umcconf):
        """Build the reader part of the umc definition ``umc_id``.

        Returns a Map with ``tcols``, ``fcols``, ``timeformat``,
        ``timefield``, ``tzfield``, ``filter`` and ``transform``. Columns
        prefixed with '!' are treated as commented out: they are removed and
        also keep the common column of the same name from being added.

        Raises Exception when the configured time format is rejected by
        ``strftime``.
        """
        # tags and fields cols of this umc definition
        tcols = self._split_cols(
            self.config.value_element(umcconf, "reader.tags", ""))
        fcols = self._split_cols(
            self.config.value_element(umcconf, "reader.fields", ""))

        # combine with common tags and fields cols
        tcols.extend(x for x in self._split_cols(self.params.common_tags)
                     if x not in tcols and '!' + x not in tcols)
        fcols.extend(x for x in self._split_cols(self.params.common_fields)
                     if x not in fcols and '!' + x not in fcols)

        # remove all commented out fields and tags
        tcols = [x for x in tcols if not x.startswith('!')]
        fcols = [x for x in fcols if not x.startswith('!')]

        # read and check time field and its format
        timeformat = self.config.value_element(umcconf, "reader.timeformat",
                                               self.params.default_timeformat)
        try:
            if timeformat not in ['_unix_', '_time_s_', '_time_ms_']:
                strftime(timeformat, gmtime())
        except Exception as e:
            raise Exception(
                "The time format '%s' is invalid for umc '%s': %s!" %
                (timeformat, umc_id, e))

        timefield = self.config.value_element(umcconf, "reader.timefield",
                                              self.params.default_timefield)
        tzfield = self.config.value_element(umcconf, "reader.tzfield", None)

        row_filter = self.config.value_element(umcconf, "reader.filter", None)

        # transformation expressions
        transform = self.config.value_element(umcconf, "reader.transform",
                                              None)

        return Map(tcols=tcols,
                   fcols=fcols,
                   timeformat=timeformat,
                   timefield=timefield,
                   tzfield=tzfield,
                   filter=row_filter,
                   transform=transform)

    # unix time
    def unix_time_millis(self, dt):
        """Return the milliseconds between the module level ``epoch`` and ``dt``."""
        return int((dt - epoch).total_seconds() * 1000)

    # retrieves the first batch of log files sorted by modified time
    def get_batch_logs(self, logDir, umc_instanceids, files_in_buffer=None):
        """Collect up to ``max_batchsize_files`` log files below ``logDir``.

        Only directories ``<logDir>/<group>/<instance>`` whose instance is in
        ``umc_instanceids`` are considered, and only files ending in
        ``.log.<log_file_group>`` that are not listed in ``files_in_buffer``.

        Returns the full file names sorted by modification time, newest
        first.
        """
        buffered = files_in_buffer if files_in_buffer is not None else ()
        limit = self.params.max_batchsize_files

        pattern = re.compile(r".+_[0-9]+.*\.log\.{log_file_group}$".format(
            log_file_group=self.params.log_file_group))
        # logDir is a literal path, not a regular expression
        search_re = re.compile(
            re.escape(logDir) + r"/[a-zA-Z0-9\._\-]+/([a-zA-Z0-9\-\._]+)")

        batch = []
        for dirname, _subdirs, filenames in walk(logDir):
            m = search_re.match(dirname)
            if not m or m.group(1) not in umc_instanceids:
                continue
            for filename in filenames:
                if len(batch) >= limit:
                    break
                fullfname = os.path.join(dirname, filename)
                if fullfname not in buffered and pattern.match(filename):
                    batch.append(fullfname)
            if len(batch) >= limit:
                break
        return sorted(batch, key=lambda fn: os.stat(fn).st_mtime, reverse=True)

    def _row_timestamp(self, row, reader_def):
        """Return the timestamp of ``row`` scaled to nanoseconds.

        ``_unix_`` / ``_time_s_`` values are read as whole seconds and
        ``_time_ms_`` values as whole milliseconds. Any other format is
        parsed with ``strptime`` and shifted by the time zone offset, which
        is given in hours and taken from the row's ``tzfield`` when present,
        otherwise from the reader params.

        Raises ValueError when the time field is missing or cannot be parsed.
        """
        timefield = reader_def.timefield
        if timefield not in row:
            raise ValueError("Cannot find time field '" + timefield +
                             "' in data row!")

        timeformat = reader_def.timeformat
        # int() is unbounded on Python 3 and promotes to long on Python 2
        if timeformat == "_unix_" or timeformat == "_time_s_":
            return int(row[timefield]) * 1000000000
        if timeformat == "_time_ms_":
            return int(row[timefield]) * 1000000

        tzoffset = self.params.tzoffset
        if reader_def.tzfield is not None and reader_def.tzfield in row:
            tzoffset = utils.float_ex(row[reader_def.tzfield],
                                      self.params.tzoffset)
        millis = self.unix_time_millis(
            datetime.datetime.strptime(row[timefield], timeformat))
        return (millis - int(tzoffset * 60 * 60 * 1000)) * 1000000

    # read data points from a single log file
    def read_datapoints(self, logfilename, umcdef, create_writeitem_func):
        """Read ``logfilename`` as CSV and return the list of data points.

        For every row with a valid timestamp and at least one non-None field
        value, the optional transform and filter of ``umcdef.reader`` are
        applied and ``create_writeitem_func(umcdef, timestamp, fields, tags)``
        is called; list results are appended to the returned list. Rows with
        an unreadable timestamp and failures of ``create_writeitem_func`` are
        logged and skipped. Returns an empty list when ``umcdef`` is not
        enabled.
        """
        datapoints = []
        notags = False

        if not umcdef.enabled:
            return datapoints

        reader_def = umcdef.reader
        with open(logfilename, 'r') as csvfile:
            for row in csv.DictReader(csvfile, delimiter=','):
                # remove None keys
                row = {k: v for k, v in row.items() if k is not None}

                try:
                    timestamp = self._row_timestamp(row, reader_def)
                except Exception as e:
                    # output error and skip this row
                    Msg.err_msg(
                        "Cannot read or convert time to timestamp for %s: %s"
                        % (umcdef.umcid, str(e)))
                    continue

                # create tags and fields
                tags = {
                    k: str(v)
                    for k, v in row.items() if k in reader_def.tcols
                }
                fields = {
                    k: utils.float_ex(v)
                    for k, v in row.items() if k in reader_def.fcols
                }
                # reflects the most recently read row
                notags = not tags

                # only add this row if there is at least one field with some value
                if not any(v is not None for v in fields.values()):
                    continue

                # evaluate transformations
                if reader_def.transform is not None:
                    tags, fields = eval_transform(
                        reader_def.transform, timestamp, tags, fields)

                # only add this row if filter holds on this row or there is no filter
                if reader_def.filter is not None and not eval_filter(
                        reader_def.filter, timestamp, tags, fields):
                    continue

                try:
                    records = create_writeitem_func(
                        umcdef, timestamp, fields, tags)
                    if isinstance(records, list):
                        datapoints += records
                except Exception as e:
                    Msg.err_msg(
                        "Error occured while creating data points item: %s"
                        % str(e))

        # check for no tags
        if notags and len(datapoints) > 0:
            Msg.warn_msg(
                "The definition of %s contains no tags presented in the log file %s!"
                % (umcdef.umcid, os.path.basename(logfilename)))

        return datapoints