Esempio n. 1
0
    def __init__(self, configuration):
        """Build the input, the optional parser and the output of a collector.

        ``configuration`` must contain an ``input`` section; ``parser`` and
        ``metadata`` sections are optional.  The output definition and the
        tailer settings are looked up in the name ``config`` taken from the
        enclosing scope, not in the ``configuration`` argument.

        Raises:
            AttributeError: the input type is ``tailer`` but ``config`` has
                no ``tailer`` entry.
            AssertionError: no truthy input object was created, e.g. the
                input type matches none of the handled values.
        """
        self.config = configuration
        self.input = None
        self.parser = None
        self.metadata = None

        # The input section is mandatory.
        input_cfg = self.config['input']
        input_args = input_cfg.get('arguments', {})
        input_type = input_cfg['type']

        if input_type == 'command':
            self.input = CommandRunner(**input_args)
        elif input_type == 'file':
            self.input = FileReader(**input_args)
        elif input_type == 'http':
            self.input = HTTPReader(**input_args)
        elif input_type == 'class':
            self.input = init_object(input_cfg['name'], **input_args)
        elif input_type == 'tailer':
            if 'tailer' not in config:
                raise AttributeError(
                    "Missing tailer in config file for tailer type input")
            self.input = Tailer(config['tailer'])

        assert self.input

        # The parser section is optional; it post-processes collected data.
        if 'parser' in self.config:
            parser_cfg = self.config['parser']
            parser_args = (
                parser_cfg['arguments'] if 'arguments' in parser_cfg else {})
            parser_type = parser_cfg['type']

            if parser_type == 'match':
                pattern = parser_cfg['pattern'].strip()
                transform = parser_cfg['transform'].strip()
                self.parser = MatchParser(pattern, transform)
            elif parser_type == 'split':
                delimiter = parser_cfg['delimiter'].strip()
                transform = parser_cfg['transform'].strip()
                self.parser = SplitParser(delimiter, transform)
            elif parser_type == 'dummy':
                self.parser = DummyParser()
            elif parser_type == 'json':
                self.parser = JsonGrepParser(**parser_args)
            elif parser_type == 'class':
                self.parser = init_object(parser_cfg['name'], **parser_args)

        # Bookkeeping; -1 is the default when max_error_count is not set.
        self._max_error_count = input_cfg.get('max_error_count', -1)
        self._current_data = None
        self._number_collected = 0
        self._number_failed = 0
        self._error_count = 0

        self._output = create_output(config['output'])

        if 'metadata' in self.config:
            self.metadata = self.config['metadata']
 def __init__(self, collector_name, config, output, tailer):
     """Initialise the collector thread from its configuration.

     The thread is named ``collector_name``.  ``config['input']`` selects
     the input object (``command``, ``file``, ``http``, ``class`` or
     ``tailer``); for the ``tailer`` type the ``tailer`` argument is used
     as the input as-is.  An optional ``config['parser']`` section selects
     the parser.  An input type matching none of the handled values leaves
     the input set to ``None``.
     """
     threading.Thread.__init__(self, name=collector_name)
     self.__collector_name = collector_name
     self.__config = config

     input_cfg = config['input']

     # Timing: fixed sleep interval plus an optional cron-style schedule.
     self.__sleep_time = input_cfg.get('frequency', 10)
     self.__cron = input_cfg.get('schedule', None)
     self.__schedule = None
     if self.__cron is not None:
         self.__schedule = CronEvent(self.__cron)
         log.debug("job scheduled at %s" % self.__schedule.numerical_tab)

     self.__input = None
     self.__parser = None
     self.__output = output

     # Input selection.
     input_type = input_cfg['type']
     if input_type == 'command':
         self.__input = CommandRunner(input_cfg['source'])
     elif input_type == 'file':
         self.__input = FileReader(input_cfg['path'])
     elif input_type == 'http':
         self.__input = HTTPReader(
             input_cfg['url'],
             input_cfg.get('headers', {}),
             input_cfg.get('auth', None))
     elif input_type == 'class':
         input_args = input_cfg.get('arguments', {})
         self.__input = init_object(input_cfg['name'], **input_args)
     elif input_type == 'tailer':
         self.__input = tailer

     # Parser selection (optional section).
     if 'parser' in config:
         parser_cfg = config['parser']
         parser_type = parser_cfg['type']
         if parser_type == 'match':
             self.__parser = MatchParser(
                 parser_cfg['pattern'].strip(),
                 parser_cfg['transform'].strip())
         elif parser_type == 'split':
             self.__parser = SplitParser(
                 parser_cfg['delimiter'].strip(),
                 parser_cfg['transform'].strip())
         elif parser_type == 'dummy':
             self.__parser = DummyParser()
         elif parser_type == 'json' or parser_type == 'class':
             # Both of these types accept keyword arguments from the config.
             parser_args = (
                 parser_cfg['arguments'] if 'arguments' in parser_cfg else {})
             if parser_type == 'json':
                 self.__parser = JsonGrepParser(**parser_args)
             else:
                 self.__parser = init_object(parser_cfg['name'], **parser_args)

     # Runtime state.
     self.__running = True
     self.__session_id = str(uuid.uuid4())
     self.__max_error_count = input_cfg.get('max_error_count', -1)
     self.__current_data = None
     self.__number_collected = 0
     self.__number_failed = 0
     self.__sleep_count = 0
     self.__error_count = 0
     self.__last_check_minute = -1
Esempio n. 3
0
    def init(self):
        """Create the configured outputs and the optional tailer.

        Works on the name ``config`` taken from the enclosing scope:

        * ``config['global']``, when present, is passed to ``set_global``.
        * Each ``name -> cfg`` entry of ``config['output']`` is turned into
          an output object stored in ``self.__outputs[name]``.  The names
          ``buffer``, ``kafka-http`` and ``file`` map to built-in output
          classes; any other entry is built with ``init_object`` when it
          has a ``class`` key and is skipped otherwise.
        * ``config['pusher']``, when present, must contain ``directory``
          and ``output``.
        * ``config['tailer']``, when present, is used to create a
          ``Tailer`` stored in ``self.__tailer``.

        Returns:
            bool: ``False`` (after printing an error) when the buffer
            directory is not configured or exists as a non-directory, or
            when the pusher section is incomplete; ``True`` otherwise.
        """
        if 'global' in config:
            set_global(config['global'])

        # items() rather than iteritems(): iteritems() exists only on
        # Python 2 dicts, items() is available on both Python 2 and 3.
        for name, cfg in config['output'].items():
            if name == 'buffer':
                if 'directory' not in cfg:
                    print("ERROR: buffer directory not specified in config.")
                    return False
                buffer_dir = cfg['directory']
                if os.path.exists(buffer_dir) and (not os.path.isdir(buffer_dir)):
                    print("ERROR: buffer directory exists but it is not a directory.")
                    return False
                if not os.path.exists(buffer_dir):
                    log.info("Creating buffer directory %s." % buffer_dir)
                    os.makedirs(buffer_dir)
                self.__outputs[name] = BufferOutput(cfg)
            elif name == 'kafka-http':
                self.__outputs[name] = KafkaHTTPOutput(cfg)
            elif name == 'file':
                self.__outputs[name] = FileOutput(cfg)
            elif 'class' in cfg:
                # Custom output class, built from keyword arguments.
                arguments = cfg['arguments'] if 'arguments' in cfg else {}
                self.__outputs[name] = init_object(cfg['class'], **arguments)

        if 'pusher' in config:
            pusher_cfg = config['pusher']
            if 'directory' not in pusher_cfg or 'output' not in pusher_cfg:
                print("ERROR: need to speficity directory and output in pusher.")
                return False
        if 'tailer' in config:
            self.__tailer = Tailer(config['tailer'])
        return True
Esempio n. 4
0
def create_output(config):
    """Instantiate the output class named by ``config['class']``.

    Unlike input classes, which receive keyword arguments, every output
    class is handed one single positional argument: the ``arguments``
    entry of ``config``, or an empty dict when that entry is absent.
    """
    output_args = config['arguments'] if 'arguments' in config else {}
    return init_object(config['class'], output_args)
Esempio n. 5
0
def create_input(input_config, **kwargs):
    """Instantiate the input class described by ``input_config``.

    Only class-based inputs are supported.  The class name is taken from
    ``input_config['class']`` when present, otherwise from
    ``input_config['name']`` (in which case ``input_config['type']`` must
    be ``'class'``).

    Keyword arguments given by the caller are forwarded to the class.  When
    the caller passes none, the ``arguments`` entry of ``input_config`` is
    used instead, if there is one.

    Raises:
        AssertionError: ``class`` is absent and ``type`` is not ``'class'``.
        KeyError: a required key is missing from ``input_config``.
    """
    if 'class' in input_config:
        class_name = input_config['class']
    else:
        assert input_config['type'] == 'class'
        class_name = input_config['name']

    # ``**kwargs`` always binds a dict (never None), so the fallback to the
    # configured arguments has to test for emptiness.
    if not kwargs:
        kwargs = input_config.get('arguments') or {}
    return init_object(class_name, **kwargs)
Esempio n. 6
0
    def __init__(self, collector_name, config, output, tailer=None):
        """Initialise the collector thread from its configuration.

        The thread is named ``collector_name``.  ``config['input']`` selects
        the input object (``command``, ``file``, ``http``, ``class`` or
        ``tailer``); an optional ``config['parser']`` section selects the
        parser.

        Raises:
            AttributeError: the input type is ``tailer`` but no ``tailer``
                object was passed in.
            AssertionError: no truthy input object was created, e.g. the
                input type matches none of the handled values.
        """
        threading.Thread.__init__(self, name=collector_name)
        self.__collector_name = collector_name
        self.__config = config

        input_cfg = config['input']

        # Timing: fixed sleep interval plus an optional cron-style schedule.
        self.__sleep_time = input_cfg.get('frequency', 10)
        self.__cron = input_cfg.get('schedule', None)
        self.__schedule = None
        if self.__cron is not None:
            self.__schedule = CronEvent(self.__cron)
            log.debug("job scheduled at %s" % self.__schedule.numerical_tab)

        self.__input = None
        self.__parser = None
        self.__output = output

        # Input selection.
        input_type = input_cfg['type']
        if input_type == 'command':
            self.__input = CommandRunner(input_cfg['source'])
        elif input_type == 'file':
            self.__input = FileReader(input_cfg['path'])
        elif input_type == 'http':
            self.__input = HTTPReader(
                input_cfg['url'],
                input_cfg.get('headers', {}),
                input_cfg.get('auth', None))
        elif input_type == 'class':
            input_args = input_cfg.get('arguments', {})
            self.__input = init_object(input_cfg['name'], **input_args)
        elif input_type == 'tailer':
            if tailer is None:
                raise AttributeError("Missing tailer in config file for tailer type input")
            self.__input = tailer

        assert self.__input

        # Parser selection (optional section).
        if 'parser' in config:
            parser_cfg = config['parser']
            parser_type = parser_cfg['type']
            if parser_type == 'match':
                self.__parser = MatchParser(
                    parser_cfg['pattern'].strip(),
                    parser_cfg['transform'].strip())
            elif parser_type == 'split':
                self.__parser = SplitParser(
                    parser_cfg['delimiter'].strip(),
                    parser_cfg['transform'].strip())
            elif parser_type == 'dummy':
                self.__parser = DummyParser()
            elif parser_type == 'json' or parser_type == 'class':
                # Both of these types accept keyword arguments from the config.
                parser_args = (
                    parser_cfg['arguments'] if 'arguments' in parser_cfg else {})
                if parser_type == 'json':
                    self.__parser = JsonGrepParser(**parser_args)
                else:
                    self.__parser = init_object(parser_cfg['name'], **parser_args)

        # Runtime state.
        self.__running = True
        self.__session_id = str(uuid.uuid4())
        self.__max_error_count = input_cfg.get('max_error_count', -1)
        self.__current_data = None
        self.__number_collected = 0
        self.__number_failed = 0
        self.__sleep_count = 0
        self.__error_count = 0
        self.__last_check_minute = -1