class JobCommand(GeneratingCommand):
    handler = Option(require=True)
    search_name = Option(require=False)
    argv = Option(require=False)
    repeat_on_error = Option(default=True, validate=validators.Boolean())
    repeat_on_success = Option(default=True, validate=validators.Boolean())

    def generate(self):
        logging_handler = LoggingHandler()
        logger = get_logger()
        #root_logger = logging.getLogger()
        #configure_logger(root_logger)
        logger.addHandler(logging_handler)
        #root_logger.addHandler(get_handler())

        logger.debug("running ...")
        try:
            func = get_method(self.handler)
            if self.argv:
                try:
                    argv = json.loads(self.argv)
                except json.JSONDecodeError as e:
                    err_msg = traceback.format_exc()
                    logger.error("unable to decode argv: %s" % err_msg)
                    #raise Stop()
                    argv = []
            else:
                argv = []
            func(self.service, *argv)
            logger.debug("done")
            if self.repeat_on_success:
                return
        except exceptions.Repeat:
            logger.debug("will repeat")
            return
        except exceptions.Stop:
            logger.debug("will stop and not repeat")
        except Exception as e:
            err_msg = traceback.format_exc()
            logger.error("exception during job execution: %s" % err_msg)
            if self.repeat_on_error:
                logger.debug("will repeat")
                return
        finally:
            logging.shutdown()
            for e in logging_handler.events:
                yield e

        if self.search_name:
            self.service.saved_searches.delete(self.search_name)
        else:
            logger.error("missing search_name")
class snowIncidentCommand(GeneratingCommand):

    assigned = Option(require=True, validate=validators.List())
    assigned_by = Option(require=False)
    daysAgo = Option(require=False, validate=validators.Integer(0))
    active = Option(require=True, validate=validators.Boolean())
    limit = Option(require=False, validate=validators.Integer(0))
    env = Option(require=False)

    def generate(self):
        self.logger.debug('snowIncidentCommand: %s', self)
        searchinfo = self.metadata.searchinfo
        app = AppConf(searchinfo.splunkd_uri, searchinfo.session_key)
        env = self.env.lower() if self.env else 'production'
        conf = app.get_config('getsnow')[env]
        assigned_by = 'assignment_group' if self.assigned_by == 'group' else 'assigned_to'
        assignment = {'table': 'sys_user_group', 'field': 'name'} if self.assigned_by == 'group' else {'table': 'sys_user', 'field': 'user_name'}
        limit = self.limit if self.limit else 10000
        snowincident = snow(conf['url'], conf['user'], conf['password'])
        sids = snowincident.getsysid(assignment['table'], assignment['field'], self.assigned)
        filters = snowincident.filterbuilder(assigned_by, sids)
        glide = 'sys_created_on>=javascript:gs.daysAgo({})'.format(self.daysAgo) if self.daysAgo else ''
        url = snowincident.reqencode(filters, table='incident', glide_system=glide, active=self.active, sysparm_limit=limit)
        for record in snowincident.getrecords(url):
            record = snowincident.updatevalue(record, sourcetype='snow:incident')
            record['_raw'] = json.dumps(record)
            record = dictexpand(record)
            yield record
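
    # Illustrative SPL usage (assuming the command is registered as
    # "snowincident" in commands.conf):
    #   | snowincident assigned="network" assigned_by=group active=true daysAgo=7 limit=100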
    def test_boolean(self):

        truth_values = {
            '1': True,
            '0': False,
            't': True,
            'f': False,
            'true': True,
            'false': False,
            'y': True,
            'n': False,
            'yes': True,
            'no': False
        }

        validator = validators.Boolean()

        for value in truth_values:
            for variant in value, value.capitalize(), value.upper():
                for s in unicode(variant), bytes(variant):
                    self.assertEqual(validator(s), truth_values[value])

        self.assertIsNone(validator(None))
        self.assertRaises(ValueError, validator, 'anything-else')

        return
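
# dictexpand() used by snowIncidentCommand above is imported from a helper
# module that is not part of this listing. A minimal sketch of what it
# appears to do -- flatten nested dicts into dotted keys -- might look like
# this (an assumption, not the original implementation):
def dictexpand(item, parent=None):
    flat = {}
    for k, v in item.items():
        key = '{}.{}'.format(parent, k) if parent else k
        if isinstance(v, dict):
            flat.update(dictexpand(v, key))
        else:
            flat[key] = v
    return flat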
class StubbedReportingCommand(ReportingCommand):
    boolean = Option(
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''',
        require=False, validate=validators.Boolean())

    duration = Option(
        doc='''
        **Syntax:** **duration=***<value>*
        **Description:** A length of time''',
        validate=validators.Duration())

    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<value>*
        **Description:** Name of a field''',
        validate=validators.Fieldname())

    file = Option(
        doc='''
        **Syntax:** **file=***<value>*
        **Description:** Name of a file''',
        validate=validators.File(mode='r'))

    integer = Option(
        doc='''
        **Syntax:** **integer=***<value>*
        **Description:** An integer value''',
        validate=validators.Integer())

    optionname = Option(
        doc='''
        **Syntax:** **optionname=***<value>*
        **Description:** The name of an option (used internally)''',
        validate=validators.OptionName())

    regularexpression = Option(
        doc='''
        **Syntax:** **regularexpression=***<value>*
        **Description:** Regular expression pattern to match''',
        validate=validators.RegularExpression())

    set = Option(
        doc='''
        **Syntax:** **set=***<value>*
        **Description:** Regular expression pattern to match''',
        validate=validators.Set("foo", "bar", "test"))

    @Configuration()
    def map(self, records):
        pass

    def reduce(self, records):
        pass
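
# A quick illustration of how splunklib validators coerce the raw option
# strings that Splunk passes in (behaviour consistent with test_boolean above):
if __name__ == '__main__':
    assert validators.Boolean()('t') is True
    assert validators.Integer(0)('42') == 42
    assert validators.Set('foo', 'bar', 'test')('foo') == 'foo'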
class Joiner(EventingCommand):
    """
    combines transactions results into one json, assuming each of those individual results is a json
    """
    should_list = Option(
        doc="should generate list instead of overwriting conflicting keys",
        default=False,
        validate=validators.Boolean())

    def transform(self, records):
        """
        applies the appropriate update function to each record
        """
        updating_func = type(self)._update_and_overwrite
        if self.should_list:
            updating_func = self._update_and_list

        for record in records:
            unified_json = {}
            individual_json_list = record['_raw'].split('\n')
            updating_func(unified_json, individual_json_list)
            record['_raw'] = json.dumps(unified_json)
            yield record

    @staticmethod
    def _update_and_overwrite(unified_json, individual_json_list):
        for doc in individual_json_list:
            loaded = json.loads(doc)
            unified_json.update(
                loaded
            )  # update function automatically overwrites duplicate keys

    def _update_and_list(self, unified_json, individual_json_list):
        for doc in individual_json_list:
            loaded = json.loads(doc)
            for key, value in loaded.items():
                existing_value = unified_json.get(key)
                if key in self.fieldnames and existing_value:  # where self.fieldnames are keys NOT to list
                    continue
                elif key in self.fieldnames and not existing_value:
                    unified_json[key] = value
                else:
                    type(self)._do_safe_update(unified_json, existing_value,
                                               key, value)

    @staticmethod
    def _do_safe_update(unified_json, existing_value, key, value):
        if existing_value:
            unified_json[key].append(value)
        else:
            unified_json[key] = [value]
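
# A module-level sanity check of the merge helpers above (illustrative):
if __name__ == '__main__':
    merged = {}
    Joiner._update_and_overwrite(merged, ['{"a": 1}', '{"a": 2, "b": 3}'])
    assert merged == {'a': 2, 'b': 3}  # later documents overwrite earlier keys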
class CloudgatewayHttpsCheck(GeneratingCommand):
    """
    This command checks spacebridge reachability by using twisted to connect to the websocket echo endpoint and sending
    a message.  The test is considered a success if it gets back the message it sent within 10 seconds.
    By default it will inherit Splunk's proxy settings and use them.  In the command you can disable the proxy by
    passing useProxy=False.
    """
    useProxy = Option(require=False, validate=validators.Boolean(), default=True)

    def __init__(self):
        super(CloudgatewayHttpsCheck, self).__init__()
        self.echo_state = EchoState()

    def timeout(self):
        self.echo_state.message = 'Timeout'
        reactor.stop()

    def test_wss(self):
        ws_url = "wss://{}/echo".format(config.get_spacebridge_server())

        headers = {'Authorization': "f00d"}

        use_proxy = self.useProxy

        proxy, auth = config.get_ws_https_proxy_settings()

        if use_proxy:
            # Proxy setup
            if auth:
                headers['Proxy-Authorization'] = 'Basic ' + auth
        else:
            proxy = None

        factory = WebSocketClientFactory(ws_url, headers=headers, proxy=proxy)
        factory.protocol = CheckMobileWssProtocol
        factory.state = self.echo_state

        connectWS(factory)

        reactor.callLater(10, self.timeout)

        reactor.run()

        record = {'websocket': self.echo_state.ok, 'message': self.echo_state.message}
        return record

    def generate(self):
        record = self.test_wss()

        yield record
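
# EchoState is imported from elsewhere and not shown in this listing. The
# checks here rely only on an "ok" flag and a "message" string, so a minimal
# stand-in could look like this (an assumption, not the original class):
class EchoState(object):
    def __init__(self):
        self.ok = False
        self.message = ''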
class CloudgatewayAsyncCheck(GeneratingCommand):
    """
    This command checks spacebridge reachability by using twisted to make an http call to the health check endpoint.
    Any http return other than 200 is considered a failure.  By default it will inherit Splunk's proxy settings and use
    them.  In the command you can disable the proxy by passing useProxy=False.
    """
    useProxy = Option(require=False,
                      validate=validators.Boolean(),
                      default=True)

    def __init__(self):
        super(CloudgatewayAsyncCheck, self).__init__()
        self.echo_state = EchoState()

    def run(self):
        proxy = config.get_https_proxy_settings()
        uri = "{}/health_check".format(config.get_spacebridge_domain())

        if not self.useProxy:
            proxy = None

        client = AsyncClient(treq=noverify_treq_instance(https_proxy=proxy))

        def done(result):
            if result.code == 200:
                self.echo_state.ok = True
            else:
                self.echo_state.message = 'Got http {}'.format(result.code)
            reactor.stop()

        def err(failure):
            self.echo_state.message = failure
            reactor.stop()

        d = client.async_get_request(uri, None)
        d.addCallback(done)
        d.addErrback(err)

        reactor.run()
        return {
            'https_async': self.echo_state.ok,
            'message': self.echo_state.message
        }

    def generate(self):
        yield self.run()

    ''' HELPERS '''
class DnsLookupCommand(StreamingCommand):
    perevent = Option(
        doc='''
        **Syntax:** **perevent=***<perevent>*
        **Description:** create uuid per event''',
        require=False, validate=validators.Boolean(), default=False)

    def stream(self, records):
        guid = uuid.uuid4()

        for record in records:
            if self.perevent:
                guid = uuid.uuid4()
            
            record['uuid'] = str(guid)
            yield record
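
# Illustrative SPL usage of the command above (the actual command name
# depends on how it is registered in commands.conf):
#   ... | dnslookup perevent=true
# With perevent=true every event gets its own UUID; otherwise all events in
# the invocation share a single UUID.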
class Vader(StreamingCommand):
    """ Returns sentiment score between -1 and 1, can also return detailed sentiment values.

    ##Syntax

    .. code-block::
        vader textfield=<field>

    ##Description

    Sentiment analysis using Valence Aware Dictionary and sEntiment Reasoner
    Using the option full_output will also return the neutral, positive, and negative
    scores that make up the compound score (which is returned as the field
    "sentiment"). It is best to feed in uncleaned data, since the analyzer takes
    capitalization and punctuation into account.

    ##Example

    .. code-block::
        * | vader textfield=sentence
    """

    textfield = Option(require=True,
                       doc='''
        **Syntax:** **textfield=***<fieldname>*
        **Description:** Name of the field that will contain the text to search against''',
                       validate=validators.Fieldname())

    full_output = Option(default=False,
                         doc='''
        **Syntax:** **full_output=***<fieldname>*
        **Description:** If true, returns the full sentiment values--neutral, positive, and negative--otherwise only the compound score is returned''',
                         validate=validators.Boolean())

    def stream(self, records):
        sentiment_analyzer = SentimentIntensityAnalyzer()
        for record in records:
            polarity = sentiment_analyzer.polarity_scores(
                record[self.textfield])
            record['sentiment'] = polarity['compound']
            if self.full_output:
                record['sentiment_neutral'] = polarity['neu']
                record['sentiment_negative'] = polarity['neg']
                record['sentiment_positive'] = polarity['pos']

            yield record
class SecureGatewayAsyncCheck(GeneratingCommand):
    """
    This command checks spacebridge reachability by using twisted to make an http call to the health check endpoint.
    Any http return other than 200 is considered a failure.  By default it will inherit Splunk's proxy settings and use
    them.  In the command you can disable the proxy by passing useProxy=False.
    """
    useProxy = Option(require=False,
                      validate=validators.Boolean(),
                      default=True)

    def __init__(self):
        super(SecureGatewayAsyncCheck, self).__init__()
        self.echo_state = EchoState()

    async def run(self):
        proxy = config.get_https_proxy_settings()
        uri = "{}/health_check".format(config.get_spacebridge_domain())

        if not self.useProxy:
            proxy = None

        client = AsyncClient(AioHttpClient(proxy=proxy))

        try:
            result = await client.async_get_request(uri, None)
            if result.code == 200:
                self.echo_state.ok = True
            else:
                self.echo_state.message = 'Got http {}'.format(result.code)
        except Exception as e:
            self.echo_state.message = str(e)

        return {
            'https_async': self.echo_state.ok,
            'message': self.echo_state.message
        }

    def generate(self):
        loop = asyncio.new_event_loop()
        r = loop.run_until_complete(self.run())
        loop.close()

        yield r

    ''' HELPERS '''
class SecureGatewayHttpsCheck(GeneratingCommand):
    """
    This command checks spacebridge reachability by using requests to make an http call to the health check endpoint.
    Any http return other than 200 is considered a failure.  By default it will inherit Splunk's proxy settings and use
    them.  In the command you can disable the proxy by passing useProxy=False.
    """
    useProxy = Option(require=False, validate=validators.Boolean(), default=True)

    def generate(self):
        spacebridge_server = config.get_spacebridge_domain()

        url = "{}/health_check".format(spacebridge_server)

        proxies = config.get_proxies()

        # Unset the proxy if useProxy=False
        if not self.useProxy:
            proxies = {}

        # Load data from REST API
        try:
            response = requests.get(
                url,
                proxies=proxies,
                timeout=15
            )

            response.raise_for_status()
            healthy = {'https_sync': True}

        except requests.exceptions.HTTPError as err:
            healthy = {'https_sync': False, 'message': str(err)}
        except ProxyError as err:
            healthy = {'https_sync': False, 'message': str(err)}
        except requests.ConnectionError as err:
            healthy = {'https_sync': False, 'message': str(err)}

        yield healthy

    ''' HELPERS '''
class AnkitCommand(GeneratingCommand):

    firstname = Option(require=True, validate=validators.OptionName())
    lastname = Option(require=True, validate=validators.OptionName())
    botn = Option(require=False, validate=validators.Boolean())

    def generate(self):
        url = 'http://api.icndb.com/jokes/random?limitTo=nerdy&firstName={}&lastName={}'.format(
            self.firstname, self.lastname)
        response = requests.get(url)
        value = json.loads(response.text)['value']
        joke = value['joke']

        botn = self.botn
        botn_response_value = ""
        conc_string = joke
        if botn:
            botn_url = 'http://botn.splunk.link:8000/ep'
            response = requests.get(botn_url)
            botn_response_value = response.text
            conc_string = conc_string + "\n BOTN response: " + response.text

        yield {'_time': time.time(), '_raw': conc_string}
class CloudgatewayHttpsCheck(GeneratingCommand):
    """
    This command lets a user check whether an arbitrary URL is reachable.
    At this time, a 404 from either ims.prod-nlp.spl.mobi or auths.prod-nlp.spl.mobi is considered a success for connectivity.
    Any HTTP status other than 200 or 404 is considered a failure. By default the command inherits Splunk's proxy settings;
    the proxy can be disabled by passing useProxy=False.
    """

    useProxy = Option(require=False,
                      validate=validators.Boolean(),
                      default=True)
    url = Option(require=True)

    def generate(self):

        url = self.url
        proxies = config.get_proxies()

        # Unset the proxy if useProxy=False
        if not self.useProxy:
            proxies = {}

        # Load data from REST API
        try:
            response = requests.get(url, proxies=proxies, timeout=15)

            response.raise_for_status()
            healthy = {'connected': True}

        except requests.exceptions.HTTPError as err:
            healthy = {'connected': False, 'message': str(err)}
        except ProxyError as err:
            healthy = {'connected': False, 'message': str(err)}

        yield healthy

    ''' HELPERS '''
class MakeAlertsCommand(StreamingCommand):
    time = Option(doc='''
        **Syntax:** **time=***<field>*
        **Description:** Field name used to determine event time for the alert''',
                  require=False,
                  validate=validators.Fieldname(),
                  default='_time')
    entity = Option(doc='''
        **Syntax:** **entity=***<field>*
        **Description:** Field name used to determine the entity triggering the alert (account name, machine name, ...)''',
                    require=False,
                    validate=validators.Fieldname(),
                    default='entity')
    alert_type = Option(doc='''
        **Syntax:** **type=***<string>*
        **Description:** Field name used to determine the type of alert''',
                        require=True,
                        name='type')
    severity = Option(doc='''
        **Syntax:** **severity=***<field>*
        **Description:** Field name used to set severity of the alert''',
                      require=False,
                      validate=validators.Fieldname(),
                      default=None)
    idfield = Option(doc='''
        **Syntax:** **idfield=***<field>*
        **Description:** Field name used to store the alert id''',
                     require=False,
                     default=None,
                     validate=validators.Fieldname())
    combine = Option(doc='''
        **Syntax:** **combine=***"<fields>"*
        **Description:** Comma separated field names where alerts should be combined instead of creating new ones.''',
                     require=False,
                     default=None)
    combine_window = Option(doc='''
        **Syntax:** **combine_window=***<string>*
        **Description:** hours or days. ''',
                            require=False,
                            default=None)
    interactive = Option(doc='''
        **Syntax:** **interactive=***<bool>*
        **Description:** If true, makealerts can run in an interactive search, otherwise it will run only in scheduled
        search (this is to prevent alerts created accidentally when copy and pasting scheduled search text)''',
                         require=False,
                         default=False,
                         validate=validators.Boolean())
    preview = Option(doc='''
        **Syntax:** **preview=***<bool>*
        **Description:** If true, makealerts does not create alerts but instead indicates what it would do in the
        preview field''',
                     require=False,
                     default=False,
                     validate=validators.Boolean())

    alerts = None

    def __init__(self):
        super(MakeAlertsCommand, self).__init__()
        self.insert_stats = InsertStats()
        self.loggerExtra = self.logger

    def is_scheduled(self):
        sid = self._metadata.searchinfo.sid
        return sid.startswith("scheduler_") or sid.startswith("rt_scheduler_")
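
    # Illustrative sid values (formats are assumptions about Splunk's naming):
    #   "scheduler__admin__search__RMD5..._at_1600000000_1"  -> True
    #   "1600000000.123" (ad-hoc interactive search)         -> False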

    def stream(self, records):
        #self.logger.info('MakeAlertsCommand: %s, type of record %s', self, type(records))  # logs command line
        #self.logger.info('SEARCHINFO %s', self._metadata.searchinfo)

        sid = self._metadata.searchinfo.sid
        self.loggerExtra = CustomLogAdapter(self.logger, {
            'sid': sid,
            'type': self.alert_type
        })

        if not self.interactive and not self.is_scheduled():
            raise RuntimeError(
                "When testing makealerts from interactive search, provide the 'interative=t' option."
            )

        if not self.alerts:
            self.alerts = AlertCollection(
                self._metadata.searchinfo.session_key)

        for record in records:
            search_context = SearchContext(self._metadata.searchinfo,
                                           self.loggerExtra)
            self.alerts.insert(record,
                               event_time=self.time,
                               entity=self.entity,
                               alert_type=self.alert_type,
                               severity=self.severity,
                               idfield=self.idfield,
                               combine=self.combine,
                               combine_window=self.combine_window,
                               preview=self.preview,
                               search_context=search_context,
                               insert_stats=self.insert_stats)
            if self.preview:
                record['preview'] = str(search_context.messages)
            yield record

    def finish(self):
        if self.interactive and (
                not self.is_scheduled()) and self.insert_stats.errors > 0:
            self.write_error(
                "There were {0} error(s) when trying to insert data, check logs with this search 'index=_internal MakeAlertsCommand source=*super_simple_siem.log* ERROR'",
                self.insert_stats.errors)

        if not self.preview:
            self.loggerExtra.info('s3tag=stats', str(self.insert_stats))

        try:
            super(MakeAlertsCommand, self).finish()
        except Exception:
            # never let teardown errors mask the command's own output
            pass
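
# Illustrative scheduled-search usage of makealerts (the field names are
# assumptions, not taken from this listing):
#   ... | makealerts type="brute_force" entity=user combine="user,src"
#         combine_window=hours idfield=alert_id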
class mispgetevent(ReportingCommand):
    """ get the attributes from a MISP instance.
    ##Syntax
    .. code-block::
        | mispgetevent misp_instance=<input> last=<int>(d|h|m)
        | mispgetevent misp_instance=<input> event=<id1>(,<id2>,...)
        | mispgetevent misp_instance=<input> date=<YYYY-MM-DD>
                                            (date_to=<YYYY-MM-DD>)
    ##Description
    {
        "returnFormat": "mandatory",
        "page": "optional",
        "limit": "optional",
        "value": "optional",
        "type": "optional",
        "category": "optional",
        "org": "optional",
        "tag": "optional",
        "tags": "optional",
        "searchall": "optional",
        "date": "optional",
        "last": "optional",
        "eventid": "optional",
        "withAttachments": "optional",
        "metadata": "optional",
        "uuid": "optional",
        "published": "optional",
        "publish_timestamp": "optional",
        "timestamp": "optional",
        "enforceWarninglist": "optional",
        "sgReferenceOnly": "optional",
        "eventinfo": "optional",
        "excludeLocalTags": "optional"
    }
    # status: how each parameter is handled by this implementation
    {
        "returnFormat": forced to json,
        "page": param,
        "limit": param,
        "value": not managed,
        "type": param, CSV string,
        "category": param, CSV string,
        "org": not managed,
        "tag": not managed, use tags,
        "tags": param, see also not_tags,
        "searchall": not managed,
        "date": param,
        "last": param,
        "eventid": param,
        "withAttachments": forced to false,
        "metadata": not managed,
        "uuid": not managed,
        "published": param,
        "publish_timestamp": managed via param last,
        "timestamp": not managed,
        "enforceWarninglist": not managed,
        "sgReferenceOnly": not managed,
        "eventinfo": not managed,
        "excludeLocalTags": not managed
    }
    """
    # MANDATORY MISP instance for this search
    misp_instance = Option(doc='''
        **Syntax:** **misp_instance=***<instance_name>*
        **Description:**MISP instance parameters as described
         in local/inputs.conf.''',
                           require=True)
    # MANDATORY: json_request XOR eventid XOR last XOR date
    json_request = Option(doc='''
        **Syntax:** **json_request=***valid JSON request*
        **Description:**Valid JSON request''',
                          require=False)
    eventid = Option(doc='''
        **Syntax:** **eventid=***id1(,id2,...)*
        **Description:**list of event ID(s) or event UUID(s).''',
                     require=False,
                     validate=validators.Match("eventid", r"^[0-9a-f,\-]+$"))
    last = Option(doc='''
        **Syntax:** **last=***<int>d|h|m*
        **Description:** publication duration in day(s), hour(s) or minute(s).
        **nota bene:** last is an alias of published_timestamp''',
                  require=False,
                  validate=validators.Match("last", r"^[0-9]+[hdm]$"))
    date = Option(doc='''
        **Syntax:** **date=***The user set event date field
         - any of valid time related filters"*
        **Description:**starting date. **eventid**, **last**
         and **date** are mutually exclusive''',
                  require=False)
    # Other params
    page = Option(doc='''
        **Syntax:** **page=***<int>*
        **Description:**define the page for each MISP search; default 1.''',
                  require=False,
                  validate=validators.Match("limit", r"^[0-9]+$"))
    limit = Option(doc='''
        **Syntax:** **limit=***<int>*
        **Description:**define the limit for each MISP search; default 1000.
         0 = no pagination.''',
                   require=False,
                   validate=validators.Match("limit", r"^[0-9]+$"))
    type = Option(doc='''
        **Syntax:** **type=***CSV string*
        **Description:**Comma(,)-separated string of types to search for.
         Wildcard is %.''',
                  require=False)
    category = Option(doc='''
        **Syntax:** **category=***CSV string*
        **Description:**Comma(,)-separated string of categories to search for.
         Wildcard is %.''',
                      require=False)
    tags = Option(doc='''
        **Syntax:** **tags=***CSV string*
        **Description:**Comma(,)-separated string of tags to search for.
         Wildcard is %.''',
                  require=False)
    not_tags = Option(doc='''
        **Syntax:** **not_tags=***CSV string*
        **Description:**Comma(,)-separated string of tags to exclude.
         Wildcard is %.''',
                      require=False)
    published = Option(doc='''
        **Syntax:** **published=***<1|y|Y|t|true|True|0|n|N|f|false|False>*
        **Description:**select only published events (for option from to) .''',
                       require=False,
                       validate=validators.Boolean())
    getioc = Option(doc='''
        **Syntax:** **getioc=***<1|y|Y|t|true|True|0|n|N|f|false|False>*
        **Description:**Boolean to return the list of attributes
         together with the event.''',
                    require=False,
                    validate=validators.Boolean())
    pipesplit = Option(doc='''
        **Syntax:** **pipesplit=***<1|y|Y|t|true|True|0|n|N|f|false|False>*
        **Description:**Boolean to split multivalue attributes.''',
                       require=False,
                       validate=validators.Boolean())

    @Configuration()
    def map(self, records):
        # self.logger.debug('mispevent.map')
        return records

    def reduce(self, records):

        # Phase 1: Preparation
        my_args = prepare_config(self)
        my_args['misp_url'] = my_args['misp_url'] + '/events/restSearch'

        # check that ONE of mandatory fields is present
        mandatory_arg = 0
        if self.json_request is not None:
            mandatory_arg = mandatory_arg + 1
        if self.eventid:
            mandatory_arg = mandatory_arg + 1
        if self.last:
            mandatory_arg = mandatory_arg + 1
        if self.date:
            mandatory_arg = mandatory_arg + 1

        if mandatory_arg == 0:
            logging.error('Missing "json_request", "eventid", '
                          '"last" or "date" argument')
            raise Exception('Missing "json_request", "eventid", '
                            '"last" or "date" argument')
        elif mandatory_arg > 1:
            logging.error('Options "json_request", "eventid", "last" '
                          'and "date" are mutually exclusive')
            raise Exception('Options "json_request", "eventid", "last" '
                            'and "date" are mutually exclusive')

        body_dict = dict()
        # Only ONE combination was provided
        if self.json_request is not None:
            body_dict = json.loads(self.json_request)
            logging.info('Option "json_request" set')
        elif self.eventid:
            if "," in self.eventid:
                event_criteria = {}
                event_list = self.eventid.split(",")
                event_criteria['OR'] = event_list
                body_dict['eventid'] = event_criteria
            else:
                body_dict['eventid'] = self.eventid
            logging.info('Option "eventid" set with %s',
                         json.dumps(body_dict['eventid']))
        elif self.last:
            body_dict['last'] = self.last
            logging.info('Option "last" set with %s', str(body_dict['last']))
        else:
            body_dict['date'] = self.date.split()
            logging.info('Option "date" set with %s',
                         json.dumps(body_dict['date']))

        # Force some values on JSON request
        body_dict['returnFormat'] = 'json'
        body_dict['withAttachments'] = False
        # set proper headers
        headers = {'Content-type': 'application/json'}
        headers['Authorization'] = my_args['misp_key']
        headers['Accept'] = 'application/json'

        # Search pagination
        pagination = True
        if self.limit is not None:
            limit = int(self.limit)
        elif 'limit' in body_dict:
            limit = int(body_dict['limit'])
        else:
            limit = 1000
        if limit == 0:
            pagination = False
        if self.page is not None:
            page = int(self.page)
        elif 'page' in body_dict:
            page = body_dict['page']
        else:
            page = 1
        if self.published is True:
            body_dict['published'] = True
        elif self.published is False:
            body_dict['published'] = False
        if self.category is not None:
            if "," in self.category:
                cat_criteria = {}
                cat_list = self.category.split(",")
                cat_criteria['OR'] = cat_list
                body_dict['category'] = cat_criteria
            else:
                body_dict['category'] = self.category
        if self.type is not None:
            if "," in self.type:
                type_criteria = {}
                type_list = self.type.split(",")
                type_criteria['OR'] = type_list
                body_dict['type'] = type_criteria
            else:
                body_dict['type'] = self.type
        if self.tags is not None or self.not_tags is not None:
            tags_criteria = {}
            if self.tags is not None:
                tags_list = self.tags.split(",")
                tags_criteria['OR'] = tags_list
            if self.not_tags is not None:
                tags_list = self.not_tags.split(",")
                tags_criteria['NOT'] = tags_list
            body_dict['tags'] = tags_criteria
        # output filter parameters
        if self.getioc is True:
            my_args['getioc'] = True
        else:
            my_args['getioc'] = False
        if self.pipesplit is True:
            my_args['pipe'] = True
        else:
            my_args['pipe'] = False

        results = []
        # add colums for each type in results
        typelist = []

        if pagination is True:
            body_dict['page'] = page
            body_dict['limit'] = limit

        body = json.dumps(body_dict)
        logging.debug('mispgetevent request body: %s', body)
        # search
        r = requests.post(my_args['misp_url'],
                          headers=headers,
                          data=body,
                          verify=my_args['misp_verifycert'],
                          cert=my_args['client_cert_full_path'],
                          proxies=my_args['proxies'])
        # check if status is anything other than 200;
        # throw an exception if it is
        r.raise_for_status()
        # response is 200 by this point or we would have thrown an exception
        response = r.json()
        if 'response' in response:
            for r_item in response['response']:
                if 'Event' in r_item:
                    for a in list(r_item.values()):
                        v = {}
                        v['misp_event_id'] = str(a['id'])
                        v['misp_orgc_id'] = str(a['orgc_id'])
                        v['misp_event_date'] = str(a['date'])
                        v['threat_level_id'] = str(a['threat_level_id'])
                        v['misp_event_info'] = a['info']
                        v['misp_event_published'] = str(a['published'])
                        v['misp_event_uuid'] = str(a['uuid'])
                        v['misp_attribute_count'] = str(a['attribute_count'])
                        v['misp_analysis'] = str(a['analysis'])
                        v['misp_timestamp'] = str(a['timestamp'])
                        v['misp_distribution'] = str(a['distribution'])
                        v['misp_publish_timestamp'] = \
                            str(a['publish_timestamp'])
                        v['misp_sharing_group_id'] = str(a['sharing_group_id'])
                        v['misp_extends_uuid'] = str(a['extends_uuid'])
                        if 'Orgc' in a:
                            v['misp_orgc_name'] = str(a['Orgc']['name'])
                            v['misp_orgc_uuid'] = str(a['Orgc']['uuid'])
                        tag_list = []
                        if 'Tag' in a:
                            for tag in a['Tag']:
                                try:
                                    tag_list.append(str(tag['name']))
                                except Exception:
                                    pass
                        v['misp_tag'] = tag_list
                        if my_args['getioc'] is True:
                            v['Attribute'] = list()
                        v['misp_attribute_count'] = 0
                        if 'Attribute' in a:
                            v['misp_attribute_count'] = \
                                v['misp_attribute_count'] + len(a['Attribute'])
                            if my_args['getioc'] is True:
                                for attribute in a['Attribute']:
                                    # combined: not part of an object AND
                                    # multivalue attribute AND to be split
                                    if int(attribute['object_id']) == 0 \
                                       and '|' in attribute['type'] \
                                       and my_args['pipe'] is True:
                                        mv_type_list = \
                                            attribute['type'].split('|')
                                        mv_value_list = \
                                            str(attribute['value']).split('|')
                                        left_a = attribute.copy()
                                        left_a['type'] = mv_type_list.pop()
                                        left_a['value'] = mv_value_list.pop()
                                        v['Attribute'].append(
                                            getioc(left_a, typelist,
                                                   my_args['pipe'],
                                                   left_a['object_id']))
                                        right_a = attribute.copy()
                                        right_a['type'] = mv_type_list.pop()
                                        right_a['value'] = mv_value_list.pop()
                                        v['Attribute'].append(
                                            getioc(right_a, typelist,
                                                   my_args['pipe'],
                                                   right_a['object_id']))
                                    else:
                                        v['Attribute'].append(
                                            getioc(attribute, typelist,
                                                   my_args['pipe'],
                                                   attribute['object_id']))
                        if 'Object' in a:
                            for misp_o in a['Object']:
                                if 'Attribute' in misp_o:
                                    v['misp_attribute_count'] = \
                                        v['misp_attribute_count'] \
                                        + len(misp_o['Attribute'])
                                    if my_args['getioc'] is True:
                                        object_id = misp_o['id']
                                        object_name = misp_o['name']
                                        object_comment = misp_o['comment']
                                        for attribute in misp_o['Attribute']:
                                            v['Attribute'].append(
                                                getioc(attribute, typelist,
                                                       my_args['pipe'],
                                                       object_id, object_name,
                                                       object_comment))
                        logging.debug('event is %s', json.dumps(v))
                        results.append(v)
        logging.info('typelist is %s', json.dumps(typelist))
        # relevant_cat = ['Artifacts dropped', 'Financial fraud',
        # 'Network activity','Payload delivery','Payload installation']
        logging.debug('results is %s', json.dumps(results))
        if my_args['getioc'] is False:
            for e in results:
                yield e
        else:
            output_dict = {}
            for e in results:
                if 'Attribute' in e:
                    for r in e['Attribute']:
                        if int(r['misp_object_id']) == 0:  # not an object
                            key = str(e['misp_event_id']) + '_' \
                                + r['misp_attribute_id']
                            is_object_member = False
                        else:  # this is a MISP object
                            key = str(e['misp_event_id']) + \
                                '_object_' + str(r['misp_object_id'])
                            is_object_member = True
                        if key not in output_dict:
                            v = init_misp_output(e, r)
                            for t in typelist:
                                misp_t = 'misp_' \
                                    + t.replace('-', '_').replace('|', '_p_')
                                if t == r['misp_type']:
                                    v[misp_t] = r['misp_value']
                                else:
                                    v[misp_t] = ''
                            to_ids = []
                            to_ids.append(r['misp_to_ids'])
                            v['misp_to_ids'] = to_ids
                            category = []
                            category.append(r['misp_category'])
                            v['misp_category'] = category
                            attribute_uuid = []
                            attribute_uuid.append(r['misp_attribute_uuid'])
                            v['misp_attribute_uuid'] = attribute_uuid
                            if is_object_member is True:
                                v['misp_type'] = v['misp_object_name']
                                v['misp_value'] = v['misp_object_id']
                            output_dict[key] = dict(v)
                        else:
                            v = dict(output_dict[key])
                            misp_t = 'misp_' + r['misp_type'].replace('-', '_')
                            v[misp_t] = r['misp_value']  # set value for type
                            to_ids = v['misp_to_ids']
                            if r['misp_to_ids'] not in to_ids:
                                to_ids.append(r['misp_to_ids'])
                            v['misp_to_ids'] = to_ids
                            category = v['misp_category']
                            # append
                            if r['misp_category'] not in category:
                                category.append(r['misp_category'])
                            v['misp_category'] = category
                            attribute_uuid = v['misp_attribute_uuid']
                            if r['misp_attribute_uuid'] not in attribute_uuid:
                                attribute_uuid.append(r['misp_attribute_uuid'])
                            v['misp_attribute_uuid'] = attribute_uuid
                            if is_object_member is False:
                                misp_type = r['misp_type'] \
                                    + '|' + v['misp_type']
                                v['misp_type'] = misp_type
                                misp_value = r['misp_value'] + \
                                    '|' + v['misp_value']
                                v['misp_value'] = misp_value
                            output_dict[key] = dict(v)
            for k, v in list(output_dict.items()):
                yield v
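
# For example, "| mispgetevent misp_instance=default last=7d limit=100"
# produces a request body along these lines (illustrative):
#   {"last": "7d", "returnFormat": "json", "withAttachments": false,
#    "page": 1, "limit": 100}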
class CleanText(StreamingCommand):
    """ Counts the number of non-overlapping matches to a regular expression in a set of fields.

    ##Syntax

    .. code-block::
        cleantext textfield=<field> [default_clean=<bool>] [remove_urls=<bool>] [remove_stopwords=<bool>] 
            [base_word=<bool>] [base_type=<string>] [mv=<bool>] [force_nltk_tokenize=<bool>] 
            [pos_tagset=<string>] [custom_stopwords=<comma_separated_string_list>] [term_min_len=<int>] 
            [ngram_range=<int>-<int>] [ngram_mix=<bool>]

    ##Description

    Tokenize and normalize text (remove punctuation and digits, reduce words to a base form).
    The options trade cleaning quality against speed: base_type="lemma_pos" is the slowest,
    while base_type="lemma" assumes every word is a noun, which is faster and still
    produces decent lemmatization. Most options have defaults; textfield is the only
    required option. By default the result is a multivalued field ready for use with
    stats count by.

    ##Example

    .. code-block::
        * | cleantext textfield=sentence
    """

    textfield = Option(require=True,
                       doc='''
        **Syntax:** **textfield=***<fieldname>*
        **Description:** Name of the field that will contain the text to search against''',
                       validate=validators.Fieldname())
    keep_orig = Option(default=False,
                       doc='''**Syntax:** **keep_orig=***<boolean>*
        **Description:** Maintain a copy of the original text, for comparison or searching, in a field
        called orig_text''',
                       validate=validators.Boolean())
    default_clean = Option(default=True,
                           doc='''**Syntax:** **default_clean=***<boolean>*
        **Description:** Change text to lowercase, remove punctuation, and removed numbers, defaults to true''',
                           validate=validators.Boolean())
    remove_urls = Option(default=True,
                         doc='''**Syntax:** **remove_urls=***<boolean>*
        **Description:** Remove html links as part of text cleaning, defaults to true''',
                         validate=validators.Boolean())
    remove_stopwords = Option(
        default=True,
        doc='''**Syntax:** **remove_stopwords=***<boolean>*
        **Description:** Remove stopwords as part of text cleaning, defaults to true''',
        validate=validators.Boolean())
    base_word = Option(default=True,
                       doc='''**Syntax:** **base_word=***<boolean>*
        **Description:** Convert words to a base form as part of text cleaning, defaults to true and subject to value of base_type setting''',
                       validate=validators.Boolean())
    base_type = Option(
        default='lemma',
        doc='''**Syntax:** **base_type=***<string>*
        **Description:** Options are lemma, lemma_pos, or stem, defaults to lemma and subject to value of base_word setting being true''',
    )
    mv = Option(default=True,
                doc='''**Syntax:** **mv=***<boolean>*
        **Description:** Returns words as multivalue otherwise words are space separated, defaults to true''',
                validate=validators.Boolean())
    force_nltk_tokenize = Option(
        default=False,
        doc='''**Syntax:** **force_nltk_tokenize=***<boolean>*
        **Description:** Forces use of better NLTK word tokenizer but is slower, defaults to false''',
        validate=validators.Boolean())
    pos_tagset = Option(
        default=None,
        doc='''**Syntax:** **pos_tagset=***<string>*
        **Description:** Options are universal, wsj, or brown; defaults to universal and subject to base_type set to "lemma_pos"''',
    )
    custom_stopwords = Option(
        doc='''**Syntax:** **custom_stopwords=***<string>*
        **Description:** comma-separated list of custom stopwords, enclose in quotes''',
    )
    term_min_len = Option(default=0,
                          doc='''**Syntax:** **term_min_len=***<int>*
        **Description:** Only terms greater than or equal to this number will be returned. Useful if data has a lot of HTML markup.''',
                          validate=validators.Integer())
    ngram_range = Option(
        default='1-1',
        doc='''**Syntax:** **ngram_range=***<int>-<int>*
        **Description:** Returns a new ngram column with the range of ngrams specified if max is greater than 1''',
    )
    ngram_mix = Option(default=False,
                       doc='''**Syntax:** **ngram_mix=***<boolean>*
        **Description:** Determines if ngram output is combined or separate columns. Defaults to false which results in separate columns''',
                       validate=validators.Boolean())

    #http://dev.splunk.com/view/logging/SP-CAAAFCN
    def setup_logging(self):
        logger = logging.getLogger('splunk.foo')
        SPLUNK_HOME = os.environ['SPLUNK_HOME']

        LOGGING_DEFAULT_CONFIG_FILE = os.path.join(SPLUNK_HOME, 'etc',
                                                   'log.cfg')
        LOGGING_LOCAL_CONFIG_FILE = os.path.join(SPLUNK_HOME, 'etc',
                                                 'log-local.cfg')
        LOGGING_STANZA_NAME = 'python'
        LOGGING_FILE_NAME = "nlp-text-analytics.log"
        BASE_LOG_PATH = os.path.join('var', 'log', 'splunk')
        LOGGING_FORMAT = "%(asctime)s %(levelname)-s\t%(module)s:%(lineno)d - %(message)s"
        splunk_log_handler = logging.handlers.RotatingFileHandler(os.path.join(
            SPLUNK_HOME, BASE_LOG_PATH, LOGGING_FILE_NAME),
                                                                  mode='a')
        splunk_log_handler.setFormatter(logging.Formatter(LOGGING_FORMAT))
        logger.addHandler(splunk_log_handler)
        setupSplunkLogger(logger, LOGGING_DEFAULT_CONFIG_FILE,
                          LOGGING_LOCAL_CONFIG_FILE, LOGGING_STANZA_NAME)
        return logger

    #https://stackoverflow.com/a/15590384
    def get_wordnet_pos(self, treebank_tag):
        if treebank_tag.startswith('J'):
            return wordnet.ADJ
        elif treebank_tag.startswith('V'):
            return wordnet.VERB
        elif treebank_tag.startswith('N'):
            return wordnet.NOUN
        elif treebank_tag.startswith('R'):
            return wordnet.ADV
        else:
            return 'n'
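
    # For example: 'JJ' -> wordnet.ADJ, 'VBD' -> wordnet.VERB,
    # 'NNS' -> wordnet.NOUN, 'RB' -> wordnet.ADV; anything else
    # falls back to the noun tag 'n'.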

    def f_remove_urls(self, text):
        return re.sub(r'https?://[^\b\s<]+', '', text)

    def ngram(self, text, min_n, max_n):
        ngram_list = []
        for n in range(min_n, max_n):
            for ngram in ngrams(text, n):
                if len(ngram) > 1:
                    ngram_list.append((len(ngram), ' '.join(ngram)))
        return ngram_list
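
    # For example, ngram(['a', 'b', 'c'], 1, 3) returns
    # [(2, 'a b'), (2, 'b c')]: unigrams are dropped, bigrams are kept.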

    def stream(self, records):
        logger = self.setup_logging()
        logger.info('textfield set to: ' + self.textfield)
        if self.custom_stopwords:
            custom_stopwords = self.custom_stopwords.replace(' ',
                                                             '').split(',')
        for record in records:
            if self.keep_orig:
                record['orig_text'] = record[self.textfield]
            #URL removal
            if self.remove_urls:
                record[self.textfield] = self.f_remove_urls(
                    record[self.textfield])
            #Tokenization
            if (self.base_word and self.base_type
                    == 'lemma_pos') or self.force_nltk_tokenize:
                #lemma_pos - if option is lemmatization with POS tagging do cleaning and stopword options now
                if (self.base_word and self.base_type == 'lemma_pos'):
                    record['pos_tuple'] = pos_tag(word_tokenize(
                        record[self.textfield].decode('utf-8').encode(
                            'ascii', 'ignore')),
                                                  tagset=self.pos_tagset)
                    if self.default_clean and self.remove_stopwords:
                        if self.custom_stopwords:
                            stopwords = set(
                                stop_words.words('english') + custom_stopwords)
                        else:
                            stopwords = set(stop_words.words('english'))
                        record['pos_tuple'] = [
                            [re.sub(r'[\W\d]', '', text[0]).lower(), text[1]]
                            for text in record['pos_tuple']
                            if re.sub(r'[\W\d]', '', text[0]).lower() not in
                            stopwords and not re.search(r'[\W]', text[0])
                        ]
                    elif self.default_clean and not self.remove_stopwords:
                        record['pos_tuple'] = [
                            [re.sub(r'[\W\d]', '', text[0]).lower(), text[1]]
                            for text in record['pos_tuple']
                            if not re.search(r'[\W]', text[0])
                        ]
                elif self.force_nltk_tokenize:
                    record[self.textfield] = word_tokenize(
                        record[self.textfield])
            elif self.default_clean or (self.base_word
                                        and self.base_type == 'lemma'):
                #https://stackoverflow.com/a/1059601
                record[self.textfield] = re.split(r'\W+',
                                                  record[self.textfield])
            else:
                record[self.textfield] = record[self.textfield].split()
            #Default Clean
            if self.default_clean and not self.base_type == 'lemma_pos':
                record[self.textfield] = [
                    re.sub(r'[\W\d]', '', text).lower()
                    for text in record[self.textfield]
                ]
            #Lemmatization with POS tagging
            if self.base_word and self.base_type == 'lemma_pos':
                lm = WordNetLemmatizer()
                tuple_list = []
                tag_list = []
                record[self.textfield] = []
                record['pos_tag'] = []
                for text in record['pos_tuple']:
                    keep_text = lm.lemmatize(text[0],
                                             self.get_wordnet_pos(
                                                 text[1])).encode(
                                                     'ascii', 'ignore')
                    if keep_text:
                        record[self.textfield].append(keep_text)
                        tuple_list.append([keep_text, text[1]])
                        tag_list.append(text[1])
                        record['pos_tag'] = tag_list
                        record['pos_tuple'] = tuple_list
            #Lemmatization or Stemming with stopword removal
            if self.remove_stopwords and self.base_word and self.base_type != 'lemma_pos':
                if self.custom_stopwords:
                    stopwords = set(
                        stop_words.words('english') + custom_stopwords)
                else:
                    stopwords = set(stop_words.words('english'))
                if self.base_type == 'lemma':
                    lm = WordNetLemmatizer()
                    record[self.textfield] = [
                        lm.lemmatize(text) for text in record[self.textfield]
                        if text not in stopwords
                    ]
                if self.base_type == 'stem':
                    ps = PorterStemmer()
                    record[self.textfield] = [
                        ps.stem(text) for text in record[self.textfield]
                        if text not in stopwords
                    ]
            #Lemmatization or Stemming without stopword removal
            if not self.remove_stopwords and self.base_word:
                if self.base_type == 'lemma':
                    lm = WordNetLemmatizer()
                    record[self.textfield] = [
                        lm.lemmatize(text) for text in record[self.textfield]
                    ]
                if self.base_type == 'stem':
                    ps = PorterStemmer()
                    record[self.textfield] = [
                        ps.stem(text) for text in record[self.textfield]
                    ]
            #Stopword Removal
            if self.remove_stopwords and not self.base_word:
                if self.custom_stopwords:
                    stopwords = set(
                        stop_words.words('english') + custom_stopwords)
                else:
                    stopwords = set(stop_words.words('english'))
                record[self.textfield] = [
                    text for text in record[self.textfield]
                    if text not in stopwords
                ]
            #Minimum term length
            if self.term_min_len > 0:
                record[self.textfield] = [
                    i for i in record[self.textfield]
                    if len(i) >= self.term_min_len
                ]
            #ngram column creation
            (min_n, max_n) = (int(n) for n in self.ngram_range.split('-'))
            if max_n > 1 and max_n >= min_n:
                max_n = max_n + 1
                ngram_extract = self.ngram(
                    filter(None, record[self.textfield]), min_n, max_n)
                if ngram_extract:
                    for i in ngram_extract:
                        if not self.ngram_mix:
                            if 'ngrams_' + str(i[0]) not in record:
                                record['ngrams_' + str(i[0])] = []
                            record['ngrams_' + str(i[0])].append(i[1])
                        else:
                            if 'ngrams' not in record:
                                record['ngrams'] = []
                            record['ngrams'].append(i[1])
                else:
                    if not self.ngram_mix:
                        for n in range(int(min_n), int(max_n)):
                            if n != 1:
                                record['ngrams_' + str(n)] = []
                    else:
                        if 'ngrams' not in record:
                            record['ngrams'] = []
            #Final Multi-Value Output
            if not self.mv:
                record[self.textfield] = ' '.join(record[self.textfield])
                try:
                    record['pos_tag'] = ' '.join(record['pos_tag'])
                except (KeyError, TypeError):
                    pass

            yield record
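
To make the n-gram options above concrete: with `ngram_range=2-3` and `ngram_mix=false`, the command emits one multivalue field per n-gram size. A minimal standalone sketch of the same layout, using `nltk.util.ngrams` in place of the class's own `ngram` helper (the tokens are illustrative):

.. code-block::
    from nltk.util import ngrams

    tokens = ['log', 'analysis', 'with', 'splunk']
    min_n, max_n = (int(n) for n in '2-3'.split('-'))
    record = {}
    for n in range(min_n, max_n + 1):
        # one multivalue field per n-gram size, as with ngram_mix=false
        record['ngrams_%d' % n] = [' '.join(g) for g in ngrams(tokens, n)]
    # record == {'ngrams_2': ['log analysis', 'analysis with', 'with splunk'],
    #            'ngrams_3': ['log analysis with', 'analysis with splunk']}
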
Example #17
class JsonToFieldsCommand(StreamingCommand):
    json = Option(
        doc='''
        **Syntax:** **json=***<field>*
        **Description:** Field name that contains the json string''',
        require=True, validate=validators.Fieldname())
    prefix = Option(
        doc='''
        **Syntax:** **prefix=***<string>*
        **Description:** Prefix to use to expand fields''',
        require=False)
    typeprefix = Option(
        doc='''
        **Syntax:** **typeprefix=***<bool>*
        **Description:** If true, prefix fields with a letter indicating the type (long, int, float, string, json, array)''',
        require=False, default=False, validate=validators.Boolean())

    def stream(self, records):
        self.logger.info('JsonToFieldsCommand: %s', self)  # logs command line
        for record in records:
            json_str = record.get(self.json)
            if json_str:
                json_obj = json.loads(json_str)
                if self.prefix:
                    prefix = self.prefix 
                else:
                    prefix = ""
                for key, value in json_obj.iteritems():
                    if (not self.fieldnames) or (key in self.fieldnames):
                        if isinstance(value, basestring):
                            if self.typeprefix:
                                tp = "s_"
                            else:
                                tp = ""
                            record[tp + prefix + key] = value
                        elif isinstance(value, collections.Mapping):
                            if self.typeprefix:
                                tp = "j_"
                            else:
                                tp = ""
                            record[tp + prefix + key] = json.dumps(value)
                        elif isinstance(value, collections.Sequence):
                            if self.typeprefix:
                                tp = "a_"
                            else:
                                tp = ""
                            record[tp + prefix + key] = [ json.dumps(s) for s in value ]
                        else:
                            if self.typeprefix:
                                if isinstance(value, int):
                                    tp = "i_"
                                elif isinstance(value, float):
                                    tp = "f_"
                                elif isinstance(value, long):
                                    tp = "l_"
                                else:
                                    tp = "x_"
                            else:
                                tp = ""
                            record[tp + prefix + key] = value
            else:
                self.logger.warn('JsonToFieldsCommand: no field named %s', self.json)
            yield record
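
A hypothetical invocation (the command name and field names are illustrative, not confirmed by the app's commands.conf): given an event whose `payload` field contains `{"user": "alice", "count": 7}`,

.. code-block::
    ... | jsontofields json=payload prefix=p_ typeprefix=true

would yield `s_p_user=alice` and `i_p_count=7`, the type prefixes coming from the isinstance checks above.
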
Example #18
class EMGroupEntityMatchCommand(StreamingCommand):
    """ Match groups and entities based on group filter and entity dimensions

    ##Syntax

    .. code-block::
        emgroupentitymatch selectedGroupIds="states,aws_instances" retainInput=false

    ##Description
        This custom search command will add 'group_id' and 'group_title' to all input
        entity records if they are members of a group - otherwise it will be omitted from the results
        unless retainInput is 'true'.

        Options:
        1. selectedGroupIds -- indicates the selected groups that you want to match against the entities
        2. retainInput -- indicates if the original input records should be attached to the output records
                          if true, those records will have 'group_id' and 'group_title' set to 'N/A' for you
                          to distinguish them.

    ##Example

    .. code-block::
        | inputlookup em_entities
        | emgroupentitymatch selectedGroupIds="states,aws_instances" retainInput=false
        | stats count by group_title

    """

    _group_records = None

    selected_group_ids = Option(doc='List of selected group ids, separated by comma.',
                                name='selectedGroupIds',
                                default=None,
                                require=False,
                                validate=validators.List())
    retain_input_record = Option(doc='Boolean to indicate if user wants the input '
                                     'record to be added to the output without modification.',
                                 name='retainInput',
                                 default=False,
                                 require=False,
                                 validate=validators.Boolean())

    def stream(self, records):
        """
        Generator function that processes and yields event records to the Splunk stream pipeline.
        :param records: splunk event records
        :return:
        """
        self._setup_group_records()
        self.logger.debug('EMGroupEntityMatchCommand: %s', self)  # logs command line
        for record in records:
            if self.retain_input_record:
                record['group_id'] = 'N/A'
                record['group_title'] = 'N/A'
                yield record
            if len(self._group_records) > 0:
                for group_record in self._group_records:
                    if self._match_group_entity(record, group_record.group_filter):
                        record['group_id'] = group_record.group_id
                        record['group_title'] = group_record.group_content.get('title')
                        yield record
            else:
                yield record

    def _setup_group_records(self):
        """
        Grabs the groups from KV Store and builds out the filter objects if they have yet to be built
        :return: None
        """
        if self._group_records is None:
            collection = self.service.kvstore[STORE_GROUPS]
            group_data = collection.data.query()
            if self.selected_group_ids:
                selected_group_set = set(self.selected_group_ids)
                group_data = filter(lambda g: g['_key'] in selected_group_set, group_data)
            group_records = []
            for group in group_data:
                filter_val = group.get('filter')
                d = {}
                if filter_val:
                    for v in filter_val.split(','):
                        dim_name, dim_val = v.strip().split('=')
                        d.setdefault(dim_name, set()).add(dim_val)
                group_records.append(GroupRecord(group_id=group['_key'],
                                                 group_filter=d,
                                                 group_content=group))
            self._group_records = group_records

    def _match_group_entity(self, record, filter_dict):
        """
        Verify whether this record matches to group filters.
        Support wildcard in end of string
        :param record:
        :param filter_dict:
        :return:
        """
        for dim_name, dim_val_set in filter_dict.iteritems():
            record_vals = record.get('dimensions.%s' % dim_name)
            if not record_vals:
                return False
            if not isinstance(record_vals, list):
                record_vals = [record_vals]
            matched = False
            for rval in record_vals:
                # check if record value is one of the filter values
                if rval in dim_val_set:
                    matched = True
                    break
                # otherwise check if record value matches any of the fuzzy match values
                fuzzy_matches = filter(lambda v: v.endswith('*'), dim_val_set)
                if len(fuzzy_matches):
                    matched = any(rval.startswith(v[:-1]) for v in fuzzy_matches)
            if not matched:
                return False
        return True
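
To make the filter format concrete: a group whose KV Store `filter` field holds `region=us-*,env=prod` is parsed by `_setup_group_records` into a dimension dict, and `_match_group_entity` then treats the trailing `*` as a prefix wildcard. A standalone sketch of that parse (the dimension names and values are illustrative):

.. code-block::
    filter_val = 'region=us-*,env=prod'
    d = {}
    for v in filter_val.split(','):
        dim_name, dim_val = v.strip().split('=')
        d.setdefault(dim_name, set()).add(dim_val)
    # d == {'region': {'us-*'}, 'env': {'prod'}}
    # 'us-*' matches any record whose dimensions.region starts with 'us-'
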
Example #19
class MispSearchCommand(StreamingCommand):
    """ search in MISP for attributes matching the value of field.

    ##Syntax

        code-block::
        mispsearch field=<field> onlyids=y|n

    ##Description

        body =  {
                    "returnFormat": "mandatory",
                    "page": "optional",
                    "limit": "optional",
                    "value": "optional",
                    "type": "optional",
                    "category": "optional",
                    "org": "optional",
                    "tags": "optional",
                    "from": "optional",
                    "to": "optional",
                    "last": "optional",
                    "eventid": "optional",
                    "withAttachments": "optional",
                    "uuid": "optional",
                    "publish_timestamp": "optional",
                    "timestamp": "optional",
                    "enforceWarninglist": "optional",
                    "to_ids": "optional",
                    "deleted": "optional",
                    "includeEventUuid": "optional",
                    "includeEventTags": "optional",
                    "event_timestamp": "optional",
                    "threat_level_id": "optional",
                    "eventinfo": "optional"
                }

    ##Example

    Search in MISP for value of fieldname r_ip (remote IP in proxy logs).

        code-block::
         * | mispsearch field=r_ip

    """

    misp_instance = Option(doc='''
        **Syntax:** **misp_instance=***<instance_name>*
        **Description:**MISP instance parameters as described in local/inputs.conf''',
                           require=True)
    field = Option(doc='''
        **Syntax:** **field=***<fieldname>*
        **Description:**Name of the field containing the value to search for.''',
                   require=True,
                   validate=validators.Fieldname())
    onlyids = Option(doc='''
        **Syntax:** **onlyids=***<y|n>*
        **Description:** Boolean to search only attributes with to_ids set''',
                     require=False,
                     validate=validators.Boolean())
    gettag = Option(doc='''
        **Syntax:** **gettag=***<y|n>*
        **Description:** Boolean to return attribute tags''',
                    require=False,
                    validate=validators.Boolean())
    includeEventUuid = Option(doc='''
        **Syntax:** **includeEventUuid=***y|Y|1|true|True|n|N|0|false|False*
        **Description:**Boolean to include event UUID(s) to results.''',
                              require=False,
                              validate=validators.Boolean())
    includeEventTags = Option(doc='''
        **Syntax:** **includeEventTags=***y|Y|1|true|True|n|N|0|false|False*
        **Description:**Boolean to include event tags in results.''',
                              require=False,
                              validate=validators.Boolean())
    last = Option(doc='''
        **Syntax:** **last=***<int>d|h|m*
        **Description:**publication duration in day(s), hour(s) or minute(s). **eventid**, **last** and **date_from** are mutually exclusive''',
                  require=False,
                  validate=validators.Match("last", r"^[0-9]+[hdm]$"))
    limit = Option(doc='''
        **Syntax:** **limit=***<int>*
        **Description:**define the limit for each MISP search; default 1000. 0 = no pagination.''',
                   require=False,
                   validate=validators.Match("limit", r"^[0-9]+$"))
    page = Option(doc='''
        **Syntax:** **page=***<int>*
        **Description:**define the page for each MISP search; default 1.''',
                  require=False,
                  validate=validators.Match("limit", r"^[0-9]+$"))
    json_request = Option(doc='''
        **Syntax:** **json_request=***valid JSON request*
        **Description:**Valid JSON request''',
                          require=False)

    def stream(self, records):
        # Generate args
        my_args = prepare_config(self)
        my_args['misp_url'] = my_args['misp_url'] + '/attributes/restSearch'
        # set proper headers
        headers = {'Content-type': 'application/json'}
        headers['Authorization'] = my_args['misp_key']
        headers['Accept'] = 'application/json'

        fieldname = str(self.field)
        if self.gettag is True:
            get_tag = True
        else:
            get_tag = False

        pagination = True
        if self.limit is not None:
            if int(self.limit) == 0:
                pagination = False
            else:
                limit = int(self.limit)
        else:
            limit = 1000
        if self.page is not None:
            page = int(self.page)
        else:
            page = 1

        if self.json_request is not None:
            body_dict = json.loads(self.json_request)
            logging.info('Option "json_request" set')
            body_dict['returnFormat'] = 'json'
            body_dict['withAttachments'] = False
            if 'limit' in body_dict:
                limit = int(body_dict['limit'])
                if limit == 0:
                    pagination = False
            if 'page' in body_dict:
                page = body_dict['page']
                pagination = False
        else:
            # build search JSON object
            body_dict = {"returnFormat": "json", "withAttachments": False}
            if self.onlyids is True:
                body_dict['to_ids'] = "True"
            if self.includeEventUuid is not None:
                body_dict['includeEventUuid'] = self.includeEventUuid
            if self.includeEventTags is not None:
                body_dict['includeEventTags'] = self.includeEventTags
            if self.last is not None:
                body_dict['last'] = self.last
        for record in records:
            if fieldname in record:
                value = record.get(fieldname, None)
                if value is not None:
                    body_dict['value'] = str(value)
                    misp_category = []
                    misp_event_id = []
                    misp_event_uuid = []
                    misp_orgc_id = []
                    misp_to_ids = []
                    misp_tag = []
                    misp_type = []
                    misp_value = []
                    misp_uuid = []
                    # search
                    if pagination is True:
                        body_dict['page'] = page
                        body_dict['limit'] = limit
                    body = json.dumps(body_dict)
                    logging.debug('mispsearch request body: %s', body)
                    r = requests.post(my_args['misp_url'],
                                      headers=headers,
                                      data=body,
                                      verify=my_args['misp_verifycert'],
                                      cert=my_args['client_cert_full_path'],
                                      proxies=my_args['proxies'])
                    # check if status is anything other than 200; throw an exception if it is
                    r.raise_for_status()
                    # response is 200 by this point or we would have thrown an exception
                    # print >> sys.stderr, "DEBUG MISP REST API response: %s" % response.json()
                    response = r.json()
                    if 'response' in response:
                        if 'Attribute' in response['response']:
                            for a in response['response']['Attribute']:
                                if str(a['type']) not in misp_type:
                                    misp_type.append(str(a['type']))
                                if str(a['value']) not in misp_value:
                                    misp_value.append(str(a['value']))
                                if str(a['to_ids']) not in misp_to_ids:
                                    misp_to_ids.append(str(a['to_ids']))
                                if str(a['category']) not in misp_category:
                                    misp_category.append(str(a['category']))
                                if str(a['uuid']) not in misp_uuid:
                                    misp_uuid.append(str(a['uuid']))
                                if str(a['event_id']) not in misp_event_id:
                                    misp_event_id.append(str(a['event_id']))
                                if 'Tag' in a:
                                    for tag in a['Tag']:
                                        if str(tag['name']) not in misp_tag:
                                            misp_tag.append(str(tag['name']))
                                if 'Event' in a:
                                    if a['Event']['uuid'] not in misp_event_uuid:
                                        misp_event_uuid.append(str(a['Event']['uuid']))
                                    if a['Event']['orgc_id'] not in misp_orgc_id:
                                        misp_orgc_id.append(str(a['Event']['orgc_id']))
                            record['misp_type'] = misp_type
                            record['misp_value'] = misp_value
                            record['misp_to_ids'] = misp_to_ids
                            record['misp_category'] = misp_category
                            record['misp_attribute_uuid'] = misp_uuid
                            record['misp_event_id'] = misp_event_id
                            record['misp_event_uuid'] = misp_event_uuid
                            record['misp_orgc_id'] = misp_orgc_id
                            record['misp_tag'] = misp_tag

            yield record
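
For the docstring's example (`mispsearch field=r_ip`), each record's field value is folded into a restSearch body like the following before the POST (a sketch; the defaults mirror the code above, and the IP is illustrative):

.. code-block::
    import json

    body_dict = {
        'returnFormat': 'json',
        'withAttachments': False,
        'value': '198.51.100.7',  # taken from the r_ip field of the event
        'page': 1,                # added only while pagination is enabled
        'limit': 1000,            # default when the limit option is not set
    }
    body = json.dumps(body_dict)
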
Example #20
class B64Command(StreamingCommand):
    """
    Encode a string to Base64
    Decode Base64 content

     | base64 [action=(encode|decode)] field=<field> [mode=(replace|append)]
     """

    field = Option(name='field', require=True, default=None)
    action = Option(name='action', require=False, default='decode', validate=Base64Actions())
    mode = Option(name='mode', require=False, default='replace', validate=OutputModes())
    alphabet = Option(name='alphabet', require=False, default=BASE64_CHARS, validate=Base64Alphabet())
    backslash_escape = Option(name='backslash_escape', require=False, default=True, validate=validators.Boolean())
    encoding = Option(name='encoding', require=False, default=None, validate=OutputEncoding())
    recurse = Option(name='recurse', require=False, default=False, validate=validators.Boolean())
    suppress_error = Option(name='suppress_error', require=False, default=False, validate=validators.Boolean())

    def stream(self, records):

        # Set the output field
        if self.mode == 'append':
            dest_field = 'base64'
        else:
            dest_field = self.field

        for record in records:
            # Return unchanged record if the field is not present
            if self.field not in record:
                yield record
                continue

            # Process field
            field_data_list = record[self.field]
            output_data_list = []

            # Ensure all values are in a list
            if not isinstance(field_data_list, list):
                field_data_list = [field_data_list]

            for field_data in field_data_list:
                try:
                    # Base64 Encoding
                    if self.action == 'encode':
                        # Expected input is UTF-8 read as Unicode.
                        # Input in other formats must first be unescaped from its backslash-escaped form
                        if self.backslash_escape:
                            field_data = field_data.encode('utf-8', errors='ignore').decode('unicode_escape')
                        field_data = field_data.encode(self.encoding, errors='ignore')
                        # Add encoded ASCII data to output
                        output_data_list.append(ensure_str(
                            to_b64(field_data, custom_alphabet=self.alphabet)
                        ))

                    # Base64 Decoding
                    else:
                        output_data = from_b64(field_data, custom_alphabet=self.alphabet, recurse=self.recurse)
                        # Try specified encoding
                        if self.encoding:
                            try:
                                decode_attempt = output_data.decode(self.encoding, errors='strict')
                                if '\x00' not in decode_attempt:
                                    output_data_list.append(decode_attempt)
                                    continue
                            except UnicodeDecodeError:
                                pass
                        # Backslash-escape the output
                        # Null values will break the data passed back through stdout
                        if self.backslash_escape or b'\x00' in output_data:
                            output_data_list.append(
                                backslash_escape(output_data)
                            )
                        # If encoding was not set, backslash_escape was not set, and no null found
                        else:
                            output_data_list.append(
                                output_data.decode('utf8', errors='replace')
                            )

                except Exception as e:
                    if not self.suppress_error:
                        raise e

                record[dest_field] = output_data_list

            yield record
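
The custom-alphabet handling provided by `to_b64`/`from_b64` can be approximated with the standard library, for readers who want to check a value by hand (a sketch under that assumption, not the app's actual helpers):

.. code-block::
    import base64

    STD = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'

    def b64_with_alphabet(data, alphabet):
        # encode with the standard alphabet, then translate to the custom one
        encoded = base64.b64encode(data).decode('ascii')
        return encoded.translate(str.maketrans(STD, alphabet))

    print(b64_with_alphabet(b'splunk', STD))  # c3BsdW5r
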
class mispapireport(ReportingCommand):
    """ MISP API wrapper for endpoint /attributes/restSearch.
    Return format is JSON for the moment.
    ##Syntax
    Use parameter names to set values in the POST request body below.
    .. code-block::
        | mispapireport misp_instance=<input> page=<int> limit=<int> value=string type=CSVstring category=CSVstring org=string 
                        tags=CSVstring not_tags=CSVstrings date_from=date_string date_to=date_string last=<int>(d|h|m)
                        eventid=CSVint uuid=CSVuuid_string enforceWarninglist=True|False 
                        to_ids=True|False deleted=True|False includeEventUuid=True|False includeEventTags=True|False
                        threat_level_id=<int> eventinfo=string

    forced parameters:
        "returnFormat": "json"
        withAttachments: False
    not handled parameters:
        "publish_timestamp": "optional",
        "timestamp": "optional",
        "event_timestamp": "optional",


    ##Description
    {
        "returnFormat": "mandatory",
        "page": "optional",
        "limit": "optional",
        "value": "optional",
        "type": "optional",
        "category": "optional",
        "org": "optional",
        "tags": "optional",
        "from": "optional",
        "to": "optional",
        "last": "optional",
        "eventid": "optional",
        "withAttachments": "optional",
        "uuid": "optional",
        "publish_timestamp": "optional",
        "timestamp": "optional",
        "enforceWarninglist": "optional",
        "to_ids": "optional",
        "deleted": "optional",
        "includeEventUuid": "optional",
        "includeEventTags": "optional",
        "event_timestamp": "optional",
        "threat_level_id": "optional",
        "eventinfo": "optional",
        "includeProposals": "optional"
    }
    # status for mode=p
    {
        "returnFormat": forced to json,
        "page": param,
        "limit": param,
        "value": param,
        "type": param, CSV string,
        "category": param, CSV string,
        "org": param, CSV string,
        "tags": param with not_tags,
        "from": param,
        "to": param,
        "last": param,
        "eventid": param,
        "withAttachments": forced to false,
        "uuid": param,
        "publish_timestamp": not managed,
        "timestamp": not managed,
        "enforceWarninglist": param,
        "to_ids": param,
        "deleted": param,
        "includeEventUuid": param,
        "includeEventTags": param,
        "event_timestamp":  not managed,
        "threat_level_id":  param,
        "eventinfo": param,
        "includeProposals": not managed
    }

    """
    # Supersede the MISP instance for this search
    misp_instance = Option(doc='''
        **Syntax:** **misp_instance=***<instance_name>*
        **Description:**MISP instance parameters as described in local/inputs.conf.''',
                           require=True)
    # mode: p - give parameters one by one / j provide a complete JSON string
    # default is mode=p
    mode = Option(doc='''
        **Syntax:** **mode=***p|j<AUTH_KEY>*
        **Description:**mode to build the JSON request.''',
                  require=False,
                  validate=validators.Match("mode", r"^(p|j)$"))
    # if mode=j a complete JSON request has to be provided
    json_request = Option(doc='''
        **Syntax:** **json_request=***valid JSON request*
        **Description:**Valid JSON request''',
                          require=False)
    # specific formats
    last = Option(doc='''
        **Syntax:** **last=***<int>d|h|m*
        **Description:**publication duration in day(s), hour(s) or minute(s).''',
                  require=False,
                  validate=validators.Match("last", r"^[0-9]+[hdm]$"))
    date_from = Option(doc='''
        **Syntax:** **date_from=***date_string"*
        **Description:**starting date.''',
                       require=False)
    date_to = Option(doc='''
        **Syntax:** **date_to=***date_string"*
        **Description:**(optional)ending date in searches with date_from. if not set default is now''',
                     require=False)
    threat_level_id = Option(doc='''
        **Syntax:** **threat_level_id=***1-4*
        **Description:**Threat level.''',
                             require=False,
                             validate=validators.Match("threat_level_id",
                                                       r"^[1-4]$"))
    org = Option(doc='''
        **Syntax:** **org=***CSV string*
        **Description:**Comma(,)-separated string of org name(s), id(s), uuid(s).''',
                 require=False)
    # CSV numeric list
    eventid = Option(doc='''
        **Syntax:** **eventid=***id1(,id2,...)*
        **Description:**list of event ID(s).''',
                     require=False,
                     validate=validators.Match("eventid", r"^[0-9,]+$"))
    # strings
    value = Option(doc='''
        **Syntax:** **value=***string*
        **Description:**value.''',
                   require=False)
    eventinfo = Option(doc='''
        **Syntax:** **eventinfo=***string*
        **Description:**eventinfo string''',
                       require=False)
    # numeric values
    limit = Option(doc='''
        **Syntax:** **limit=***<int>*
        **Description:**define the limit for each MISP search; default 10000. 0 = no pagination.''',
                   require=False,
                   validate=validators.Match("limit", r"^[0-9]+$"))
    page = Option(doc='''
        **Syntax:** **page=***<int>*
        **Description:**define the page of result to get.''',
                  require=False,
                  validate=validators.Match("limit", r"^[0-9]+$"))
    # CSV strings
    uuid = Option(doc='''
        **Syntax:** **uuid=***id1(,id2,...)*
        **Description:**list of event UUID(s).''',
                  require=False)
    type = Option(doc='''
        **Syntax:** **type=***CSV string*
        **Description:**Comma(,)-separated string of types to search for. Wildcard is %.''',
                  require=False)
    category = Option(doc='''
        **Syntax:** **category=***CSV string*
        **Description:**Comma(,)-separated string of categories to search for. Wildcard is %.''',
                      require=False)
    tags = Option(doc='''
        **Syntax:** **tags=***CSV string*
        **Description:**Comma(,)-separated string of tags to search for. Wildcard is %.''',
                  require=False)
    not_tags = Option(doc='''
        **Syntax:** **not_tags=***CSV string*
        **Description:**Comma(,)-separated string of tags to exclude from results. Wildcard is %.''',
                      require=False)
    # Booleans
    to_ids = Option(doc='''
        **Syntax:** **to_ids=***y|Y|1|true|True|n|N|0|false|False*
        **Description:**Boolean to search only attributes with the flag "to_ids" set to true.''',
                    require=False,
                    validate=validators.Boolean())
    enforceWarninglist = Option(doc='''
        **Syntax:** **enforceWarninglist=***y|Y|1|true|True|n|N|0|false|False*
        **Description:**Boolean to apply warning lists to results.''',
                                require=False,
                                validate=validators.Boolean())
    deleted = Option(doc='''
        **Syntax:** **deleted=***y|Y|1|true|True|n|N|0|false|False*
        **Description:**Boolean to include deleted attributes to results.''',
                     require=False,
                     validate=validators.Boolean())
    includeEventUuid = Option(doc='''
        **Syntax:** **includeEventUuid=***y|Y|1|true|True|n|N|0|false|False*
        **Description:**Boolean to include event UUID(s) to results.''',
                              require=False,
                              validate=validators.Boolean())
    includeEventTags = Option(doc='''
        **Syntax:** **includeEventTags=***y|Y|1|true|True|n|N|0|false|False*
        **Description:**Boolean to include event tags in results.''',
                              require=False,
                              validate=validators.Boolean())
    pipesplit = Option(doc='''
        **Syntax:** **pipesplit=***<1|y|Y|t|true|True|0|n|N|f|false|False>*
        **Description:**Boolean to split multivalue attributes into 2 attributes.''',
                       require=False,
                       validate=validators.Boolean())

    @Configuration()
    def map(self, records):
        # self.logger.debug('mispgetioc.map')
        return records

    def reduce(self, records):

        # Phase 1: Preparation
        my_args = prepare_config(self)
        my_args['misp_url'] = my_args['misp_url'] + '/attributes/restSearch'

        jsonmode = False
        if self.mode is not None:
            if 'j' in self.mode and self.json_request is not None:
                jsonmode = True

        if jsonmode is True:
            pagination = True
            other_page = True
            body_dict = json.loads(self.json_request)
            logging.info('Option "json_request" set')
            body_dict['returnFormat'] = 'json'
            body_dict['withAttachments'] = False
            if 'limit' in body_dict:
                limit = int(body_dict['limit'])
                if limit == 0:
                    pagination = False
            else:
                limit = 10000

            if 'page' in body_dict:
                page = body_dict['page']
            else:
                page = 1
            page_length = 0
        else:
            # build search JSON object
            body_dict = {"returnFormat": "json", "withAttachments": False}

            # add provided parameters to JSON request body
            # specific formats
            if self.last is not None:
                body_dict['last'] = self.last
                logging.info('Option "last" set with %s', body_dict['last'])

            if self.date_from is not None:
                body_dict['from'] = self.date_from
                logging.info('Option "date_from" set with %s',
                             body_dict['from'])
                if self.date_to is not None:
                    body_dict['to'] = self.date_to
                    logging.info('Option "date_to" set with %s',
                                 body_dict['to'])
                else:
                    logging.info('Option "date_to" will be set to now().')

            if self.threat_level_id is not None:
                body_dict['threat_level_id'] = self.threat_level_id
                logging.info('Option "threat_level_id" set with %s',
                             body_dict['threat_level_id'])

            if self.org is not None:
                body_dict['org'] = self.org
                logging.info('Option "org" set')

            if self.eventid:
                if "," in self.eventid:
                    event_criteria = {}
                    event_list = self.eventid.split(",")
                    event_criteria['OR'] = event_list
                    body_dict['eventid'] = event_criteria
                else:
                    body_dict['eventid'] = self.eventid
                logging.info('Option "eventid" set')

            if self.value is not None:
                body_dict['value'] = self.value
                logging.info('Option "value" set')

            if self.eventinfo is not None:
                body_dict['eventinfo'] = self.eventinfo
                logging.info('Option "eventinfo" set')

            # CSV strings
            if self.category is not None:
                cat_criteria = {}
                cat_list = self.category.split(",")
                cat_criteria['OR'] = cat_list
                body_dict['category'] = cat_criteria
            if self.type is not None:
                type_criteria = {}
                type_list = self.type.split(",")
                type_criteria['OR'] = type_list
                body_dict['type'] = type_criteria
            if self.tags is not None or self.not_tags is not None:
                tags_criteria = {}
                if self.tags is not None:
                    tags_list = self.tags.split(",")
                    tags_criteria['OR'] = tags_list
                if self.not_tags is not None:
                    tags_list = self.not_tags.split(",")
                    tags_criteria['NOT'] = tags_list
                body_dict['tags'] = tags_criteria
            if self.uuid is not None:
                uuid_criteria = {}
                uuid_list = self.uuid.split(",")
                uuid_criteria['OR'] = uuid_list
                body_dict['uuid'] = uuid_criteria

            # Booleans
            if self.to_ids is not None:
                body_dict['to_ids'] = self.to_ids
                logging.info('Option "to_ids" set with %s',
                             body_dict['to_ids'])

            if self.enforceWarninglist is not None:
                body_dict['enforceWarninglist'] = self.enforceWarninglist
                logging.info('Option "enforceWarninglist" set with %s',
                             body_dict['enforceWarninglist'])

            if self.deleted is not None:
                body_dict['deleted'] = self.deleted
                logging.info('Option "deleted" set with %s',
                             body_dict['deleted'])

            if self.includeEventUuid is not None:
                body_dict['includeEventUuid'] = self.includeEventUuid
                logging.info('Option "includeEventUuid" set with %s',
                             body_dict['includeEventUuid'])

            if self.includeEventTags is not None:
                body_dict['includeEventTags'] = self.includeEventTags
                logging.info('Option "includeEventTags" set with %s',
                             body_dict['includeEventTags'])
            # Search pagination
            pagination = True
            other_page = True
            if self.page:
                page = int(self.page)
            else:
                page = 1
            page_length = 0
            if self.limit is not None:
                if int(self.limit) == 0:
                    pagination = False
                else:
                    limit = int(self.limit)
            else:
                limit = 10000

        # set proper headers
        headers = {'Content-type': 'application/json'}
        headers['Authorization'] = my_args['misp_key']
        headers['Accept'] = 'application/json'

        results = []
        # paginated search: loop until the last page of results
        while other_page:
            if pagination is True:
                body_dict['page'] = page
                body_dict['limit'] = limit

            body = json.dumps(body_dict)
            logging.debug('mispapireport request body: %s', body)
            # search
            r = requests.post(my_args['misp_url'],
                              headers=headers,
                              data=body,
                              verify=my_args['misp_verifycert'],
                              cert=my_args['client_cert_full_path'],
                              proxies=my_args['proxies'])
            # check if status is anything other than 200; throw an exception if it is
            r.raise_for_status()
            # response is 200 by this point or we would have thrown an exception
            response = r.json()
            if 'response' in response:
                if 'Attribute' in response['response']:
                    page_length = len(response['response']['Attribute'])
                    for a in response['response']['Attribute']:
                        v = {}
                        v['misp_Object'] = "-"
                        if self.includeEventTags is True:
                            v['misp_tag'] = "-"
                        for ak, av in a.items():
                            if ak == 'Event':
                                json_event = a['Event']
                                for ek, ev in json_event.items():
                                    key = 'misp_event_' + ek
                                    v[key] = str(ev)
                            elif ak == 'Tag':
                                tag_list = []
                                for tag in a['Tag']:
                                    try:
                                        tag_list.append(str(tag['name']))
                                    except Exception:
                                        pass
                                v['misp_tag'] = tag_list
                            else:
                                vkey = 'misp_' + ak
                                v[vkey] = av
                        results.append(v)

            if pagination is True:
                if page_length < limit:
                    other_page = False
                else:
                    page = page + 1
            else:
                other_page = False

        # add columns for each type in results
        typelist = []
        for r in results:
            if r['misp_type'] not in typelist:
                typelist.append(r['misp_type'])

        output_dict = {}
        increment = 1
        for r in results:
            key = str(r['misp_event_id']) + '_' + str(increment)
            increment = increment + 1
            v = r
            for t in typelist:
                misp_t = 'misp_' + t.replace('-', '_').replace('|', '_p_')
                if t == r['misp_type']:
                    v[misp_t] = r['misp_value']
                else:
                    v[misp_t] = ''
            output_dict[key] = v

        for k, v in output_dict.items():
            yield v
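
The per-type column expansion at the end of `reduce` can be illustrated in isolation: every result row gains one `misp_<type>` column per distinct attribute type in the result set, filled only on the row of that type (the values are illustrative):

.. code-block::
    results = [
        {'misp_event_id': '42', 'misp_type': 'ip-dst', 'misp_value': '198.51.100.7'},
        {'misp_event_id': '42', 'misp_type': 'domain|ip', 'misp_value': 'evil.test|198.51.100.7'},
    ]
    typelist = sorted({r['misp_type'] for r in results})
    for r in results:
        for t in typelist:
            col = 'misp_' + t.replace('-', '_').replace('|', '_p_')
            r[col] = r['misp_value'] if t == r['misp_type'] else ''
    # the first row now has misp_ip_dst set and misp_domain_p_ip empty
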
class ToSFXCommand(EventingCommand):
    """
    ## Syntax

    <command> | tosfx

    ## Description

    One or more datapoints are generated for each input event's field(s) of the
    form `gauge_*`, `counter_*` or `cumulative_counter_*`.  The metric name in
    SignalFx will be the `*` part of the field name.  Any additional fields on
    the event will be attached as dimensions to the generated datapoints.

    """

    access_token = Option()
    debug = Option(validate=validators.Boolean(), default=False)
    dry_run = Option(validate=validators.Boolean(), default=False)
    signalfx_realm = Option()
    ingest_url = Option()
    dp_endpoint = Option(default="/v2/event")

    def ensure_default_config(self):
        configs = configparser.ConfigParser(allow_no_value=True)
        local_config = os.path.abspath(
            os.path.join(os.getcwd(), "..", "local", "sfx.conf"))

        configs.read(local_config)

        def read_conf_value(field):
            try:
                return configs.get("setupentity", field)
            except configparser.NoOptionError:
                return None

        if not self.signalfx_realm:
            self.signalfx_realm = read_conf_value("signalfx_realm")
        if not self.ingest_url:
            self.ingest_url = read_conf_value("ingest_url")

        self.logger.error("getting access token")
        if not self.access_token:
            self.access_token = get_access_token(self.service)

    def transform(self, records):
        self.ensure_default_config()

        out = []
        payload = []
        for event in records:
            add_event_to_payload(self, event=event, payload=payload)

            if self.debug:
                event["endpoint"] = self.ingest_url + self.dp_endpoint

            out.append(event)

        self.logger.error(out)

        self.logger.error(payload)

        if not self.dry_run:
            resp = send_payload(
                payload=payload,
                target_url=compose_ingest_url(self.signalfx_realm,
                                              self.ingest_url,
                                              self.dp_endpoint),
                token=self.access_token,
            )
            for event in out:
                event["status"] = resp.status_code
                if resp.status_code != 200:
                    event["response_error"] = resp.content

        for event in out:
            yield event
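
Given the field convention in the docstring, an event like the one below would produce a single gauge datapoint named `cpu_util`, with the remaining fields attached as dimensions (a sketch of the expected shape; `add_event_to_payload` is defined elsewhere and may differ in detail):

.. code-block::
    event = {
        'gauge_cpu_util': '73.5',
        'host': 'web-01',
        'region': 'us-east-1',
    }
    # expected datapoint, per the description above:
    datapoint = {
        'metric': 'cpu_util',
        'value': 73.5,
        'dimensions': {'host': 'web-01', 'region': 'us-east-1'},
    }
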
Example #23
class Bs4(StreamingCommand):
    """ A wrapper for BeautifulSoup4 to extract html/xml tags and text from them to use in Splunk.

    ##Syntax

    .. code-block::
       bs4 textfield=<field> [get_text=<bool>] [get_text_label=<string>] [parser=<string>] [find=<tag>] [find_attrs=<quoted_key:value_pairs>] [find_all=<tag>] [find_all_attrs=<quoted_key:value_pairs>] [find_child=<tag>] [find_child_attrs=<quoted_key:value_pairs>] [find_children=<tag>] [find_children_attrs=<quoted_key:value_pairs>]

    ##Description

    A wrapper script to bring some functionality from BeautifulSoup to Splunk. By default the
    extracted text is placed in a new field 'get_text'; otherwise the selection is returned
    in a field named 'soup'. The default parser is 'lxml'; other parsers can be specified,
    but 'html5lib' is not currently included. The find methods can be used in conjunction;
    their order of operation is find > find_all > find_child > find_children. Each option
    has a similarly named counterpart ending in '_attrs' that accepts inner- and outer-quoted
    key:value pairs for more precise selections.

    ##Example

    .. code-block::
        * | bs4 textfield=_raw find="div" get_text=t
    """

    textfield = Option(
        require=True,
        doc='''
        **Syntax:** **textfield=***<fieldname>*
        **Description:** Name of the field that will contain the text to search against''',
        validate=validators.Fieldname())

    parser = Option(
        default='lxml',
        doc='''
        **Syntax:** **parser=***<string>*
        **Description:** Corresponds to the parsers listed at https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser (html5lib is not currently packaged with the app, so it is not an option)''',
        )
 
    find = Option(
        default=False,
        doc='''
        **Syntax:** **find=***<tag>*
        **Description:** Corresponds to the name attribute of BeautifulSoup's find method''',
        )
 
    find_attrs = Option(
        default=None,
        doc='''
        **Syntax:** **find_attrs=***<quoted_key:value_pairs>*
        **Description:** Corresponds to the attrs attribute of BeautifulSoup's find method. Expects inner and outer quoted "'key1':'value1','key2':'value2'" pairs comma-separated but contained in outer quotes.''',
        )
 
    find_all = Option(
        default=False,
        doc='''
        **Syntax:** **find_all=***<tag>*
        **Description:** Corresponds to the name attribute of BeautifulSoup's find_all method. Order of operation is find > find_all > find_child > find_children so can be used in conjunction''',
        )
 
    find_all_attrs = Option(
        default=None,
        doc='''
        **Syntax:** **find_all_attrs=***<quoted_key:value_pairs>*
        **Description:** Corresponds to the attrs attribute of BeautifulSoup's find_all method. Expects inner and outer quoted "'key1':'value1','key2':'value2'" pairs comma-separated but contained in outer quotes.''',
        )
 
    find_child = Option(
        default=False,
        doc='''
        **Syntax:** **find_child=***<tag>*
        **Description:** Corresponds to the name attribute of BeautifulSoup's find_child method. Order of operation is find > find_all > find_child > find_children so can be used in conjunction''',
        )

    find_child_attrs = Option(
        default=None,
        doc='''
        **Syntax:** **find_child_attrs=***<quoted_key:value_pairs>*
        **Description:** Corresponds to the attrs attribute of BeautifulSoup's find_child method. Expects inner and outer quoted "'key1':'value1','key2':'value2'" pairs comma-separated but contained in outer quotes.''',
        )
 
    find_children = Option(
        default=False,
        doc='''
        **Syntax:** **find_children=***<tag>*
        **Description:** Corresponds to the name attribute of BeautifulSoup's find_children method. Order of operation is find > find_all > find_child > find_children so can be used in conjunction''',
        )

    find_children_attrs = Option(
        default=None,
        doc='''
        **Syntax:** **find_children_attrs=***<quoted_key:value_pairs>*
        **Description:** Corresponds to the attrs attribute of BeautifulSoup's find_children method. Expects inner and outer quoted "'key1':'value1','key2':'value2'" pairs comma-separated but contained in outer quotes.''',
        )
 
    get_text = Option(
        default=True,
        doc='''
        **Syntax:** **get_text=***<bool>*
        **Description:** If true, returns the text minus html/xml formatting for the given selection and places it in the field `get_text`; otherwise the selection is returned in a field called `soup`''',
        validate=validators.Boolean())

    get_text_label = Option(
        default='get_text',
        doc='''
        **Syntax:** **get_text_label=***<string>*
        **Description:** If get_text is true, sets the label for the return field''',
        )

    #http://dev.splunk.com/view/logging/SP-CAAAFCN
    def setup_logging(self):
        logger = logging.getLogger('splunk.foo')    
        SPLUNK_HOME = os.environ['SPLUNK_HOME']
        
        LOGGING_DEFAULT_CONFIG_FILE = os.path.join(SPLUNK_HOME, 'etc', 'log.cfg')
        LOGGING_LOCAL_CONFIG_FILE = os.path.join(SPLUNK_HOME, 'etc', 'log-local.cfg')
        LOGGING_STANZA_NAME = 'python'
        LOGGING_FILE_NAME = "nlp-text-analytics.log"
        BASE_LOG_PATH = os.path.join('var', 'log', 'splunk')
        LOGGING_FORMAT = "%(asctime)s %(levelname)-s\t%(module)s:%(lineno)d - %(message)s"
        splunk_log_handler = logging.handlers.RotatingFileHandler(
            os.path.join(
                SPLUNK_HOME,
                BASE_LOG_PATH,
                LOGGING_FILE_NAME
            ), mode='a') 
        splunk_log_handler.setFormatter(logging.Formatter(LOGGING_FORMAT))
        logger.addHandler(splunk_log_handler)
        setupSplunkLogger(
            logger,
            LOGGING_DEFAULT_CONFIG_FILE,
            LOGGING_LOCAL_CONFIG_FILE,
            LOGGING_STANZA_NAME
        )
        return logger

    def stream(self, records):
        for record in records:
            soup = BeautifulSoup(record[self.textfield], self.parser)
            if self.find:
                if self.find_attrs is not None:
                    soup = soup.find(
                        self.find, 
                        literal_eval('{'+self.find_attrs+'}')
                    )
                else:
                    soup = soup.find(self.find)
            if self.find_all:
                if self.find_all_attrs is not None:
                    soup = soup.find_all(
                        self.find_all, 
                        literal_eval('{'+self.find_all_attrs+'}')
                    )
                else:
                    soup = soup.find_all(self.find_all)
            if self.find_child:
                if self.find_child_attrs is not None:
                    soup = soup.findChild(
                        self.find_child, 
                        literal_eval('{'+self.find_child_attrs+'}')
                    )
                else:
                    soup = soup.findChild(self.find_child)
            if self.find_children:
                if self.find_children_attrs is not None:
                    soup = soup.findChildren(
                        self.find_children, 
                        literal_eval('{'+self.find_children_attrs+'}')
                    )
                else:
                    soup = soup.findChildren(self.find_children)
            if self.get_text and not (self.find_all or self.find_children):
                record[self.get_text_label] = \
                    soup.get_text().decode('unicode_escape').encode('ascii','ignore')
            elif self.get_text and (self.find_all or self.find_children):
                record[self.get_text_label] = [
                    i.get_text().decode('unicode_escape').encode('ascii','ignore')
                    for i in soup
                ]
            else:
                record['soup'] = soup

            yield record
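
Since the `*_attrs` options are parsed with `literal_eval` after being wrapped in braces, the quoting rules in the option docs amount to producing a valid Python dict literal (a standalone sketch; the attribute names are illustrative):

.. code-block::
    from ast import literal_eval

    find_attrs = "'class':'price','id':'total'"  # as passed on the search line
    attrs = literal_eval('{' + find_attrs + '}')
    # attrs == {'class': 'price', 'id': 'total'}
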
Example #24
class INSEECommand(GeneratingCommand):
    """ Synopsis

    ##Syntax

    | insee [dtr=date_to_retrieve] [proxy=true] [debug=true]

    ##Description

    Request the Sirene API

    """
    dtr = Option(require=False, validate=Date())
    debug = Option(require=False, validate=validators.Boolean())
    proxy = Option(require=False, validate=validators.Boolean())

    # https://www.sirene.fr/sirene/public/variable/tefet
    LIBTEFET = {
        'NN': 'Unités non employeuses',
        '00': '0 salarié',
        '01': '1 ou 2 salariés',
        '02': '3 à 5 salariés',
        '03': '6 à 9 salariés',
        '11': '10 à 19 salariés',
        '12': '20 à 49 salariés',
        '21': '50 à 99 salariés',
        '22': '100 à 199 salariés',
        '31': '200 à 249 salariés',
        '32': '250 à 499 salariés',
        '41': '500 à 999 salariés',
        '42': '1 000 à 1 999 salariés',
        '51': '2 000 à 4 999 salariés',
        '52': '5 000 à 9 999 salariés',
        '53': '10 000 salariés et plus'
    }

    # https://www.sirene.fr/sirene/public/variable/rpen
    RPEN = {
        '01': ['971'],
        '02': ['972'],
        '03': ['973'],
        '04': ['974'],
        '06': ['976'],
        '07': ['977'],
        '08': ['978'],
        '11': ['75', '77', '78', '91', '92', '93', '94', '95'],
        '24': ['18', '28', '36', '37', '41', '45'],
        '27': ['21', '25', '39', '58', '70', '71', '89', '90'],
        '28': ['14', '27', '50', '61', '76'],
        '32': ['02', '59', '60', '62', '80'],
        '44': ['08', '10', '51', '52', '54', '55', '57', '67', '68', '88'],
        '52': ['44', '49', '53', '72', '85'],
        '53': ['22', '29', '35', '56'],
        '75': [
            '16', '17', '19', '23', '24', '33', '40', '47', '64', '79', '86',
            '87'
        ],
        '76': [
            '09', '11', '12', '30', '31', '32', '34', '46', '48', '65', '66',
            '81', '82'
        ],
        '84': [
            '01', '03', '07', '15', '26', '38', '42', '43', '63', '69', '73',
            '74'
        ],
        '93': ['04', '05', '06', '13', '83', '84'],
        '94': ['2A', '2B'],
        '98': ['975', '984', '986', '987', '988'],
        '99': ['99'],
    }

    # https://www.sirene.fr/sirene/public/variable/depet
    DEPET = {''}

    count_in = 0
    count_out = 0

    def set_configuration(self):
        # Open the configuration file
        try:
            with open(
                    os.path.dirname(os.path.abspath(__file__)) +
                    '/configuration_json.txt', 'r') as conf_file:
                conf = json.load(conf_file)
        except ValueError:
            self.logger.error('  invalid JSON configuration file')
            raise ExceptionConfiguration(
                'Invalid JSON in the configuration file')
        except IOError:
            self.logger.error('  configuration file doesn\'t exist')
            raise ExceptionConfiguration('Missing configuration file')

        # Verify the configuration
        if self.proxy:
            if 'http_proxy' not in conf or 'https_proxy' not in conf:
                self.logger.error(
                    '  proxies are not defined in the configuration file')
                raise ExceptionConfiguration(
                    'Proxies are not defined in the configuration file')
            self.proxies = dict()
            self.proxies['http'] = conf['http_proxy']
            self.proxies['https'] = conf['https_proxy']

        if 'consumer_key' not in conf or 'consumer_secret' not in conf:
            self.logger.error(
                '  API credentials are not defined in the configuration file')
            raise ExceptionConfiguration(
                'Missing API credentials in the configuration file')

        if 'endpoint_token' not in conf or 'endpoint_etablissement' not in conf or 'endpoint_informations' not in conf:
            self.logger.error(
                '  API endpoints are not defined in the configuration file')
            raise ExceptionConfiguration(
                'Missing API endpoints in the configuration file')

        self.consumer_key = conf['consumer_key']
        self.consumer_secret = conf['consumer_secret']
        self.endpoint_token = conf['endpoint_token']
        self.endpoint_etablissement = conf['endpoint_etablissement']
        self.endpoint_informations = conf['endpoint_informations']
        self.bearer_token = self.get_api_token()

    def get_api_token(self):
        payload = {'grant_type': 'client_credentials'}
        basic_auth = HTTPBasicAuth(self.consumer_key, self.consumer_secret)
        if self.proxy:
            r = requests.post(self.endpoint_token,
                              auth=basic_auth,
                              data=payload,
                              proxies=self.proxies)
        else:
            r = requests.post(self.endpoint_token,
                              auth=basic_auth,
                              data=payload)

        if self.debug:
            self.logger.debug('  token response %s\n%s', r.headers, r.text)

        if r.headers['Content-Type'] and 'application/json' in r.headers['Content-Type']:
            if r.status_code == 200:
                return r.json()['access_token']
            elif r.status_code == 401:
                self.logger.error('  incorrect credentials : %s',
                                  r.json()['error_description'])
            else:
                self.logger.error(
                    '  error during token retrieval. Code received : %d',
                    r.status_code)
        else:
            self.logger.error(
                '  error during token retrieval. Code received : %d',
                r.status_code)
        raise ExceptionToken('Error during API token retrieval')

    def get_status(self):
        # Initialize
        headers = {'Authorization': 'Bearer ' + self.bearer_token}

        if self.proxy:
            r = requests.get(self.endpoint_informations,
                             headers=headers,
                             proxies=self.proxies)
        else:
            r = requests.get(self.endpoint_informations, headers=headers)

        if self.debug:
            self.logger.debug('  status response %s\n%s', r.headers, r.text)

        while r.status_code == 429:
            # We made too many requests. We wait for the next rounded minute
            current_second = datetime.now().time().strftime('%S')
            time.sleep(60 - int(current_second) + 1)
            if self.proxy:
                r = requests.get(self.endpoint_informations,
                                 headers=headers,
                                 proxies=self.proxies)
            else:
                r = requests.get(self.endpoint_informations, headers=headers)
            if self.debug:
                self.logger.debug('  status response %s\n%s', r.headers,
                                  r.text)

        if 'application/json' in r.headers.get('Content-Type', ''):
            if r.status_code == 200:
                return r.json()
            elif r.status_code == 401:
                self.logger.error(
                    '  invalid bearer token %s in status request',
                    self.bearer_token)
            elif r.status_code == 406:
                self.logger.error('  invalid Accept header in status request')
            else:
                self.logger.error(
                    '  error during status retrieval. Code received : %d',
                    r.status_code)
        else:
            self.logger.error(
                '  error during status retrieval. Code received : %d',
                r.status_code)
        raise ExceptionStatus('Error during information retrieval')

    def get_siret(self,
                  q=None,
                  nombre=None,
                  curseur=None,
                  champs=None,
                  gzip=False):
        # Initialize
        payload = dict()
        if champs:
            payload['champs'] = champs
        if q:
            payload['q'] = q
        if nombre:
            payload['nombre'] = nombre
        if curseur:
            payload['curseur'] = curseur

        headers = {'Authorization': 'Bearer ' + self.bearer_token}

        if gzip:
            # Request GZip content
            headers['Accept-Encoding'] = 'gzip'

        if self.proxy:
            r = requests.get(self.endpoint_etablissement,
                             headers=headers,
                             params=payload,
                             proxies=self.proxies)
        else:
            r = requests.get(self.endpoint_etablissement,
                             headers=headers,
                             params=payload)

        if self.debug:
            self.logger.debug('  siret response %s\n%s', r.headers, r.text)

        while r.status_code == 429:
            # We made too many requests. We wait for the next rounded minute
            current_second = datetime.now().time().strftime('%S')
            time.sleep(60 - int(current_second) + 1)
            if self.proxy:
                r = requests.get(self.endpoint_etablissement,
                                 headers=headers,
                                 params=payload,
                                 proxies=self.proxies)
            else:
                r = requests.get(self.endpoint_etablissement,
                                 headers=headers,
                                 params=payload)
            if self.debug:
                self.logger.debug('  siret response %s\n%s', r.headers, r.text)

        internal_error_counter = 0
        while r.status_code == 500:
            # In case we get a 500 we prefer to retry our request before raising an error
            internal_error_counter += 1
            time.sleep(60)
            if self.proxy:
                r = requests.get(self.endpoint_etablissement,
                                 headers=headers,
                                 params=payload,
                                 proxies=self.proxies)
            else:
                r = requests.get(self.endpoint_etablissement,
                                 headers=headers,
                                 params=payload)
            if self.debug:
                self.logger.debug('  siret response %s\n%s', r.headers, r.text)
            if internal_error_counter == 10:
                break

        if 'application/json' in r.headers.get('Content-Type', ''):
            if r.status_code == 200:
                return r.json()
            elif r.status_code == 400:
                self.logger.error('  invalid parameters in query: %s',
                                  r.json()['header']['message'])
            elif r.status_code == 401:
                self.logger.error('  invalid bearer token %s in siret request',
                                  self.bearer_token)
            elif r.status_code == 404:
                self.logger.error('  unknown siret: %s',
                                  r.json()['header']['message'])
            elif r.status_code == 406:
                self.logger.error('  invalid Accept header in siret request')
            elif r.status_code == 414:
                self.logger.error('  siret request URI too long')
            else:
                self.logger.error(
                    '  error during siret retrieval. Code received : %d',
                    r.status_code)
        else:
            self.logger.error(
                '  error during siret retrieval. Code received : %d',
                r.status_code)

        raise ExceptionSiret('Error during siret retrieval')
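
    # The 429 wait-for-the-next-minute and 500 retry loops above duplicate
    # the logic in get_status(). A minimal sketch of a shared helper that
    # both methods could call (the helper name is hypothetical, not part of
    # the original code):
    def _get_with_retry(self, url, headers, params=None, max_internal_errors=10):
        kwargs = {'headers': headers, 'params': params}
        if self.proxy:
            kwargs['proxies'] = self.proxies
        r = requests.get(url, **kwargs)
        while r.status_code == 429:
            # Too many requests: wait for the next rounded minute
            current_second = datetime.now().time().strftime('%S')
            time.sleep(60 - int(current_second) + 1)
            r = requests.get(url, **kwargs)
        internal_errors = 0
        while r.status_code == 500 and internal_errors < max_internal_errors:
            # Retry internal server errors before giving up
            internal_errors += 1
            time.sleep(60)
            r = requests.get(url, **kwargs)
        return r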

    def get_updated_siret_records(self, date, curseur):
        # Which fields do we need
        champs = 'siren,nic,siret,complementAdresseEtablissement,numeroVoieEtablissement,indiceRepetitionEtablissement,' \
                 'typeVoieEtablissement,libelleVoieEtablissement,codePostalEtablissement,libelleCedexEtablissement,' \
                 'codeCommuneEtablissement,libelleCommuneEtablissement'

        # Build the filter
        q = 'dateDernierTraitementEtablissement:' + date

        j = self.get_siret(q=q, curseur=curseur, nombre=1000, champs=champs,
                           gzip=True)
        try:
            header = j['header']
            etablissements = j['etablissements']
            curseur_suivant = header['curseurSuivant']
            total = header['total']
            # Get header for debugging purposes
            if self.debug:
                self.logger.debug('  header siret %s', header)
        except KeyError as e:
            self.logger.error('  missing key in response from API: %s', e)
            raise ExceptionUpdatedSiret('Error during headquarters retrieval')

        return total, curseur_suivant, etablissements

    @staticmethod
    def chunks(l, n):
        """Yield successive n-sized chunks from l."""
        for i in xrange(0, len(l), n):
            yield l[i:i + n]
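
    # For example, chunks(['a', 'b', 'c'], 2) yields ['a', 'b'] then ['c'].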

    def get_etablissements_siege(self, siret_to_retrieve):
        # Which fields do we need
        champs = 'siren,nic,siret,etablissementSiege,codeCommuneEtablissement,codePaysEtrangerEtablissement'

        # Retrieve 85 records at each request
        # If we have more than 85 siret, the query is too long and blocked by INSEE
        step = 85
        sieges = dict()
        for chunk in list(self.chunks(siret_to_retrieve, step)):
            q = ' OR '.join('siret:' + siret for siret in chunk)
            try:
                j = self.get_siret(q=q, nombre=step, champs=champs, gzip=True)
            except ExceptionSiret:
                continue
            try:
                header = j['header']
                for s in j['etablissements']:
                    sieges[s['siret']] = s
                # Get header for debugging purposes
                if self.debug:
                    self.logger.debug('  header siret %s', header)
            except KeyError as e:
                self.logger.error('  missing key in response from API: %s', e)
                raise ExceptionHeadquarters(
                    'Error during headquarters retrieval')

        self.logger.info('  retrieved %d of %d headquarters', len(sieges),
                         len(siret_to_retrieve))

        return sieges

    def generate_siret(self, siret, siret_siege):
        new_siret = OrderedDict()
        v = lambda t: '' if t is None else t.encode('utf-8')
        try:
            u = siret['uniteLegale']
            a = siret['adresseEtablissement']
            # This field is unused
            a2 = siret['adresse2Etablissement']
            p = siret['periodesEtablissement'][0]

            new_siret['SIREN'] = v(siret['siren'])
            new_siret['NIC'] = v(siret['nic'])
            # Physical person
            sul = None
            if v(u['categorieJuridiqueUniteLegale']) == '1000':
                if v(u['sexeUniteLegale']):
                    sul = v(u['sexeUniteLegale'])
                    if sul == 'F':
                        sul = 'MADAME'
                    elif sul == 'M':
                        sul = 'MONSIEUR'
                if v(u['nomUsageUniteLegale']):
                    nul = v(u['nomUsageUniteLegale'])
                else:
                    nul = v(u['nomUniteLegale'])
                puul = v(u['prenomUsuelUniteLegale'])
                new_siret['L1_NORMALISEE'] = ' '.join(
                    filter(None, [sul, puul, nul]))
            else:
                new_siret['L1_NORMALISEE'] = v(u['denominationUniteLegale'])
            new_siret['L2_NORMALISEE'] = ''
            nve = v(a['numeroVoieEtablissement'])
            tve = v(a['typeVoieEtablissement'])
            lve = v(a['libelleVoieEtablissement'])
            new_siret['L3_NORMALISEE'] = ' '.join(filter(
                None, [nve, tve, lve]))
            new_siret['L4_NORMALISEE'] = ''
            new_siret['L5_NORMALISEE'] = ''
            cpe = v(a['codePostalEtablissement'])
            lce = v(a['libelleCommuneEtablissement'])
            new_siret['L6_NORMALISEE'] = ' '.join(filter(None, [cpe, lce]))
            if a['codePaysEtrangerEtablissement'] and a[
                    'libellePaysEtrangerEtablissement']:
                new_siret['L7_NORMALISEE'] = a[
                    'libellePaysEtrangerEtablissement'].encode('utf-8')
            else:
                new_siret['L7_NORMALISEE'] = 'FRANCE'.encode('utf-8')
            new_siret['L1_DECLAREE'] = new_siret['L1_NORMALISEE']
            new_siret['L2_DECLAREE'] = ''
            new_siret['L3_DECLAREE'] = new_siret['L3_NORMALISEE']
            new_siret['L4_DECLAREE'] = ''
            new_siret['L5_DECLAREE'] = ''
            new_siret['L6_DECLAREE'] = ''
            new_siret['L7_DECLAREE'] = new_siret['L7_NORMALISEE']
            new_siret['NUMVOIE'] = v(a['numeroVoieEtablissement'])
            new_siret['INDREP'] = v(a['indiceRepetitionEtablissement'])
            new_siret['TYPVOIE'] = v(a['typeVoieEtablissement'])
            new_siret['LIBVOIE'] = v(a['libelleVoieEtablissement'])
            new_siret['CODPOS'] = v(a['codePostalEtablissement'])
            new_siret['CEDEX'] = v(a['codeCedexEtablissement'])
            new_siret['RPET'] = ''
            new_siret['LIBREG'] = ''
            new_siret['DEPET'] = v(a['codeCommuneEtablissement'])[:2]
            new_siret['ARRONET'] = ''
            new_siret['CTONET'] = ''
            new_siret['COMET'] = v(a['codeCommuneEtablissement'])
            new_siret['LIBCOM'] = v(a['libelleCommuneEtablissement'])
            new_siret['DU'] = ''
            new_siret['TU'] = ''
            new_siret['UU'] = ''
            new_siret['EPCI'] = ''
            new_siret['TCD'] = ''
            new_siret['ZEMET'] = ''
            if siret['etablissementSiege']:
                new_siret['SIEGE'] = 1
            else:
                new_siret['SIEGE'] = 0
            new_siret['ENSEIGNE'] = v(p['enseigne1Etablissement'])
            new_siret['IND_PUBLIPO'] = ''
            new_siret['DIFFCOM'] = 'O'.encode('utf-8')
            new_siret['AMINTRET'] = date.today().strftime('%Y%m')
            new_siret['NATETAB'] = ''
            new_siret['LIBNATETAB'] = ''
            new_siret['APET700'] = v(
                p['activitePrincipaleEtablissement']).replace('.', '')
            new_siret['LIBAPET'] = v(p['activitePrincipaleEtablissement'])
            new_siret['DAPET'] = ''
            new_siret['TEFET'] = v(siret['trancheEffectifsEtablissement'])
            if siret['trancheEffectifsEtablissement']:
                new_siret['LIBTEFET'] = self.LIBTEFET[
                    siret['trancheEffectifsEtablissement']]
            else:
                new_siret['LIBTEFET'] = ''
            new_siret['EFETCENT'] = ''
            new_siret['DEFET'] = v(siret['anneeEffectifsEtablissement'])
            new_siret['ORIGINE'] = ''
            new_siret['DCRET'] = v(siret['dateCreationEtablissement']).replace(
                '-', '')
            new_siret['DDEBACT'] = ''
            new_siret['ACTIVNAT'] = ''
            new_siret['LIEUACT'] = ''
            new_siret['ACTISURF'] = ''
            new_siret['SAISONAT'] = ''
            new_siret['MODET'] = ''
            new_siret['PRODET'] = ''
            new_siret['PRODPART'] = ''
            new_siret['AUXILT'] = ''
            # Physical person
            if v(u['categorieJuridiqueUniteLegale']) == '1000':
                nul = v(u['nomUniteLegale'])
                p1ul = v(u['prenom1UniteLegale'])
                p2ul = v(u['prenom2UniteLegale'])
                p3ul = v(u['prenom3UniteLegale'])
                p4ul = v(u['prenom4UniteLegale'])
                pul = ' '.join(filter(None, [p1ul, p2ul, p3ul, p4ul]))
                if v(u['nomUsageUniteLegale']):
                    new_siret['NOMEN_LONG'] = nul + '*' + v(
                        u['nomUsageUniteLegale']) + '/' + pul + '/'
                else:
                    new_siret['NOMEN_LONG'] = nul + '*' + pul + '/'
            else:
                new_siret['NOMEN_LONG'] = v(u['denominationUniteLegale'])
            new_siret['SIGLE'] = v(u['sigleUniteLegale'])
            new_siret['NOM'] = v(u['nomUniteLegale'])
            new_siret['PRENOM'] = v(u['prenom1UniteLegale'])
            new_siret['CIVILITE'] = ''
            if v(u['sexeUniteLegale']) == 'F':
                new_siret['CIVILITE'] = 2
            elif v(u['sexeUniteLegale']) == 'M':
                new_siret['CIVILITE'] = 1
            new_siret['RNA'] = v(u['identifiantAssociationUniteLegale'])
            new_siret['NICSIEGE'] = v(u['nicSiegeUniteLegale'])
            if siret['etablissementSiege']:
                if v(a['codePaysEtrangerEtablissement']):
                    cce = v(a['codePaysEtrangerEtablissement'])
                else:
                    cce = v(a['codeCommuneEtablissement'])
                department = cce[:3]
                rpen = ''
                for key, value in self.RPEN.items():
                    if department in value:
                        rpen = key
                if rpen == '':
                    department = cce[:2]
                    for key, value in self.RPEN.items():
                        if department in value:
                            rpen = key
            else:
                rpen = ''
                cce = ''
                try:
                    siege = siret_siege[v(siret['siren']) +
                                        v(u['nicSiegeUniteLegale'])]
                except KeyError as e:
                    self.logger.info(
                        '  siret %s has an invalid headquarter %s',
                        v(siret['siret']),
                        v(siret['siren']) + v(u['nicSiegeUniteLegale']))
                else:
                    if v(siege['adresseEtablissement']
                         ['codePaysEtrangerEtablissement']):
                        cce = v(siege['adresseEtablissement']
                                ['codePaysEtrangerEtablissement'])
                    else:
                        cce = v(siege['adresseEtablissement']
                                ['codeCommuneEtablissement'])
                    department = cce[:3]
                    rpen = ''
                    for key, value in self.RPEN.items():
                        if department in value:
                            rpen = key
                    if rpen == '':
                        department = cce[:2]
                        for key, value in self.RPEN.items():
                            if department in value:
                                rpen = key
            new_siret['RPEN'] = rpen
            new_siret['DEPCOMEN'] = cce
            new_siret['ADR_MAIL'] = ''
            new_siret['NJ'] = v(u['categorieJuridiqueUniteLegale'])
            new_siret['LIBNJ'] = v(u['categorieJuridiqueUniteLegale'])
            new_siret['APEN700'] = v(
                u['activitePrincipaleUniteLegale']).replace('.', '')
            new_siret['LIBAPEN'] = v(u['activitePrincipaleUniteLegale'])
            new_siret['DAPEN'] = ''
            new_siret['APRM'] = v(
                siret['activitePrincipaleRegistreMetiersEtablissement'])
            new_siret['ESS'] = v(u['economieSocialeSolidaireUniteLegale'])
            new_siret['DATEESS'] = ''
            new_siret['TEFEN'] = v(u['trancheEffectifsUniteLegale'])
            if u['trancheEffectifsUniteLegale']:
                new_siret['LIBTEFEN'] = self.LIBTEFET[
                    u['trancheEffectifsUniteLegale']]
            else:
                new_siret['LIBTEFEN'] = ''
            new_siret['EFENCENT'] = ''
            new_siret['DEFEN'] = v(u['anneeEffectifsUniteLegale'])
            new_siret['CATEGORIE'] = v(u['categorieEntreprise'])
            new_siret['DCREN'] = v(u['dateCreationUniteLegale'])
            new_siret['AMINTREN'] = date.today().strftime('%Y%m')
            new_siret['MONOACT'] = ''
            new_siret['MODEN'] = ''
            new_siret['PRODEN'] = ''
            new_siret['ESAANN'] = ''
            new_siret['TCA'] = ''
            new_siret['ESAAPEN'] = ''
            new_siret['ESASEC1N'] = ''
            new_siret['ESASEC2N'] = ''
            new_siret['ESASEC3N'] = ''
            new_siret['ESASEC4N'] = ''
            if v(p['etatAdministratifEtablissement']) == 'A':
                new_siret['VMAJ'] = 'C'
                self.count_in += 1
            elif v(p['etatAdministratifEtablissement']) == 'F':
                new_siret['VMAJ'] = 'O'
                self.count_out += 1
            new_siret['VMAJ1'] = ''
            new_siret['VMAJ2'] = ''
            new_siret['VMAJ3'] = ''
            new_siret['DATEMAJ'] = v(
                siret['dateDernierTraitementEtablissement'])
            if v(p['etatAdministratifEtablissement']) == 'A':
                new_siret['EVE'] = 'CE'
            elif v(p['etatAdministratifEtablissement']) == 'F':
                new_siret['EVE'] = 'O'
            new_siret['DATEVE'] = v(
                siret['dateDernierTraitementEtablissement'])[:10].replace(
                    '-', '')
            new_siret['TYPCREH'] = ''
            new_siret['DREACTET'] = ''
            new_siret['DREACTEN'] = ''
            new_siret['MADRESSE'] = ''
            new_siret['MENSEIGNE'] = ''
            new_siret['MAPET'] = ''
            new_siret['MPRODET'] = ''
            new_siret['MAUXILT'] = ''
            new_siret['MNOMEN'] = ''
            new_siret['MSIGLE'] = ''
            new_siret['MNICSIEGE'] = ''
            new_siret['MNJ'] = ''
            new_siret['MAPEN'] = ''
            new_siret['MPRODEN'] = ''
            new_siret['SIRETPS'] = ''
            new_siret['TEL'] = ''
        except KeyError as e:
            self.logger.error('  missing key in siret received from API: %s',
                              e)
            if self.debug:
                self.logger.debug('  siret to update: %s', siret)
                self.logger.debug('  new_siret object: %s', new_siret)
            raise ExceptionTranslation('Error during siret translation')

        raw = ''.join(k + '=' + '\"{0}\"'.format(v) + ' '
                      for k, v in new_siret.items())
        return raw
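
    # The RPEN resolution above (match the 3-character prefix of the commune
    # or foreign-country code, then fall back to the 2-character prefix)
    # appears twice in generate_siret(). A minimal sketch of a helper
    # capturing it, assuming a department prefix maps to at most one region
    # (the helper name is hypothetical, not part of the original code):
    def _resolve_rpen(self, cce):
        for width in (3, 2):
            department = cce[:width]
            for key, value in self.RPEN.items():
                if department in value:
                    return key
        return ''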

    def generate(self):
        try:
            self.set_configuration()

            # CSV header
            csv_header = [
                'SIREN', 'NIC', 'L1_NORMALISEE', 'L2_NORMALISEE',
                'L3_NORMALISEE', 'L4_NORMALISEE', 'L5_NORMALISEE',
                'L6_NORMALISEE', 'L7_NORMALISEE', 'L1_DECLAREE', 'L2_DECLAREE',
                'L3_DECLAREE', 'L4_DECLAREE', 'L5_DECLAREE', 'L6_DECLAREE',
                'L7_DECLAREE', 'NUMVOIE', 'INDREP', 'TYPVOIE', 'LIBVOIE',
                'CODPOS', 'CEDEX', 'RPET', 'LIBREG', 'DEPET', 'ARRONET',
                'CTONET', 'COMET', 'LIBCOM', 'DU', 'TU', 'UU', 'EPCI', 'TCD',
                'ZEMET', 'SIEGE', 'ENSEIGNE', 'IND_PUBLIPO', 'DIFFCOM',
                'AMINTRET', 'NATETAB', 'LIBNATETAB', 'APET700', 'LIBAPET',
                'DAPET', 'TEFET', 'LIBTEFET', 'EFETCENT', 'DEFET', 'ORIGINE',
                'DCRET', 'DDEBACT', 'ACTIVNAT', 'LIEUACT', 'ACTISURF',
                'SAISONAT', 'MODET', 'PRODET', 'PRODPART', 'AUXILT',
                'NOMEN_LONG', 'SIGLE', 'NOM', 'PRENOM', 'CIVILITE', 'RNA',
                'NICSIEGE', 'RPEN', 'DEPCOMEN', 'ADR_MAIL', 'NJ', 'LIBNJ',
                'APEN700', 'LIBAPEN', 'DAPEN', 'APRM', 'ESS', 'DATEESS',
                'TEFEN', 'LIBTEFEN', 'EFENCENT', 'DEFEN', 'CATEGORIE', 'DCREN',
                'AMINTREN', 'MONOACT', 'MODEN', 'PRODEN', 'ESAANN', 'TCA',
                'ESAAPEN', 'ESASEC1N', 'ESASEC2N', 'ESASEC3N', 'ESASEC4N',
                'VMAJ', 'VMAJ1', 'VMAJ2', 'VMAJ3', 'DATEMAJ', 'EVE', 'DATEVE',
                'TYPCREH', 'DREACTET', 'DREACTEN', 'MADRESSE', 'MENSEIGNE',
                'MAPET', 'MPRODET', 'MAUXILT', 'MNOMEN', 'MSIGLE', 'MNICSIEGE',
                'MNJ', 'MAPEN', 'MPRODEN', 'SIRETPS', 'TEL'
            ]

            # Get status
            status_object = self.get_status()
            if status_object:
                if 'versionService' in status_object:
                    self.logger.info(
                        '  versionService %s',
                        status_object['versionService'].encode('utf-8'))
                if 'datesDernieresMisesAJourDesDonnees' in status_object:
                    for collection in status_object[
                            'datesDernieresMisesAJourDesDonnees']:
                        msg = ''
                        if 'collection' in collection and collection[
                                'collection']:
                            msg += 'collection %s' % collection[
                                'collection'].encode('utf-8')
                            msg += ' '
                        if 'dateDerniereMiseADisposition' in collection and collection[
                                'dateDerniereMiseADisposition']:
                            msg += 'dateDerniereMiseADisposition %s' %\
                                   collection['dateDerniereMiseADisposition'].encode('utf-8')
                            msg += ' '
                        if 'dateDernierTraitementDeMasse' in collection and collection[
                                'dateDernierTraitementDeMasse']:
                            msg += 'dateDernierTraitementDeMasse %s' %\
                                   collection['dateDernierTraitementDeMasse'].encode('utf-8')
                            msg += ' '
                        if 'dateDernierTraitementMaximum' in collection and collection[
                                'dateDernierTraitementMaximum']:
                            msg += 'dateDernierTraitementMaximum %s' % \
                                   collection['dateDernierTraitementMaximum'].encode('utf-8')
                            msg += ' '
                        self.logger.info('  %s', msg.encode('utf-8'))

            # Date to retrieve has been set
            if self.dtr:
                day_to_retrieve = self.dtr
            # Day before yesterday
            else:
                day_to_retrieve = (date.today() -
                                   timedelta(1)).strftime('%Y-%m-%d')

            # Log the requested date to help debugging
            self.logger.info('  dtr: %s', day_to_retrieve.encode('utf-8'))
            # Log the username to help debugging
            self.logger.info(
                '  Splunk username: %s',
                self._metadata.searchinfo.username.encode('utf-8'))

            event = 1
            curseur = '*'
            first_call = True
            received_siret = 0
            while True:
                total, curseur_suivant, updated_siret_list = self.get_updated_siret_records(
                    day_to_retrieve, curseur)

                if first_call:
                    self.logger.info(
                        '  retrieved a total of %d siret to update', total)
                    first_call = False
                self.logger.info(
                    '  retrieved %d siret to update in this window',
                    len(updated_siret_list))
                received_siret += len(updated_siret_list)
                self.logger.info('  retrieved %d siret / %d', received_siret,
                                 total)

                siret_to_retrieve = list()
                for siret in updated_siret_list:
                    if not siret['etablissementSiege']:
                        if siret['siren'] + siret['uniteLegale'][
                                'nicSiegeUniteLegale'] not in siret_to_retrieve:
                            siret_to_retrieve.append(
                                siret['siren'] +
                                siret['uniteLegale']['nicSiegeUniteLegale'])

                # We retrieve all headquarters
                siret_siege = self.get_etablissements_siege(siret_to_retrieve)
                for siret in updated_siret_list:
                    raw_data = self.generate_siret(siret, siret_siege)
                    yield {
                        '_time': time.time(),
                        'event_no': event,
                        '_raw': raw_data
                    }
                    event += 1

                # We get the same curseur so we get all updated siret
                if curseur_suivant == curseur:
                    break

                curseur = curseur_suivant

            self.logger.info('  generated %d events', event - 1)
            self.logger.info('  found %d SIRET to create', self.count_in)
            self.logger.info('  found %d SIRET to delete', self.count_out)

        except (ExceptionTranslation, ExceptionHeadquarters,
                ExceptionUpdatedSiret, ExceptionSiret, ExceptionStatus,
                ExceptionToken, ExceptionConfiguration):
            raise

        # This is bad practice, but we want a specific message in the log file.
        # Reaching this case means the code is missing an exception handler.
        except Exception as e:
            self.logger.error(
                '  an unhandled exception has occurred. Traceback is in splunklib.log: %s',
                e)
            raise
Example #25
class genatklayerCommand(StreamingCommand):
    """ Counts the number of non-overlapping matches to a regular expression in a set of fields.
    ##Syntax
    .. code-block::
        genatklayer atkfield=<fieldname> layername=<string> description=<string> reset=<bool>
    ##Description
    Takes input from a search and attempts to map it to ATT&CK framework techniques and create a layer file.
    The parameter `layername` is the name for the layer file as it will be stored in KVStore.
    The parameter `description` is the description for the layer file as it will be stored in KVStore.
    ##Example
    Read in some data (tweets) and attempt to save the layer file to KVStore with name "my name" and description "my description"
    .. code-block::
        | inputlookup tweets | genatklayer layername="my name" description="my description"
    """
    atkfield = Option(
        doc='''
        **Syntax:** **atkfield=***<field that stores att&ck technique id>*
        **Description:** The name of the field in your search results that has the att&ck technique id''',
        require=False, validate=None)

    layername = Option(
        doc='''
        **Syntax:** **name=***<layer name>*
        **Description:** What name you want to give the layer in KVStore''',
        require=False, validate=None)

    description = Option(
        doc='''
        **Syntax:** **description=***<layer description>*
        **Description:** What description you want to provide for the layer in KVStore''',
        require=False, validate=None)

    reset = Option(
        doc='''
        **Syntax:** **reset=***<bool>*
        **Description:** Reset the default layer back to its original state''',
        require=False, validate=validators.Boolean())



    # Base ATT&CK Navigator layer / template
    layer_json = {
        "version": VERSION,
        "name": NAME,
        "description": DESCRIPTION,
        "domain": DOMAIN,
        "techniques": []
    }


    # per attack map - makes some color codes
    # add a color gradient (white -> red) to layer
    # ranging from zero (white) to the maximum score in the file (red)
    # To be implemented later
    """
    layer_json["gradient"] = {
        "colors": [
            "#ffffff",
            "#ff6666"
        ],
        "minValue": 0,
        "maxValue": max([technique["score"] for technique in layer_json["techniques"]])
    }
    """

    def getDefaultLayer(self, uri):
        r, c = splunk.rest.simpleRequest(uri, sessionKey=self.metadata.searchinfo.session_key, rawResult=True)
        # case where default layer is found via its _key value in kvstore
        if r.status == 200:
            return json.loads(c)
        # case where our default layer wasn't yet loaded into KVStore
        if r.status == 404:
            r, c = splunk.rest.simpleRequest(COLLECTION_URI, jsonargs=json.dumps(layer_default), sessionKey=self.metadata.searchinfo.session_key, rawResult=True) 
            return {"error":"Layer file not loaded in KVStore, it has now been loaded on your behalf"}
        else:
            return {"error":json.loads(c)}

    def resetDefaultLayer(self,uri):
        # if layer exists - error similar to the below will be thrown
        # {"messages":[{"type":"ERROR","text":"A document with the same key and user already exists."}]}
        r, c = splunk.rest.simpleRequest(uri, jsonargs=json.dumps(layer_default), sessionKey=self.metadata.searchinfo.session_key, rawResult=True)    
        if r.status == 200:
            return json.loads(c)
        else:
            return json.loads(c)

    def saveCustomLayer(self, layer_data):
        CUSTOM_LAYER_URI = "/servicesNS/Nobody/{}/storage/collections/data/attack_layers/{}?output_mode=json".format(appname,self.layername)
        # test to see if the custom layer already exists
        r, c = splunk.rest.simpleRequest(CUSTOM_LAYER_URI, sessionKey=self.metadata.searchinfo.session_key, rawResult=True)    
        # yes, the layer does exist
        if r.status == 200:
            # let's overwrite it
            r, c = splunk.rest.simpleRequest(CUSTOM_LAYER_URI, jsonargs=json.dumps(layer_data), sessionKey=self.metadata.searchinfo.session_key, rawResult=True)
            return {"success": "Existing custom layer overwritten"}

        # no, the layer does not exist; let's create it and save it. Note we need to drop down to the collection URI to POST our args
        if r.status == 404:
            r, c = splunk.rest.simpleRequest(COLLECTION_URI, jsonargs=json.dumps(layer_data), sessionKey=self.metadata.searchinfo.session_key, rawResult=True) 
            return {"error":"Layer file not loaded in KVStore, it has now been loaded on your behalf"}

        #some other generic error
        else:
            return {"error":json.loads(c)}


    def stream(self, records):

        self.logger.debug('genatklayerCommand: %s', self)  # logs command line
        if self.reset:
            resp = self.resetDefaultLayer(DEFAULT_URI)
            for record in records:
                record['_raw'] = json.dumps(resp)
                yield record
            # records is exhausted after a reset, so stop here
            return

        # attempt to get the default layer
        default_layer = self.getDefaultLayer(DEFAULT_URI)
        if "error" in default_layer:
            raise Exception("Error retrieving layer. {}".format(str(default_layer['error'])))

        # iterate through our search results
        for record in records:
            # determine if the user specified a field to key off of for Technique ID
            # and if so, proceed
            if self.atkfield in record:
                # iterate through the techniques array in our layer file
                # we also will set our layers "scores" values per technique ID 
                # to zero if we dont have a value yet, otherwise, proceed
                for tech in default_layer['techniques']:
                    # Scoring example - reserving for later
                    #if 'score' not in tech:
                    #    tech['score'] = 0
                    # determine if we have a match in this case between
                    # a technique ID in our layer file and in our splunk record
                    # as well as check if our atkfield is mv
                    # case where it is an mvfield
                    if isinstance(record[self.atkfield], list):
                        for item in record[self.atkfield]:
                            if tech['techniqueID'] == six.text_type(item):
                            # if there is a match, see if there's also a detected field in our splunk results
                            # and if so, update the layer info to reflect that
                                if 'detected' in record:
                                    if six.text_type(record['detected']) == "-1":
                                        tech['color'] = RED_LT
                                        #tech['score'] = tech['score'] + 1
                                    elif six.text_type(record['detected']) == "0":
                                        tech['color'] = YELLOW_DK
                                    elif six.text_type(record['detected']) == "1":
                                        tech['color'] = BLUE_1
                                    elif six.text_type(record['detected']) == "2":
                                        tech['color'] = BLUE_2
                                    elif six.text_type(record['detected']) == "3":
                                        tech['color'] = BLUE_3
                                    elif six.text_type(record['detected']) == "4":
                                        tech['color'] = BLUE_4

                                
                    #case where it is not an mv field
                    elif tech['techniqueID'] == six.text_type(record[self.atkfield]):
                        # if there is a match, see if there's also a detected field in our splunk results
                        # and if so, update the layer info to reflect that
                        if 'detected' in record:
                            if six.text_type(record['detected']) == "-1":
                                tech['color'] = RED_LT
                                #tech['score'] = tech['score'] + 1
                            elif six.text_type(record['detected']) == "0":
                                tech['color'] = YELLOW_DK
                            elif six.text_type(record['detected']) == "1":
                                tech['color'] = BLUE_1
                            elif six.text_type(record['detected']) == "2":
                                tech['color'] = BLUE_2
                            elif six.text_type(record['detected']) == "3":
                                tech['color'] = BLUE_3
                            elif six.text_type(record['detected']) == "4":
                                tech['color'] = BLUE_4
            else:
                record['_raw'] = "Error no field with that name exists {}".format(self.atkfield)
                raise Exception("Error no field with that name exists {}".format(self.atkfield))
            yield record
        # if the user passes a name arg then create the new kvstore entry for that new layer
        # will want to update this code in the future to handle error cases better, and user feedback

        if self.layername is not None:
            default_layer['_key'] = self.layername
            status = self.saveCustomLayer(default_layer)
            self.logger.debug('custom layer file requested: {}'.format(status))
        # post updated default layer if all was successful & we didn't get a name argument
        else:    
            r, c = splunk.rest.simpleRequest(DEFAULT_URI, jsonargs=json.dumps(default_layer), sessionKey=self.metadata.searchinfo.session_key, rawResult=True)    
            if r.status == 200:
                self.logger.debug('updated default layer successfully: {}'.format(json.loads(c)))
            else:
                self.logger.debug('error updating default layer: {}'.format(json.loads(c)))
Example #26
class MispCollectCommand(GeneratingCommand):
    """ get the attributes from a MISP instance.
    ##Syntax
    .. code-block::
        | mispcollect misp_instance=<input> last=<int>(d|h|m)
        | mispcollect misp_instance=<input> event=<id1>(,<id2>,...)
        | mispcollect misp_instance=<input> date=<YYYY-MM-DD>
                                            (date_to=<YYYY-MM-DD>)
    ##Description
    {
        "returnFormat": "mandatory",
        "page": "optional",
        "limit": "optional",
        "value": "optional",
        "type": "optional",
        "category": "optional",
        "org": "optional",
        "tags": "optional",
        "date": "optional",
        "last": "optional",
        "eventid": "optional",
        "withAttachments": "optional",
        "uuid": "optional",
        "publish_timestamp": "optional",
        "timestamp": "optional",
        "enforceWarninglist": "optional",
        "to_ids": "optional",
        "deleted": "optional",
        "includeEventUuid": "optional",
        "includeEventTags": "optional",
        "event_timestamp": "optional",
        "threat_level_id": "optional",
        "eventinfo": "optional",
        "includeProposals": "optional",
        "includeDecayScore": "optional",
        "includeFullModel": "optional",
        "decayingModel": "optional",
        "excludeDecayed": "optional",
        "score": "optional"
    }
    # status {
        "returnFormat": forced to json,
        "page": param,
        "limit": param,
        "value": not managed,
        "type": param, CSV string,
        "category": param, CSV string,
        "org": not managed,
        "tags": param, see also not_tags
        "date": param,
        "last": param,
        "eventid": param,
        "withAttachments": forced to false,
        "uuid": not managed,
        "publish_timestamp": managed via param last
        "timestamp": not managed,
        "enforceWarninglist": param,
        "to_ids": param,
        "deleted": forced to False,
        "includeEventUuid": set to True,
        "includeEventTags": param,
        "event_timestamp":  not managed,
        "threat_level_id":  not managed,
        "eventinfo": not managed,
        "includeProposals": not managed
        "includeDecayScore": not managed,
        "includeFullModel": not managed,
        "decayingModel": not managed,
        "excludeDecayed": not managed,
        "score": not managed
    }
    """
    # MANDATORY MISP instance for this search
    misp_instance = Option(
        doc='''
        **Syntax:** **misp_instance=***instance_name*
        **Description:** MISP instance parameters
        as described in local/misp42splunk_instances.conf.''',
        require=True)
    # MANDATORY: json_request XOR eventid XOR last XOR date
    json_request = Option(
        doc='''
        **Syntax:** **json_request=***valid JSON request*
        **Description:**Valid JSON request''',
        require=False)
    eventid = Option(
        doc='''
        **Syntax:** **eventid=***id1(,id2,...)*
        **Description:**list of event ID(s) or event UUID(s).''',
        require=False, validate=validators.Match("eventid", r"^[0-9a-f,\-]+$"))
    last = Option(
        doc='''
        **Syntax:** **last=***<int>d|h|m*
        **Description:** publication duration in day(s), hour(s) or minute(s).
        **nota bene:** last is an alias of publish_timestamp''',
        require=False, validate=validators.Match("last", r"^[0-9]+[hdm]$"))
    date = Option(
        doc='''
        **Syntax:** **date=***The user set event date field
         - any of valid time related filters"*
        **Description:**starting date.
         **eventid**, **last** and **date** are mutually exclusive''',
        require=False)
    # Other params
    category = Option(
        doc='''
        **Syntax:** **category=***CSV string*
        **Description:**Comma(,)-separated string of categories to search for.
         Wildcard is %.''',
        require=False)
    endpoint = Option(
        doc='''
        **Syntax:** **endpoint=***<events|attributes>*
        **Description:**selection of MISP API restSearch endpoint.
        **default**: /attributes/restSearch''',
        require=False, validate=validators.Match("endpoint", r"(events|attributes)"))
    geteventtag = Option(
        doc='''
        **Syntax:** **geteventtag=***<1|y|Y|t|true|True|0|n|N|f|false|False>*
        **Description:**Boolean includeEventTags. By default only
         attribute tag(s) are returned.''',
        require=False, validate=validators.Boolean())
    keep_related = Option(
        doc='''
        **Syntax:** **keep_related=***<1|y|Y|t|true|True|0|n|N|f|false|False>*
        **Description:**Boolean to keep related events.
        default is to drop  RelatedEvents to reduce volume.''',
        require=False, validate=validators.Boolean())
    limit = Option(
        doc='''
        **Syntax:** **limit=***<int>*
        **Description:**define the limit for each MISP search;
         default 1000. 0 = no pagination.''',
        require=False, validate=validators.Match("limit", r"^[0-9]+$"))
    not_tags = Option(
        doc='''
        **Syntax:** **not_tags=***CSV string*
        **Description:**Comma(,)-separated string of tags to exclude.
         Wildcard is %.''',
        require=False)
    page = Option(
        doc='''
        **Syntax:** **page=***<int>*
        **Description:**define the page for each MISP search; default 1.''',
        require=False, validate=validators.Match("page", r"^[0-9]+$"))
    tags = Option(
        doc='''
        **Syntax:** **tags=***CSV string*
        **Description:**Comma(,)-separated string of tags to search for.
         Wildcard is %.''',
        require=False)
    to_ids = Option(
        doc='''
        **Syntax:** **to_ids=***<1|y|Y|t|true|True|0|n|N|f|false|False>*
        **Description:**Boolean to search only attributes with the flag
         "to_ids" set to true.''',
        require=False, validate=validators.Boolean())
    type = Option(
        doc='''
        **Syntax:** **type=***CSV string*
        **Description:**Comma(,)-separated string of types to search for.
         Wildcard is %.''',
        require=False)
    warning_list = Option(
        doc='''
        **Syntax:** **warning_list=***<1|y|Y|t|true|True|0|n|N|f|false|False>*
        **Description:**Boolean to filter out well known values.''',
        require=False, validate=validators.Boolean())

    @staticmethod
    def _record(serial_number, time_stamp, host, attributes, attribute_names, encoder):

        raw = encoder.encode(attributes)
        # Formulate record
        fields = dict()
        for f in attribute_names:
            if f in attributes:
                fields[f] = attributes[f]

        if serial_number > 0:
            fields['_serial'] = serial_number
            fields['_time'] = time_stamp
            fields['_raw'] = raw
            fields['host'] = host
            return fields

        record = OrderedDict(chain(
            (('_serial', serial_number), ('_time', time_stamp),
             ('_raw', raw), ('host', host)),
            map(lambda name: (name, fields.get(name, '')), attribute_names)))

        return record
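
    # Note: the first record (serial_number == 0) is returned as an
    # OrderedDict seeded with every attribute name so the full field set is
    # declared up front; later records carry only the fields present.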

    def generate(self):

        # Phase 1: Preparation
        misp_instance = self.misp_instance
        storage = self.service.storage_passwords
        my_args = prepare_config(self, 'misp42splunk', misp_instance, storage)
        if my_args is None:
            raise Exception("Sorry, no configuration for misp_instance={}".format(misp_instance))
        my_args['host'] = my_args['misp_url'].replace('https://', '')
        # check that ONE of mandatory fields is present
        mandatory_arg = 0
        if self.json_request is not None:
            mandatory_arg = mandatory_arg + 1
        if self.eventid:
            mandatory_arg = mandatory_arg + 1
        if self.last:
            mandatory_arg = mandatory_arg + 1
        if self.date:
            mandatory_arg = mandatory_arg + 1

        if mandatory_arg == 0:
            raise Exception('Missing "json_request", "eventid", "last" or "date" argument')
        elif mandatory_arg > 1:
            raise Exception('Options "json_request", "eventid", "last" and "date" are mutually exclusive')

        body_dict = dict()
        # Only ONE combination was provided
        if self.json_request is not None:
            body_dict = json.loads(self.json_request)
            logging.info('Option "json_request" set')
        elif self.eventid:
            if "," in self.eventid:
                event_criteria = {}
                event_list = self.eventid.split(",")
                event_criteria['OR'] = event_list
                body_dict['eventid'] = event_criteria
            else:
                body_dict['eventid'] = self.eventid
            logging.info('Option "eventid" set with %s',
                         json.dumps(body_dict['eventid']))
        elif self.last:
            body_dict['last'] = self.last
            logging.info('Option "last" set with %s', str(body_dict['last']))
        else:
            body_dict['date'] = self.date.split()
            logging.info('Option "date" set with %s',
                         json.dumps(body_dict['date']))

        # Force some values on JSON request
        body_dict['returnFormat'] = 'json'
        body_dict['withAttachments'] = False
        body_dict['deleted'] = False
        body_dict['includeEventUuid'] = True
        # set proper headers
        headers = {'Content-type': 'application/json'}
        headers['Authorization'] = my_args['misp_key']
        headers['Accept'] = 'application/json'

        # Search pagination
        pagination = True
        if self.limit is not None:
            limit = int(self.limit)
        elif 'limit' in body_dict:
            limit = int(body_dict['limit'])
        else:
            limit = 1000
        if limit == 0:
            pagination = False
        if self.page is not None:
            page = int(self.page)
        elif 'page' in body_dict:
            page = body_dict['page']
        else:
            page = 1

        # Search parameters: boolean and filter
        # manage to_ids and enforceWarninglist
        # to avoid FP enforceWarninglist is set to True if
        # to_ids is set to True (search criterion)
        if self.category is not None:
            if "," in self.category:
                cat_criteria = {}
                cat_list = self.category.split(",")
                cat_criteria['OR'] = cat_list
                body_dict['category'] = cat_criteria
            else:
                body_dict['category'] = self.category
        if self.endpoint == 'events':
            my_args['misp_url'] = my_args['misp_url'] + '/events/restSearch'
        else:
            my_args['misp_url'] = my_args['misp_url'] + '/attributes/restSearch'
        if self.geteventtag is True:
            body_dict['includeEventTags'] = True
        if self.keep_related is True:
            keep_related = True
        else:
            keep_related = False
        if self.to_ids is True:
            body_dict['to_ids'] = True
            body_dict['enforceWarninglist'] = True  # protection
        elif self.to_ids is False:
            body_dict['to_ids'] = False
        if self.type is not None:
            if "," in self.type:
                type_criteria = {}
                type_list = self.type.split(",")
                type_criteria['OR'] = type_list
                body_dict['type'] = type_criteria
            else:
                body_dict['type'] = self.type
        if self.warning_list is True:
            body_dict['enforceWarninglist'] = True
        elif self.warning_list is False:
            body_dict['enforceWarninglist'] = False
        if self.tags is not None or self.not_tags is not None:
            tags_criteria = {}
            if self.tags is not None:
                tags_list = self.tags.split(",")
                tags_criteria['OR'] = tags_list
            if self.not_tags is not None:
                tags_list = self.not_tags.split(",")
                tags_criteria['NOT'] = tags_list
            body_dict['tags'] = tags_criteria
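        # For illustration (hypothetical values): tags="APT%,tlp:red" with
        # not_tags="test" yields
        # body_dict['tags'] = {'OR': ['APT%', 'tlp:red'], 'NOT': ['test']}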

        if pagination is True:
            body_dict['page'] = page
            body_dict['limit'] = limit

        body = json.dumps(body_dict)
        logging.debug('mispgetioc request body: %s', body)
        # search
        r = requests.post(my_args['misp_url'], headers=headers, data=body,
                          verify=my_args['misp_verifycert'],
                          cert=my_args['client_cert_full_path'],
                          proxies=my_args['proxies'])
        # check that the status is a success code (200/201/204);
        # raise an exception otherwise
        if r.status_code in (200, 201, 204):
            logging.info(
                "[CO301] INFO mispcollect successful. "
                "url={}, HTTP status={}".format(my_args['misp_url'], r.status_code)
            )
        else:
            logging.error(
                "[CO302] ERROR mispcollect failed. "
                "url={}, data={}, HTTP Error={}, content={}"
                .format(my_args['misp_url'], body, r.status_code, r.text)
            )
            raise Exception(
                "[CO302] ERROR mispcollect failed. "
                "url={}, data={}, HTTP Error={}, content={}"
                .format(my_args['misp_url'], body, r.status_code, r.text)
            )
        # response is 200 by this point or we would have thrown an exception
        response = r.json()
        encoder = json.JSONEncoder(ensure_ascii=False, separators=(',', ':'))
        if self.endpoint == "events":
            if 'response' in response:
                for r_item in response['response']:
                    if 'Event' in r_item:
                        attribute_names = []
                        serial_number = 0
                        for e in list(r_item.values()):
                            if keep_related is False:
                                e.pop('RelatedEvent', None)
                            if serial_number == 0:
                                for k in list(e.keys()):
                                    attribute_names.append(k)
                            yield MispCollectCommand._record(
                                serial_number, e['timestamp'], my_args['host'],
                                e, attribute_names, encoder)
                            serial_number += 1
                            self.flush()
        else:
            if 'response' in response:
                if 'Attribute' in response['response']:
                    attribute_names = []
                    serial_number = 0
                    for a in response['response']['Attribute']:
                        if serial_number == 0:
                            for k in list(a.keys()):
                                attribute_names.append(k)
                        yield MispCollectCommand._record(
                            serial_number, a['timestamp'], my_args['host'],
                            a, attribute_names, encoder)
                        serial_number += 1
                        self.flush()
Example #27
class curlCommand(GeneratingCommand):
  # Authorization : Bearer cn389ncoiwuencr
  url        = Option(require=True)
  paramMap   = Option(require=False)
  output     = Option(require=False, default='json')
  timeout    = Option(require=False, default=10, validate=validators.Integer())
  auth       = Option(require=False)
  headers    = Option(require=False)
  proxies    = Option(require=False)
  unsetProxy = Option(require=False, validate=validators.Boolean())
  
  def generate(self):
    url        = self.url
    paramMap   = self.parseParamMap(self.paramMap) if self.paramMap is not None else None
    output     = self.output
    timeout    = self.timeout if self.timeout is not None else None
    auth       = self.parseAuth(self.auth) if self.auth is not None else None
    headers    = self.parseHeaders(self.headers) if self.headers is not None else None
    proxies    = self.parseProxies(self.proxies) if self.proxies is not None else None
    unsetProxy = self.unsetProxy
 
    # Unset proxy environment variables, if unsetProxy = True
    # (requests reads HTTP_PROXY/HTTPS_PROXY, not HTTP/HTTPS)
    if unsetProxy:
      for key in ('HTTP_PROXY', 'HTTPS_PROXY', 'http_proxy', 'https_proxy'):
        os.environ.pop(key, None)

    # Load data from REST API
    record = {}    
    try:
      request = requests.get(
        url,
        params=paramMap,
        auth=auth,
        headers=headers,
        timeout=timeout,
        proxies=proxies
      )

      # Choose right output format
      if output == 'json':
        record = request.json()
      else:
        record = {'response': request.content}

    except requests.exceptions.RequestException as err:
      record = {'Error': str(err)}
    
    yield record

  ''' HELPERS '''
  '''
    Parse paramMap into a URL query string
    @paramMap string: Pattern 'foo=bar, hello=world, ...'
    @return string
  '''
  def parseParamMap(self, paramMap):
    paramStr = ''

    # Check if params contain \, and replace it with a placeholder
    paramMap = paramMap.replace(r'\,', '&#44;')
    paramMap = paramMap.split(',')

    for param in paramMap:
      paramStr += param.replace('&#44;', ',').strip() + '&'

    # Delete last &
    return paramStr[:-1]
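
  # For example (hypothetical input): parseParamMap('foo=bar, hello=world')
  # returns 'foo=bar&hello=world' -- a query string, not a dict.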

  '''
    Parse proxy into python dict
    @proxy string: Comma separated proxies -> http,https
    @return dict
  '''
  def parseProxies(self, proxies):
    proxies = proxies.split(',')

    return {
      'http': proxies[0].strip(),
      'https' : proxies[1].strip()
    }

  '''
    Parse auth into the correct requests auth object
    @auth string: Comma separated auth params -> method,user,pass
    @return object/bool
  '''
  def parseAuth(self, auth):
    # Password could contain commas, so split at most 2 times from the left
    auth = auth.split(',', 2)

    # Use the correct auth method
    if auth[0].lower() == 'basic':
      return (auth[1].strip(), auth[2].strip())
    elif auth[0].lower() == 'digest':
      return HTTPDigestAuth(auth[1].strip(), auth[2].strip())

    # Return false in case of no valid method
    return False
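
  # For example (hypothetical values): auth='basic,admin,secret' returns the
  # tuple ('admin', 'secret') for HTTP Basic auth, while
  # auth='digest,admin,secret' returns an HTTPDigestAuth instance.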
    
  '''
    Convert headers string into dict
    @headers string: Headers as json string
    @return dict
  '''
  def parseHeaders(self, headers):
    # Replace single quotes with double quotes for valid json
    return json.loads(
      headers.replace('\'', '"')
    )
Example #28
class MkJSONCommand(StreamingCommand):
    """ 

    ##Syntax


    ##Description


    ##Example


    """
    includehidden = Option(require=False, validate=validators.Boolean())
    outputfield = Option(require=False, validate=validators.Fieldname())
    sortkeys = Option(require=False, validate=validators.Boolean())

    def stream(self, events):
        outputfield = self.outputfield if self.outputfield else '_raw'
        includehidden = bool(self.includehidden)
        sortkeys = bool(self.sortkeys)

        for event in events:
            outputdict = {}
            if self.fieldnames:
                # Serialize only the requested fields that exist and are non-empty
                for fieldname in self.fieldnames:
                    if fieldname in event and len(event[fieldname]) > 0:
                        outputdict[fieldname] = event[fieldname]
            else:
                # Serialize every non-empty field, skipping hidden fields
                # (single leading underscore) unless includehidden is set
                for field in event:
                    if includehidden or not re.match(r'^_[^_]', field):
                        if len(event[field]) > 0:
                            outputdict[field] = event[field]
            event[outputfield] = json.dumps(outputdict, sort_keys=sortkeys)
            yield event
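A quick illustration of the per-event transform (the event dict below is a stand-in for a
real Splunk event; with default options the hidden `_time` field and the empty field are
dropped):

import json, re

event = {'_time': '1585230000', 'host': 'web-01', 'status': '200', 'note': ''}
outputdict = {f: v for f, v in event.items()
              if not re.match(r'^_[^_]', f) and len(v) > 0}
event['_raw'] = json.dumps(outputdict, sort_keys=True)
print(event['_raw'])  # {"host": "web-01", "status": "200"}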
Example #29
class TestSearchCommand(SearchCommand):

    boolean = Option(
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''',
        validate=validators.Boolean())

    required_boolean = Option(
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''',
        require=True, validate=validators.Boolean())

    aliased_required_boolean = Option(
        doc='''
        **Syntax:** **boolean=***<value>*
        **Description:** A boolean value''',
        name='foo', require=True, validate=validators.Boolean())

    code = Option(
        doc='''
        **Syntax:** **code=***<value>*
        **Description:** A Python expression, if mode == "eval", or statement, if mode == "exec"''',
        validate=validators.Code())

    required_code = Option(
        doc='''
        **Syntax:** **code=***<value>*
        **Description:** A Python expression, if mode == "eval", or statement, if mode == "exec"''',
        require=True, validate=validators.Code())

    duration = Option(
        doc='''
        **Syntax:** **duration=***<value>*
        **Description:** A length of time''',
        validate=validators.Duration())

    required_duration = Option(
        doc='''
        **Syntax:** **duration=***<value>*
        **Description:** A length of time''',
        require=True, validate=validators.Duration())

    fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<value>*
        **Description:** Name of a field''',
        validate=validators.Fieldname())

    required_fieldname = Option(
        doc='''
        **Syntax:** **fieldname=***<value>*
        **Description:** Name of a field''',
        require=True, validate=validators.Fieldname())

    file = Option(
        doc='''
        **Syntax:** **file=***<value>*
        **Description:** Name of a file''',
        validate=validators.File())

    required_file = Option(
        doc='''
        **Syntax:** **file=***<value>*
        **Description:** Name of a file''',
        require=True, validate=validators.File())

    integer = Option(
        doc='''
        **Syntax:** **integer=***<value>*
        **Description:** An integer value''',
        validate=validators.Integer())

    required_integer = Option(
        doc='''
        **Syntax:** **integer=***<value>*
        **Description:** An integer value''',
        require=True, validate=validators.Integer())

    map = Option(
        doc='''
        **Syntax:** **map=***<value>*
        **Description:** A mapping from one value to another''',
        validate=validators.Map(foo=1, bar=2, test=3))

    required_map = Option(
        doc='''
        **Syntax:** **map=***<value>*
        **Description:** A mapping from one value to another''',
        require=True, validate=validators.Map(foo=1, bar=2, test=3))

    match = Option(
        doc='''
        **Syntax:** **match=***<value>*
        **Description:** A value that matches a regular expression pattern''',
        validate=validators.Match('social security number', r'\d{3}-\d{2}-\d{4}'))

    required_match = Option(
        doc='''
        **Syntax:** **required_match=***<value>*
        **Description:** A value that matches a regular expression pattern''',
        require=True, validate=validators.Match('social security number', r'\d{3}-\d{2}-\d{4}'))

    optionname = Option(
        doc='''
        **Syntax:** **optionname=***<value>*
        **Description:** The name of an option (used internally)''',
        validate=validators.OptionName())

    required_optionname = Option(
        doc='''
        **Syntax:** **optionname=***<value>*
        **Description:** The name of an option (used internally)''',
        require=True, validate=validators.OptionName())

    regularexpression = Option(
        doc='''
        **Syntax:** **regularexpression=***<value>*
        **Description:** Regular expression pattern to match''',
        validate=validators.RegularExpression())

    required_regularexpression = Option(
        doc='''
        **Syntax:** **regularexpression=***<value>*
        **Description:** Regular expression pattern to match''',
        require=True, validate=validators.RegularExpression())

    set = Option(
        doc='''
        **Syntax:** **set=***<value>*
        **Description:** A member of a set''',
        validate=validators.Set('foo', 'bar', 'test'))

    required_set = Option(
        doc='''
        **Syntax:** **set=***<value>*
        **Description:** A member of a set''',
        require=True, validate=validators.Set('foo', 'bar', 'test'))

    class ConfigurationSettings(SearchCommand.ConfigurationSettings):
        @classmethod
        def fix_up(cls, command_class):
            pass
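Each validator above is a callable that converts the raw option string before the command
body runs. A hedged sketch of that conversion (accepted spellings and return types may
vary between splunklib versions):

from splunklib.searchcommands import validators

assert validators.Boolean()('true') is True                    # also t/f, 1/0, yes/no
assert validators.Integer()('42') == 42
assert validators.Map(foo=1, bar=2, test=3)('bar') == 2        # maps names to values
assert validators.Set('foo', 'bar', 'test')('test') == 'test'  # membership check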
Example #30
class GeoDistanceCommand(ReportingCommand):
    """ Computes the distance of adjacent events

    ##Syntax

    .. code-block::
        geodistance latfield=<field> longfield=<field> output_field=<field> miles=<bool>
                    group_by=<field> haversine=<bool>

    ##Description

    This search command calculates the relative Vincenty distances of adjacent events given their
    coordinates (latitudes and longitudes).
    Distances are computed in kilometres by default; set `miles=T` to get miles instead.
    It can also compute adjacent distances per group when `group_by` is specified.

    ##Note:

    *Events that are missing a latitude or longitude, as is the output when geocoding private
     non-routable IP addresses, will be given a distance of 0.0. The next relative distance will
     still be based on the last public address found.

    *The first event in the result will also have a distance of 0.0

    ##Example

    This example computes the relative distance for adjacent VPN connection attempts made by each user

    CLI:
    .. code-block::
            "index=vpn | stats count by src_ip , user |
             iplocation src_ip | fields src_ip, user, lat, lon  |
             geodistance latfield=lat longfield=lon output_field=distance miles=F
             group_by=user haversine=False"

    """
    latfield = Option(doc='''
        **Syntax:** **latfield=** *<fieldname>*
        **Description:** Name of the field that holds the latitude''',
                      require=True,
                      validate=validators.Fieldname())
    longfield = Option(doc='''
        **Syntax:** **longfield=** *<fieldname>*
        **Description:** Name of the field that holds the longitude''',
                       require=True,
                       validate=validators.Fieldname())
    group_by = Option(doc='''
        **Syntax:** **group_by=** *<fieldname>*
        **Description:** Name of the field to be used to categorize events when computing distances''',
                      require=False,
                      validate=validators.Fieldname())
    miles = Option(doc='''
        **Syntax:** **miles=** *<bool>*
        **Description:** If set to true, this converts the distance to miles instead of km''',
                   require=False,
                   validate=validators.Boolean(),
                   default=False)
    output_field = Option(doc='''
        **Syntax:** **output_field=** *<fieldname>*
        **Description:** Name of the field that will hold the relative distance returned in the output''',
                          require=True,
                          validate=validators.Fieldname())
    use_haversine = Option(name='haversine',
                           doc='''
        **Syntax:** **haversine=** *<bool>*
        **Description:** If set to true, this calculates the haversine distance instead of the Vincenty distance''',
                           require=False,
                           validate=validators.Boolean(),
                           default=False)

    def __init__(self):
        super(GeoDistanceCommand, self).__init__()
        environment.splunklib_logger = self._logger

    @Configuration()
    def map(self, events):
        for event in events:
            yield event

    def reduce(self, events):
        latitude = self.latfield
        longitude = self.longfield
        relative_distance = self.output_field
        use_haversine = bool(self.use_haversine)
        self.logger.info("[%s] - Starting geodistance instance" %
                         str(self.metadata.searchinfo.sid))
        self.logger.debug(
            "[%s] - Using parameters - %s" %
            (str(self.metadata.searchinfo.sid), str(self.metadata)))
        if self.group_by:
            position_tracker = {}
            for event in events:
                current = event
                if not (current[latitude] and current[longitude]):
                    current[relative_distance] = 0.0
                    self.logger.debug(
                        "[%s] - Using distance=0 for private IPs or unknown coordinates. "
                        "Exclude if undesired." %
                        str(self.metadata.searchinfo.sid))
                else:
                    current_pos = (float(current[latitude]),
                                   float(current[longitude]))
                    last_pos = position_tracker.get(current[self.group_by])
                    if last_pos is None:
                        current[relative_distance] = 0.0
                        self.logger.debug(
                            "[%s] - Initializing the first location with distance=0"
                            % str(self.metadata.searchinfo.sid))
                    else:
                        if use_haversine:
                            current[relative_distance] = haversine(
                                last_pos, current_pos, miles=bool(self.miles))
                        else:
                            current[relative_distance] = vincenty(
                                last_pos, current_pos, miles=bool(self.miles))
                    position_tracker[current[self.group_by]] = current_pos
                yield current
        else:
            last_pos = None
            for event in events:
                current = event
                if not (current[latitude] and current[longitude]):
                    current[relative_distance] = 0.0
                    self.logger.debug(
                        "[%s] - Using distance=0 for private IPs or unknown coordinates. Exclude if undesired."
                        % str(self.metadata.searchinfo.sid))
                else:
                    current_pos = (float(current[latitude]),
                                   float(current[longitude]))
                    if last_pos is None:
                        current[relative_distance] = 0.0
                        self.logger.debug(
                            "[%s] - Initializing the first location with distance=0"
                            % str(self.metadata.searchinfo.sid))
                    else:
                        if use_haversine:
                            current[relative_distance] = haversine(
                                last_pos, current_pos, miles=bool(self.miles))
                        else:
                            current[relative_distance] = vincenty(
                                last_pos, current_pos, miles=bool(self.miles))
                    last_pos = current_pos
                self.logger.debug(current)
                yield current
        self.logger.info("[%s] - Completed successfully." %
                         str(self.metadata.searchinfo.sid))
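For reference, a self-contained sketch of the haversine great-circle formula behind the
`haversine=T` path (the `haversine`/`vincenty` functions called above come from an external
geodesy library; this standalone version is illustrative only):

from math import radians, sin, cos, asin, sqrt

def haversine_km(pos1, pos2):
    # pos1/pos2 are (latitude, longitude) pairs in decimal degrees
    lat1, lon1, lat2, lon2 = map(radians, (*pos1, *pos2))
    a = sin((lat2 - lat1) / 2) ** 2 + cos(lat1) * cos(lat2) * sin((lon2 - lon1) / 2) ** 2
    return 2 * 6371.0 * asin(sqrt(a))  # 6371 km: mean Earth radius

# e.g. London -> Paris, roughly 344 km
print(round(haversine_km((51.5074, -0.1278), (48.8566, 2.3522))))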