Ejemplo n.º 1
0
    def parse_line(self, line, report):
        """Turn one HTML table row into an event.

        The row is split on ``<td>``; the first three cells are read as the
        source IP, the last-seen timestamp and a free-text description.

        Args:
            line: One table row as a string (without its closing ``</tr>``).
            report: The report the row came from; passed to ``Event``.

        Yields:
            A single event for the row. Nothing is yielded when the row is
            empty or has fewer than three ``<td>`` cells.
        """
        info = line.split("<td>")
        # info[1], info[2] and info[3] are read below, so splitting must have
        # produced at least four fragments (text before the first cell + 3).
        if not line or len(info) < 4:
            return

        event = Event(report)

        ip = info[1].split('</td>')[0].strip()
        # A fixed -05:00 offset is appended to the feed's timestamp.
        last_seen = info[2].split('</td>')[0].strip() + '-05:00'
        description = self.parser.unescape(info[3].split('</td>')[0].strip())
        lowered = description.lower()

        # First try to find an allowed classification name verbatim in the
        # description; only if none matches fall back to the TAXONOMY keywords.
        for key in ClassificationType.allowed_values:
            if key.lower() in lowered:
                event.add("classification.type", key)
                break
        else:
            for key, value in TAXONOMY.items():
                if key.lower() in lowered:
                    event.add("classification.type", value)
                    break

        if not event.contains("classification.type"):
            event.add("classification.type", 'unknown')

        event.add("time.source", last_seen)
        event.add("source.ip", ip)
        event.add("event_description.text", description)
        # The closing tag is put back so "raw" holds a complete row.
        event.add("raw", line + "</tr>")
        yield event
Ejemplo n.º 2
0
    def parse_line(self, line, report):
        """Turn one HTML table row into an event.

        The row is split on ``<td>``; the first three cells are read as the
        source IP, the last-seen timestamp and a free-text description.

        Args:
            line: One table row as a string (without its closing ``</tr>``).
            report: The report the row came from; passed to ``Event``.

        Yields:
            A single event for the row. Nothing is yielded when the row is
            empty or has fewer than three ``<td>`` cells.
        """
        info = line.split("<td>")
        # info[1], info[2] and info[3] are read below, so splitting must have
        # produced at least four fragments (text before the first cell + 3).
        if not line or len(info) < 4:
            return

        event = Event(report)

        ip = info[1].split('</td>')[0].strip()
        # A fixed -05:00 offset is appended to the feed's timestamp.
        last_seen = info[2].split('</td>')[0].strip() + '-05:00'
        raw_description = info[3].split('</td>')[0].strip()
        # html.unescape is only used on Python >= 3.4; older interpreters
        # go through the parser instance's unescape().
        if sys.version_info < (3, 4):
            description = self.parser.unescape(raw_description)
        else:
            description = html.unescape(raw_description)
        lowered = description.lower()

        # First try to find an allowed classification name verbatim in the
        # description; only if none matches fall back to the TAXONOMY keywords.
        for key in ClassificationType.allowed_values:
            if key.lower() in lowered:
                event.add("classification.type", key)
                break
        else:
            for key, value in TAXONOMY.items():
                if key.lower() in lowered:
                    event.add("classification.type", value)
                    break

        if not event.contains("classification.type"):
            event.add("classification.type", 'unknown')

        event.add("time.source", last_seen)
        event.add("source.ip", ip)
        event.add("event_description.text", description)
        # The closing tag is put back so "raw" holds a complete row.
        event.add("raw", line + "</tr>")
        yield event
Ejemplo n.º 3
0
    def parse_line(self, row, report):
        """Convert one CSV row into an event using the configured columns.

        Each value in ``row`` is paired positionally with a field name from
        ``self.parameters.columns``. Columns named ``"__IGNORE__"`` or ``""``
        are skipped. Time, IP, URL and classification columns are normalised
        before being added; every other value is added unchanged.

        Args:
            row: Sequence of string cell values for one CSV line.
            report: The report the row came from; passed to ``Event``.

        Yields:
            The populated event.

        Raises:
            AttributeError: if a ``source.ip``/``destination.ip`` value does
                not match the address pattern (``match`` returns ``None``).
        """
        # regex from http://stackoverflow.com/a/23483979
        # matching ipv4/ipv6 IP within string
        # Raw literals keep the \. \d \s \- escapes from being treated as
        # (invalid) string escapes; the pattern text itself is unchanged.
        # Compiled once per call instead of once per IP column.
        ip_regex = re.compile(
            r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])'
            r'\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0'
            r'-5])|(([a-zA-Z]|[a-zA-Z][a-zA-Z0-9\-]*[a-zA-Z0-9])'
            r'\.)*([A-Za-z]|[A-Za-z][A-Za-z0-9\-]*[A-Za-z0-9])|'
            r'\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|('
            r'([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]'
            r'|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d'
            r'\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:['
            r'0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|['
            r'1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|'
            r':))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1'
            r',3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d'
            r'\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){'
            r'3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,'
            r'4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-'
            r'4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-'
            r'9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-'
            r'Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0'
            r'-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1'
            r'\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(('
            r'(:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4'
            r'}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2['
            r'0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]'
            r'{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2'
            r'[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|'
            r'[1-9]?\d)){3}))|:)))(%.+)?')

        event = Event(report)

        for key, value in zip(self.parameters.columns, row):

            if key in ["__IGNORE__", ""]:
                continue
            if key in ["time.source", "time.destination"]:
                # Fuzzy-parse whatever date format the feed uses, then tag
                # the ISO string with a literal " UTC" suffix.
                value = parse(value, fuzzy=True).isoformat()
                value += " UTC"
            elif key in ["source.ip", "destination.ip"]:
                # Keep only the leading part of the cell that the pattern
                # recognises.
                value = ip_regex.match(value).group()
            elif key.endswith('.url') and '://' not in value:
                # Scheme-less URLs get the configured default protocol.
                value = self.parameters.default_url_protocol + value
            elif key in ["classification.type"] and self.type_translation:
                if value in self.type_translation:
                    value = self.type_translation[value]
                elif not hasattr(self.parameters, 'type'):
                    # Untranslatable type and no configured fallback: leave
                    # the field out of the event.
                    continue
            event.add(key, value)

        # Fall back to the configured type when the row did not supply one.
        if hasattr(self.parameters, 'type')\
                and not event.contains("classification.type"):
            event.add('classification.type', self.parameters.type)
        event.add("raw", ",".join(row))
        yield event
Ejemplo n.º 4
0
    def parse_line(self, row, report):
        """Convert one CSV row into an event using the configured columns.

        Each value in ``row`` is paired positionally with a field name from
        ``self.parameters.columns``. Columns named ``"__IGNORE__"`` or ``""``
        are skipped. Time, IP, URL and classification columns are normalised
        before being added; every other value is added unchanged.

        Args:
            row: Sequence of string cell values for one CSV line.
            report: The report the row came from; passed to ``Event``.

        Yields:
            The populated event.

        Raises:
            AttributeError: if a ``source.ip``/``destination.ip`` value does
                not match the address pattern (``match`` returns ``None``).
        """
        # regex from http://stackoverflow.com/a/23483979
        # matching ipv4/ipv6 IP within string
        # Raw literals keep the \. \d \s \- escapes from being treated as
        # (invalid) string escapes; the pattern text itself is unchanged.
        # Compiled once per call instead of once per IP column.
        ip_regex = re.compile(
            r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])'
            r'\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0'
            r'-5])|(([a-zA-Z]|[a-zA-Z][a-zA-Z0-9\-]*[a-zA-Z0-9])'
            r'\.)*([A-Za-z]|[A-Za-z][A-Za-z0-9\-]*[A-Za-z0-9])|'
            r'\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|('
            r'([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]'
            r'|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d'
            r'\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:['
            r'0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|['
            r'1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|'
            r':))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1'
            r',3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d'
            r'\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){'
            r'3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,'
            r'4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-'
            r'4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-'
            r'9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-'
            r'Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0'
            r'-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1'
            r'\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(('
            r'(:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4'
            r'}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2['
            r'0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]'
            r'{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2'
            r'[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|'
            r'[1-9]?\d)){3}))|:)))(%.+)?')

        event = Event(report)

        for key, value in zip(self.parameters.columns, row):

            if key in ["__IGNORE__", ""]:
                continue
            if key in ["time.source", "time.destination"]:
                # Fuzzy-parse whatever date format the feed uses, then tag
                # the ISO string with a literal " UTC" suffix.
                value = parse(value, fuzzy=True).isoformat()
                value += " UTC"
            elif key in ["source.ip", "destination.ip"]:
                # Keep only the leading part of the cell that the pattern
                # recognises.
                value = ip_regex.match(value).group()
            elif key.endswith('.url') and '://' not in value:
                # Scheme-less URLs get the configured default protocol.
                value = self.parameters.default_url_protocol + value
            elif key in ["classification.type"] and self.type_translation:
                if value in self.type_translation:
                    value = self.type_translation[value]
                elif not hasattr(self.parameters, 'type'):
                    # Untranslatable type and no configured fallback: leave
                    # the field out of the event.
                    continue
            event.add(key, value)

        # Fall back to the configured type when the row did not supply one.
        if hasattr(self.parameters, 'type')\
                and not event.contains("classification.type"):
            event.add('classification.type', self.parameters.type)
        event.add("raw", ",".join(row))
        yield event
Ejemplo n.º 5
0
    def process(self):
        """Split a received HTML report into rows and emit one event per row.

        The report's base64-encoded ``raw`` field is decoded and split on
        ``</tr>``. For every remaining row the first three ``<td>`` cells are
        read as source IP, last-seen timestamp and description. Rows that are
        empty or have fewer than three cells are skipped. The incoming message
        is acknowledged once all rows have been handled, or immediately when
        there is no usable report.
        """
        report = self.receive_message()

        if report is None or not report.contains("raw"):
            self.acknowledge_message()
            return

        raw_report = utils.base64_decode(report.value("raw"))
        # The first two "</tr>"-terminated chunks are dropped
        # (presumably page preamble and the table header; verify against feed).
        rows = raw_report.split("</tr>")[2:]

        parser = HTMLParser()

        for row in rows:
            row = row.strip()
            if not row:
                continue

            info = row.split("<td>")
            # info[1], info[2] and info[3] are read below, so at least four
            # fragments are required (text before the first cell + 3 cells).
            if len(info) < 4:
                continue

            event = Event(report)

            ip = info[1].split('</td>')[0].strip()
            # A fixed -05:00 offset is appended to the feed's timestamp.
            last_seen = info[2].split('</td>')[0].strip() + '-05:00'
            description = parser.unescape(info[3].split('</td>')[0].strip())
            lowered = description.lower()

            # Prefer an allowed classification name found verbatim in the
            # description; otherwise fall back to the TAXONOMY keywords.
            for key in ClassificationType.allowed_values:
                if key.lower() in lowered:
                    event.add("classification.type",
                              key, sanitize=True)
                    break
            else:
                for key, value in TAXONOMY.items():
                    if key.lower() in lowered:
                        event.add("classification.type",
                                  value, sanitize=True)
                        break

            if not event.contains("classification.type"):
                event.add("classification.type", u'unknown')

            event.add("time.source", last_seen, sanitize=True)
            event.add("source.ip", ip, sanitize=True)
            event.add("event_description.text", description, sanitize=True)
            event.add("raw", row, sanitize=True)

            self.send_message(event)
        self.acknowledge_message()
Ejemplo n.º 6
0
    def process(self):
        """Split a received HTML report into rows and emit one event per row.

        The report's base64-encoded ``raw`` field is decoded and split on
        ``</tr>``. For every remaining row the first three ``<td>`` cells are
        read as source IP, last-seen timestamp and description. Rows that are
        empty or have fewer than three cells are skipped. The incoming message
        is acknowledged once all rows have been handled, or immediately when
        there is no usable report.
        """
        report = self.receive_message()

        if report is None or not report.contains("raw"):
            self.acknowledge_message()
            return

        raw_report = utils.base64_decode(report.value("raw"))
        # The first two "</tr>"-terminated chunks are dropped
        # (presumably page preamble and the table header; verify against feed).
        rows = raw_report.split("</tr>")[2:]

        parser = HTMLParser()

        for row in rows:
            row = row.strip()
            if not row:
                continue

            info = row.split("<td>")
            # info[1], info[2] and info[3] are read below, so at least four
            # fragments are required (text before the first cell + 3 cells).
            if len(info) < 4:
                continue

            event = Event(report)

            ip = info[1].split('</td>')[0].strip()
            # A fixed -05:00 offset is appended to the feed's timestamp.
            last_seen = info[2].split('</td>')[0].strip() + '-05:00'
            description = parser.unescape(info[3].split('</td>')[0].strip())
            lowered = description.lower()

            # Prefer an allowed classification name found verbatim in the
            # description; otherwise fall back to the TAXONOMY keywords.
            for key in ClassificationType.allowed_values:
                if key.lower() in lowered:
                    event.add("classification.type", key, sanitize=True)
                    break
            else:
                for key, value in TAXONOMY.items():
                    if key.lower() in lowered:
                        event.add("classification.type", value, sanitize=True)
                        break

            if not event.contains("classification.type"):
                event.add("classification.type", u'unknown')

            event.add("time.source", last_seen, sanitize=True)
            event.add("source.ip", ip, sanitize=True)
            event.add("event_description.text", description, sanitize=True)
            event.add("raw", row, sanitize=True)

            self.send_message(event)
        self.acknowledge_message()
Ejemplo n.º 7
0
    def process(self):
        """Parse a received CSV report and emit one event per CSV row.

        The report's base64-encoded ``raw`` field is decoded, lines starting
        with ``#`` and NUL bytes are removed, and the remainder is read as
        CSV with the configured delimiter. Each cell is paired positionally
        with a field name from ``self.parameters.columns``; time, IP, URL and
        classification cells are normalised before being added. A cell whose
        normalisation raises is logged and left out of the event. The
        incoming message is acknowledged at the end, or immediately when
        there is no usable report.
        """
        report = self.receive_message()

        # Nothing to parse: acknowledge and stop instead of failing on
        # report.get() below.
        if not report or not report.contains("raw"):
            self.acknowledge_message()
            return

        columns = self.parameters.columns
        type_translation = None
        if hasattr(self.parameters, 'type_translation'):
            type_translation = json.loads(self.parameters.type_translation)

        # regex from http://stackoverflow.com/a/23483979
        # matching ipv4/ipv6 IP within string
        # Raw literals keep the \. \d \s \- escapes from being treated as
        # (invalid) string escapes; the pattern text itself is unchanged.
        # Compiled once per report instead of once per IP cell.
        ip_regex = re.compile(
            r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])'
            r'\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0'
            r'-5])|(([a-zA-Z]|[a-zA-Z][a-zA-Z0-9\-]*[a-zA-Z0-9])'
            r'\.)*([A-Za-z]|[A-Za-z][A-Za-z0-9\-]*[A-Za-z0-9])|'
            r'\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|('
            r'([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]'
            r'|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d'
            r'\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:['
            r'0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|['
            r'1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|'
            r':))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1'
            r',3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d'
            r'\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){'
            r'3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,'
            r'4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-'
            r'4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-'
            r'9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-'
            r'Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0'
            r'-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1'
            r'\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(('
            r'(:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4'
            r'}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2['
            r'0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]'
            r'{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2'
            r'[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|'
            r'[1-9]?\d)){3}))|:)))(%.+)?')

        raw_report = utils.base64_decode(report.get("raw"))
        # ignore lines starting with #
        raw_report = re.sub(r'(?m)^#.*\n?', '', raw_report)
        # ignore null bytes
        raw_report = re.sub(r'(?m)\0', '', raw_report)
        for row in csv.reader(io.StringIO(raw_report),
                              delimiter=str(self.parameters.delimiter)):
            event = Event(report)

            for key, value in zip(columns, row):

                if key in ["__IGNORE__", ""]:
                    continue
                try:
                    if key in ["time.source", "time.destination"]:
                        # Fuzzy-parse the feed's date format, then tag the
                        # ISO string with a literal " UTC" suffix.
                        value = parse(value, fuzzy=True).isoformat()
                        value += " UTC"
                    elif key in ["source.ip", "destination.ip"]:
                        # Keep only the leading part of the cell that the
                        # pattern recognises; no match raises and is handled
                        # by the except clause below.
                        value = ip_regex.match(value).group()
                    elif key.endswith('.url') and '://' not in value:
                        value = self.parameters.default_url_protocol + value
                    elif key in ["classification.type"] and type_translation:
                        if value in type_translation:
                            value = type_translation[value]
                        elif not hasattr(self.parameters, 'type'):
                            # Untranslatable type and no configured
                            # fallback: leave the field out of the event.
                            continue

                # Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not swallowed.
                except Exception:
                    # NOTE(review): the message says the row is ignored, but
                    # this `continue` only skips the current field; the event
                    # is still sent without it. Confirm which is intended.
                    self.logger.warning('Encountered error while parsing line'
                                        ' in csv file, ignoring this row: ' +
                                        repr(row))
                    continue
                event.add(key, value)

            # Fall back to the configured type when the row supplied none.
            if hasattr(self.parameters, 'type')\
                    and not event.contains("classification.type"):
                event.add('classification.type', self.parameters.type)
            event.add("raw", ",".join(row))

            self.send_message(event)
        self.acknowledge_message()
Ejemplo n.º 8
0
    def process(self):
        """Parse a received CSV report and emit one event per CSV row.

        The report's base64-encoded ``raw`` field is decoded, lines starting
        with ``#`` and NUL bytes are removed, and the remainder is read with
        ``utils.csv_reader`` using the configured delimiter. Each cell is
        paired positionally with a field name from
        ``self.parameters.columns``; time, IP, URL and classification cells
        are normalised before being added. A cell whose normalisation raises
        is logged and left out of the event. The incoming message is
        acknowledged at the end, or immediately when there is no usable
        report.
        """
        report = self.receive_message()

        if not report or not report.contains("raw"):
            self.acknowledge_message()
            return

        columns = self.parameters.columns
        type_translation = None
        if hasattr(self.parameters, 'type_translation'):
            type_translation = json.loads(self.parameters.type_translation)

        # regex from http://stackoverflow.com/a/23483979
        # matching ipv4/ipv6 IP within string
        # Raw literals keep the \. \d \s \- escapes from being treated as
        # (invalid) string escapes; the pattern text itself is unchanged.
        # Compiled once per report instead of once per IP cell.
        ip_regex = re.compile(
            r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])'
            r'\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0'
            r'-5])|(([a-zA-Z]|[a-zA-Z][a-zA-Z0-9\-]*[a-zA-Z0-9])'
            r'\.)*([A-Za-z]|[A-Za-z][A-Za-z0-9\-]*[A-Za-z0-9])|'
            r'\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|('
            r'([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]'
            r'|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d'
            r'\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:['
            r'0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|['
            r'1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|'
            r':))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1'
            r',3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d'
            r'\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){'
            r'3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,'
            r'4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-'
            r'4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-'
            r'9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-'
            r'Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0'
            r'-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1'
            r'\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(('
            r'(:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4'
            r'}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2['
            r'0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]'
            r'{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2'
            r'[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|'
            r'[1-9]?\d)){3}))|:)))(%.+)?')

        raw_report = utils.base64_decode(report.get("raw"))
        # ignore lines starting with #
        raw_report = re.sub(r'(?m)^#.*\n?', '', raw_report)
        # ignore null bytes
        raw_report = re.sub(r'(?m)\0', '', raw_report)
        for row in utils.csv_reader(raw_report,
                                    delimiter=str(self.parameters.delimiter)):
            event = Event(report)

            for key, value in zip(columns, row):

                if key in ["__IGNORE__", ""]:
                    continue
                try:
                    if key in ["time.source", "time.destination"]:
                        # Fuzzy-parse the feed's date format, then tag the
                        # ISO string with a literal " UTC" suffix.
                        value = parse(value, fuzzy=True).isoformat()
                        value += " UTC"
                    elif key in ["source.ip", "destination.ip"]:
                        # Keep only the leading part of the cell that the
                        # pattern recognises; no match raises and is handled
                        # by the except clause below.
                        value = ip_regex.match(value).group()
                    elif key.endswith('.url') and '://' not in value:
                        value = self.parameters.default_url_protocol + value
                    elif key in ["classification.type"] and type_translation:
                        if value in type_translation:
                            value = type_translation[value]
                        elif not hasattr(self.parameters, 'type'):
                            # Untranslatable type and no configured
                            # fallback: leave the field out of the event.
                            continue

                # Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not swallowed.
                except Exception:
                    # NOTE(review): the message says the row is ignored, but
                    # this `continue` only skips the current field; the event
                    # is still sent without it. Confirm which is intended.
                    self.logger.warning('Encountered error while parsing line'
                                        ' in csv file, ignoring this row: ' +
                                        repr(row))
                    continue
                event.add(key, value)

            # Fall back to the configured type when the row supplied none.
            if hasattr(self.parameters, 'type')\
                    and not event.contains("classification.type"):
                event.add('classification.type', self.parameters.type)
            event.add("raw", ",".join(row))

            self.send_message(event)
        self.acknowledge_message()