def process(self):
    """Download the configured report over HTTP and send one report
    message per file found in the response (or one for the raw body).
    """
    # Optional time-based templating of the URL via http_url_formatting.
    url_format = getattr(self.parameters, 'http_url_formatting', False)
    if not url_format:
        http_url = self.parameters.http_url
    else:
        try:
            http_url = self.parameters.http_url.format(time=Time(url_format))
        except TypeError:
            self.logger.error(
                "Wrongly formatted http_url_formatting parameter: %s. Should be boolean or a time-delta JSON.",
                url_format)
            raise
        except KeyError:
            self.logger.error(
                "Wrongly formatted http_url parameter: %s. Possible misspell with 'time' variable.",
                self.parameters.http_url)
            raise

    self.logger.info("Downloading report from %r.", http_url)
    resp = self.session.get(url=http_url)
    if resp.status_code // 100 != 2:
        raise ValueError('HTTP response status code was %i.' % resp.status_code)
    self.logger.info("Report downloaded.")

    if self.extract_files:
        # An explicit list of archive members was configured.
        raw_reports = unzip(resp.content, self.extract_files,
                            return_names=True, logger=self.logger)
    else:
        # Try ZIP auto-detection first; on failure fall back to treating
        # the whole response body as a single (unnamed) report.
        try:
            raw_reports = tuple(unzip(resp.content, True,
                                      try_gzip=False, try_tar=False,
                                      logger=self.logger,
                                      return_names=True))
        except ValueError:
            raw_reports = [(None, resp.text)]
        else:
            self.logger.info('Extracting files: '
                             "'%s'.",
                             "', '".join(name for name, _ in raw_reports))

    for file_name, raw_report in raw_reports:
        report = self.new_report()
        report.add("raw", raw_report)
        report.add("feed.url", http_url)
        if file_name:
            report.add("extra.file_name", file_name)
        self.send_message(report)
def test_unzip_zip(self):
    """ Test the unzip function with a zip file. """
    archive = os.path.join(os.path.dirname(__file__),
                           '../assets/two_files.zip')
    with open(archive, 'rb') as stream:
        payload = stream.read()
    extracted = utils.unzip(payload, extract_files=True)
    self.assertEqual(tuple(extracted), (b'bar text\n', b'foo text\n'))
def test_unzip_gz(self):
    """ Test the unzip function with a gz file. """
    archive = os.path.join(os.path.dirname(__file__),
                           '../assets/foobar.gz')
    with open(archive, 'rb') as stream:
        payload = stream.read()
    self.assertEqual(utils.unzip(payload, extract_files=True),
                     (b'bar text\n', ))
def test_unzip_tar_gz_return_names(self):
    """ Test the unzip function with a tar gz file and return_names. """
    archive = os.path.join(os.path.dirname(__file__),
                           '../assets/two_files.tar.gz')
    with open(archive, 'rb') as stream:
        names_and_contents = utils.unzip(stream.read(),
                                         extract_files=True,
                                         return_names=True)
    self.assertEqual(tuple(names_and_contents),
                     (('bar', b'bar text\n'), ('foo', b'foo text\n')))
def process(self):
    """Download the configured report over HTTP and send one report
    message per extracted file (or one for the plain response body).

    Fix: the ZIP auto-detection fallback previously ran unconditionally,
    so when ``extract_files`` was set its result — and the misleading
    "Extracting files" log line — were computed and then thrown away.
    The fallback now only runs when no explicit file list is configured.
    """
    formatting = getattr(self.parameters, 'http_url_formatting', False)
    if formatting:
        try:
            http_url = self.parameters.http_url.format(time=Time(formatting))
        except TypeError:
            self.logger.error(
                "Wrongly formatted http_url_formatting parameter: %s. Should be boolean or a time-delta JSON.",
                formatting)
            raise
        except KeyError:
            self.logger.error(
                "Wrongly formatted http_url parameter: %s. Possible misspell with 'time' variable.",
                self.parameters.http_url)
            raise
    else:
        http_url = self.parameters.http_url

    self.logger.info("Downloading report from %r.", http_url)
    resp = self.session.get(url=http_url)
    if resp.status_code // 100 != 2:
        raise ValueError('HTTP response status code was %i.' % resp.status_code)
    self.logger.info("Report downloaded.")

    if self.extract_files:
        # Explicit list (or comma-separated string) of archive members.
        if isinstance(self.extract_files, str) and len(self.extract_files):
            self.extract_files = self.extract_files.split(",")
            self.logger.info('Extracting files from archive: '
                             "'%s'.", "', '".join(self.extract_files))
        else:
            self.logger.info('Extracting all files from archive.')
        raw_reports = list(unzip(resp.content, self.extract_files))
    else:
        # Auto-detect a ZIP archive; otherwise treat the body as text.
        raw_reports = []
        try:
            zfp = zipfile.ZipFile(io.BytesIO(resp.content), "r")
        except zipfile.BadZipfile:
            raw_reports.append(resp.text)
        else:
            self.logger.info('Extracting files:'
                             "'%s'.", "', '".join(zfp.namelist()))
            for filename in zfp.namelist():
                raw_reports.append(zfp.read(filename))

    for raw_report in raw_reports:
        report = self.new_report()
        report.add("raw", raw_report)
        report.add("feed.url", http_url)
        self.send_message(report)
def process_message(self, uid, message):
    """Scan one e-mail for attachments matching ``attach_regex`` and
    emit each (optionally unzipped) attachment as a report message.

    :param uid: mailbox UID of the message (part of the mail-collector
        interface; not used in this method).
    :param message: imbox message object with ``attachments``,
        ``subject``, ``sent_from``, ``message_id`` and ``date``.
    :returns: True when at least one attachment matched (the caller may
        then mark the message as seen), else False.

    Fix: removed a dead ``self.new_report()`` call before the extraction
    branch — its result was never used, since the loop below builds a
    fresh report per extracted file.
    """
    seen = False
    for attach in message.attachments:
        if not attach:
            continue
        try:
            attach_filename = attach['filename']
        except KeyError:
            # https://github.com/certtools/intelmq/issues/1538
            self.logger.debug(
                'Skipping attachment because of missing filename.')
            continue
        if attach_filename.startswith('"'):
            # for imbox versions older than 0.9.5, see also above
            attach_filename = attach_filename[1:-1]
        if re.search(self.attach_regex, attach_filename):
            self.logger.debug("Found suitable attachment %s.",
                              attach_filename)
            if self.extract_files:
                raw_reports = unzip(attach['content'].read(),
                                    self.extract_files,
                                    return_names=True,
                                    logger=self.logger)
            else:
                raw_reports = ((attach_filename,
                                attach['content'].read()), )
            for file_name, raw_report in raw_reports:
                report = self.new_report()
                report.add("raw", raw_report)
                if file_name:
                    report.add("extra.file_name", file_name)
                report["extra.email_subject"] = message.subject
                report["extra.email_from"] = ','.join(
                    x['email'] for x in message.sent_from)
                report["extra.email_message_id"] = message.message_id
                report["extra.email_date"] = message.date
                self.send_message(report)
            # Only mark read if message relevant to this instance,
            # so other instances watching this mailbox will still
            # check it.
            seen = True
    self.logger.info("Email report read.")
    return seen
def process(self):
    """Poll an RT (Request Tracker) instance for matching tickets and
    emit their attachments or linked downloads as report messages.

    NOTE(review): several locals (``att_id``, ``att_name``, ``created``,
    ``url``) are bound only on specific branches; the code relies on the
    ``success``/``content`` flags to guarantee they exist before use.
    """
    RT = rt.Rt(self.parameters.uri, self.parameters.user,
               self.parameters.password)
    if not RT.login():
        raise ValueError('Login failed.')

    # Restrict the search window when configured; 'relative' recomputes
    # the cut-off from "now" on every run.
    if self.not_older_than_type:
        if self.not_older_than_type == 'relative':
            self.not_older_than = datetime.now() - self.not_older_than_relative
        kwargs = {'Created__gt': self.not_older_than.isoformat()}
        self.logger.debug('Searching for tickets newer than %r.',
                          kwargs['Created__gt'])
    else:
        kwargs = {}

    # Map configured collector parameters onto RT search fields.
    for parameter_name, rt_name in self.parameter_mapping.items():
        parameter_value = getattr(self.parameters, parameter_name, None)
        if parameter_value:
            kwargs[rt_name] = parameter_value

    query = RT.search(order='Created', **kwargs)
    self.logger.info('%s results on search query.', len(query))

    for ticket in query:
        # Ticket ids come back as e.g. 'ticket/123'.
        ticket_id = int(ticket['id'].split('/')[1])
        self.logger.debug('Process ticket %s.', ticket_id)
        content = 'attachment'
        success = False

        # Preferred source: a ticket attachment whose name matches.
        if self.parameters.attachment_regex:
            for (att_id, att_name, _, _) in RT.get_attachments(ticket_id):
                if re.search(self.parameters.attachment_regex, att_name):
                    self.logger.debug('Found attachment %s: %r.',
                                      att_id, att_name)
                    success = True
                    content = 'attachment'
                    self.extract_files = self.extract_attachment
                    break

        # Fallback: a URL in the first history entry's text body.
        if not success and self.parameters.url_regex:
            ticket = RT.get_history(ticket_id)[0]
            created = ticket['Created']
            urlmatch = re.search(self.parameters.url_regex,
                                 ticket['Content'])
            if urlmatch:
                content = 'url'
                self.extract_files = self.extract_download
                url = urlmatch.group(0)
                self.logger.debug('Matching URL found %r.', url)
                success = True

        if not success:
            self.logger.info('No matching attachment or URL found.')
            continue

        report = self.new_report()
        if content == 'attachment':
            attachment = RT.get_attachment_content(ticket_id, att_id)
            created = RT.get_attachment(ticket_id, att_id)['Created']
            raw = attachment
        else:
            resp = self.session.get(url=url)
            response_code_class = resp.status_code // 100
            if response_code_class != 2:
                self.logger.error('HTTP response status code for %r was %s. '
                                  'Skipping ticket %d.',
                                  url, resp.status_code, ticket_id)
                if response_code_class == 4:
                    # Client errors: body usually explains the rejection.
                    self.logger.debug('Server response: %r.', resp.text)
                # Still update/take the ticket so it is not retried forever.
                if self.parameters.set_status:
                    RT.edit_ticket(ticket_id,
                                   status=self.parameters.set_status)
                if self.parameters.take_ticket:
                    try:
                        RT.take(ticket_id)
                    except rt.BadRequest:
                        self.logger.exception(
                            "Could not take ticket %s.", ticket_id)
                else:
                    self.logger.info('Skipping now.')
                continue
            self.logger.info("Report #%d downloaded.", ticket_id)
            self.logger.debug("Downloaded content has %d bytes.",
                              len(resp.content))
            # extract_download decides raw bytes (archive) vs decoded text.
            if self.extract_download:
                raw = resp.content
            else:
                raw = resp.text
            report["extra.file_name"] = file_name_from_response(resp)

        report.add("rtir_id", ticket_id)
        report.add("time.observation", created + ' UTC', overwrite=True)
        """ On RT 3.8 these fields are only available on the original ticket,
        not the first history element as in 4.4 """
        if "Subject" not in ticket:
            ticket = RT.get_ticket(ticket_id)
        report.add("extra.email_subject", ticket["Subject"])
        report.add("extra.ticket_subject", ticket["Subject"])
        report.add("extra.email_from", ','.join(ticket["Requestors"]))
        report.add("extra.ticket_requestors", ','.join(ticket["Requestors"]))
        report.add("extra.ticket_queue", ticket["Queue"])
        report.add("extra.ticket_status", ticket["Status"])
        report.add("extra.ticket_owner", ticket["Owner"])

        if self.extract_files:
            try:
                unzipped = unzip(raw, self.extract_files,
                                 return_names=True, logger=self.logger)
            except ValueError:
                self.logger.error('Could not uncompress the file. '
                                  'Skipping for now.')
                continue
            for file_name, raw_report in unzipped:
                """ File name priority is:
                 * From the archive (zip, tar.gz)
                 * From the HTTP Response
                 * From the Attachment name
                For gz attachments, only the last options works
                """
                report_new = report.copy()
                report_new.add("raw", raw_report)
                report_new.add("extra.file_name", file_name, overwrite=True)
                # NOTE(review): att_name is only bound when the attachment
                # branch matched — presumably extract_files being set here
                # implies that branch ran; confirm for the URL case.
                if "extra.file_name" not in report_new and att_name.endswith(
                        '.gz'):
                    report_new["extra.file_name"] = att_name[:-3]
                self.send_message(report_new)
        else:
            report.add("raw", raw)
            self.send_message(report)

        # Ticket post-processing after successful delivery.
        if self.parameters.take_ticket:
            try:
                RT.take(ticket_id)
            except rt.BadRequest:
                self.logger.exception("Could not take ticket %s.", ticket_id)
        if self.parameters.set_status:
            RT.edit_ticket(ticket_id, status=self.parameters.set_status)
def process(self):
    """Download the configured report via HTTP, optionally verify its
    PGP signature, and send one report message per extracted file.
    """
    formatting = self.http_url_formatting
    if formatting:
        # Substitute time placeholders into the URL template.
        http_url = self.format_url(self.http_url, formatting)
    else:
        http_url = self.http_url

    self.logger.info("Downloading report from %r.", http_url)
    resp = self.http_get(http_url)

    if resp.status_code // 100 != 2:
        # Dump the whole exchange at debug level before failing.
        self.logger.debug('Request headers: %r.', resp.request.headers)
        self.logger.debug('Request body: %r.', resp.request.body)
        self.logger.debug('Response headers: %r.', resp.headers)
        self.logger.debug('Response body: %r.', resp.text)
        raise ValueError('HTTP response status code was %i.' % resp.status_code)

    self.logger.info("Report downloaded.")

    # PGP verification
    if self.use_gpg:
        result = self.verify_signature(data=resp.content)

        if not result:
            # Errors have been logged by the verify_signature function.
            return

        if not result.valid:
            # Invalid signature: reject the whole report.
            self.logger.error("Signature for key {0.key_id} is not valid: {0.status}. Report rejected.".format(result))
            return

        # Weak trust is only logged; the report is still accepted.
        if result.trust_level < 1:
            self.logger.debug("Trust level not defined for key {}.".format(result.key_id))
        elif result.trust_level < 3:
            self.logger.debug("Low trust level for key {0.key_id}: {0.trust_level}.".format(result))

        self.logger.info("PGP signature checked with key {0.key_id}: {0.status}.".format(result))

    # process reports
    raw_reports = []
    if not self.extract_files:
        # Try ZIP auto-detection; fall back to the plain response text
        # as a single unnamed report.
        try:
            raw_reports = tuple(unzip(resp.content, True,
                                      try_gzip=False, try_tar=False,
                                      logger=self.logger,
                                      return_names=True))
        except ValueError:
            raw_reports.append((None, resp.text))
        else:
            self.logger.info('Extracting files: '
                             "'%s'.",
                             "', '".join([file_name for file_name, _ in raw_reports]))
    else:
        # Explicitly configured list of archive members to extract.
        raw_reports = unzip(resp.content, self.extract_files,
                            return_names=True, logger=self.logger)

    for file_name, raw_report in raw_reports:
        report = self.new_report()
        report.add("raw", raw_report)
        report.add("feed.url", http_url)
        if file_name:
            report.add("extra.file_name", file_name)
        self.send_message(report)
def process(self):
    """Download the configured report via HTTP with timeout retries and
    send one report message per extracted file (or the plain body).

    Fixes applied:
    * deprecated ``logger.warn`` replaced by ``logger.warning``;
    * the ZIP auto-detection fallback previously ran unconditionally, so
      with ``extract_files`` set its result and log output were thrown
      away — it now runs only when no explicit file list is configured;
    * manual copy comprehension (shadowing builtin ``file``) replaced by
      ``list(...)``.
    """
    formatting = getattr(self.parameters, 'http_url_formatting', False)
    if formatting:
        try:
            http_url = self.parameters.http_url.format(time=Time(formatting))
        except TypeError:
            self.logger.error(
                "Wrongly formatted http_url_formatting parameter: %s. Should be boolean or a time-delta JSON.",
                formatting)
            raise
        except KeyError:
            self.logger.error(
                "Wrongly formatted http_url parameter: %s. Possible misspell with 'time' variable.",
                self.parameters.http_url)
            raise
    else:
        http_url = self.parameters.http_url

    self.logger.info("Downloading report from %r.", http_url)

    # Retry the download up to http_timeout_max_tries times on timeout.
    timeoutretries = 0
    resp = None
    while timeoutretries < self.http_timeout_max_tries and resp is None:
        try:
            resp = requests.get(url=http_url, auth=self.auth,
                                proxies=self.proxy,
                                headers=self.http_header,
                                verify=self.http_verify_cert,
                                cert=self.ssl_client_cert,
                                timeout=self.http_timeout_sec)
        except requests.exceptions.Timeout:
            timeoutretries += 1
            self.logger.warning("Timeout whilst downloading the report.")

    if resp is None and timeoutretries >= self.http_timeout_max_tries:
        self.logger.error("Request timed out %i times in a row.",
                          timeoutretries)
        return

    if resp.status_code // 100 != 2:
        raise ValueError('HTTP response status code was %i.'
                         % resp.status_code)

    self.logger.info("Report downloaded.")

    if self.extract_files:
        # Explicit list (or comma-separated string) of archive members.
        if isinstance(self.extract_files, str) and len(self.extract_files):
            self.extract_files = self.extract_files.split(",")
            self.logger.info('Extracting files from archive: '
                             "'%s'.", "', '".join(self.extract_files))
        else:
            self.logger.info('Extracting all files from archive.')
        raw_reports = list(unzip(resp.content, self.extract_files))
    else:
        # Auto-detect a ZIP archive; otherwise treat the body as text.
        raw_reports = []
        try:
            zfp = zipfile.ZipFile(io.BytesIO(resp.content), "r")
        except zipfile.BadZipfile:
            raw_reports.append(resp.text)
        else:
            self.logger.info('Extracting files:'
                             "'%s'.", "', '".join(zfp.namelist()))
            for filename in zfp.namelist():
                raw_reports.append(zfp.read(filename))

    for raw_report in raw_reports:
        report = self.new_report()
        report.add("raw", raw_report)
        report.add("feed.url", http_url)
        self.send_message(report)