def test_generate_reports_with_chunking_and_copying_header(self):
        """Chunked reports must each start with a copy of the header line.

        Every generated report is checked for the template's feed name and
        observation time and for a leading header line equal to the first
        line of the input.  The remaining payloads, joined behind a single
        header, have to reproduce the input byte for byte.
        """
        chunk_limit = 1000
        feed_name = "test_generate_reports_with_chunking_and_header"

        # Chunking is only exercised when the fixture exceeds the limit.
        self.assertTrue(chunk_limit < len(csv_test_data))

        template = Report(harmonization=HARM)
        template.add("feed.name", feed_name)
        expected_time = template["time.observation"]

        expected_header = io.BytesIO(csv_test_data).readline()

        reassembled = [expected_header]
        reports = generate_reports(template, io.BytesIO(csv_test_data),
                                   chunk_size=chunk_limit,
                                   copy_header_line=True)
        for report in reports:
            self.assertEqual(report["feed.name"], feed_name)
            self.assertEqual(report["time.observation"], expected_time)

            # Split the decoded payload into its first line and the rest.
            payload = io.BytesIO(base64.b64decode(report["raw"]))
            first_line = payload.readline()
            remainder = payload.read()

            self.assertEqual(expected_header, first_line)
            reassembled.append(remainder)

        self.assertEqual(b"".join(reassembled), csv_test_data)
    def test_generate_reports_with_chunking_and_copying_header(self):
        """Check chunked report generation with the header line replicated.

        Each report must carry the template's feed name and observation
        time, and its decoded payload must begin with the input's first
        line.  Stripping that line from every chunk and concatenating the
        rest after one header must give back the original data.
        """
        size_limit = 1000
        expected_name = "test_generate_reports_with_chunking_and_header"

        # Without data longer than the limit nothing would be chunked.
        self.assertTrue(size_limit < len(csv_test_data))

        template = Report(harmonization=HARM)
        template.add("feed.name", expected_name)
        template_time = template["time.observation"]

        header_of_input = io.BytesIO(csv_test_data).readline()

        pieces = [header_of_input]
        generated = generate_reports(template,
                                     io.BytesIO(csv_test_data),
                                     chunk_size=size_limit,
                                     copy_header_line=True)
        for report in generated:
            self.assertEqual(report["feed.name"], expected_name)
            self.assertEqual(report["time.observation"], template_time)

            decoded = io.BytesIO(base64.b64decode(report["raw"]))
            chunk_header = decoded.readline()
            chunk_body = decoded.read()

            self.assertEqual(header_of_input, chunk_header)
            pieces.append(chunk_body)

        self.assertEqual(b"".join(pieces), csv_test_data)
Exemple #3
0
    def process(self):
        """Send the content of matching files in the configured directory.

        Nothing is collected unless ``self.parameters.path`` is a directory.
        Regular files directly inside it whose name matches the glob
        ``'*' + self.parameters.postfix`` are opened in binary mode and
        handed to ``generate_reports`` together with a template report
        carrying a ``file://localhost...`` feed URL; every generated report
        is sent.  If ``self.parameters.delete_file`` is set, the file is
        removed afterwards; a ``PermissionError`` on removal is logged and
        ``self.stop()`` is called.
        """
        self.logger.debug("Started looking for files.")

        if not os.path.isdir(self.parameters.path):
            return

        directory = os.path.abspath(self.parameters.path)

        # Only direct children of the directory are looked at.
        for entry in os.listdir(directory):
            filename = os.path.join(directory, entry)
            if not os.path.isfile(filename):
                continue
            if not fnmatch.fnmatch(entry, '*' + self.parameters.postfix):
                continue

            self.logger.info("Processing file %r.", filename)

            template = self.new_report()
            template.add("feed.url", "file://localhost%s" % filename)

            with open(filename, 'rb') as handle:
                reports = generate_reports(template, handle, self.chunk_size,
                                           self.chunk_replicate_header)
                for report in reports:
                    self.send_message(report)

            if not self.parameters.delete_file:
                continue

            try:
                os.remove(filename)
                self.logger.debug("Deleted file: %r.", filename)
            except PermissionError:
                self.logger.error("Could not delete file %r.", filename)
                self.logger.info("Maybe I don't have sufficient rights on that file?")
                self.logger.error("Stopping now, to prevent reading this file again.")
                self.stop()
Exemple #4
0
    def process(self):
        """Collect matching files from the configured directory as reports.

        Does nothing beyond a debug message when ``self.parameters.path`` is
        not a directory.  Otherwise each regular file directly inside it
        whose name matches the glob ``'*' + self.parameters.postfix`` is read
        in binary mode and passed to ``generate_reports``; the template
        report records a ``file://localhost...`` feed URL and the bare file
        name in ``extra.file_name``.  Every generated report is sent.  With
        ``self.parameters.delete_file`` set the file is removed afterwards;
        if that raises ``PermissionError`` the problem is logged and
        ``self.stop()`` is called.
        """
        self.logger.debug("Started looking for files.")

        if not os.path.isdir(self.parameters.path):
            return

        base_dir = os.path.abspath(self.parameters.path)

        # Walk the directory's direct entries; subdirectories are skipped.
        for name in os.listdir(base_dir):
            filename = os.path.join(base_dir, name)
            if not os.path.isfile(filename):
                continue
            if not fnmatch.fnmatch(name, '*' + self.parameters.postfix):
                continue

            self.logger.info("Processing file %r.", filename)

            template = self.new_report()
            template.add("feed.url", "file://localhost%s" % filename)
            template.add("extra.file_name", name)

            with open(filename, 'rb') as source:
                reports = generate_reports(template, source, self.chunk_size,
                                           self.chunk_replicate_header)
                for report in reports:
                    self.send_message(report)

            if not self.parameters.delete_file:
                continue

            try:
                os.remove(filename)
                self.logger.debug("Deleted file: %r.", filename)
            except PermissionError:
                self.logger.error("Could not delete file %r.", filename)
                self.logger.info("Maybe I don't have sufficient rights on that file?")
                self.logger.error("Stopping now, to prevent reading this file again.")
                self.stop()
Exemple #5
0
    def process_message(self, uid, message):
        """Download the reports linked from one mail and send them on.

        Every plain-text body part is searched with
        ``self.parameters.url_regex`` (bytes parts are decoded as UTF-8
        first).  A matching URL is fetched through ``self.session``; non-empty
        response content is split by ``generate_reports`` into reports that
        carry the URL, the mail's subject, senders and message id, and the
        file name derived from the response.

        ``uid`` is accepted but not used by this method.

        Returns:
            bool: True if at least one linked download answered with a 2xx
            status, or if an error occurred and
            ``self.parameters.error_procedure`` is ``'pass'``; otherwise
            False.
        """
        had_error = False
        seen = False

        for part in message.body['plain']:
            text = part.decode('utf-8') if isinstance(part, bytes) else part
            match = re.search(self.parameters.url_regex, str(text))
            if not match:
                continue

            # Drop surrounding spaces, newlines and carriage returns.
            url = match.group().strip()

            self.logger.info("Downloading report from %r.", url)
            try:
                resp = self.session.get(url=url)
            except requests.exceptions.Timeout:
                self.logger.error("Request timed out %i times in a row. " %
                                  self.http_timeout_max_tries)
                had_error = True
                # Give up on this body part and go on with the next one.
                continue

            if resp.status_code // 100 != 2:
                self.logger.error('HTTP response status code was {}.'
                                  ''.format(resp.status_code))
                had_error = True
                continue

            if resp.content:
                self.logger.info("Report downloaded.")

                template = self.new_report()
                template["feed.url"] = url
                template["extra.email_subject"] = message.subject
                template["extra.email_from"] = ','.join(
                    sender['email'] for sender in message.sent_from)
                template["extra.email_message_id"] = message.message_id
                template["extra.file_name"] = file_name_from_response(resp)

                reports = generate_reports(template, io.BytesIO(resp.content),
                                           self.chunk_size,
                                           self.chunk_replicate_header)
                for report in reports:
                    self.send_message(report)
            else:
                self.logger.warning('Got empty reponse from server.')

            seen = True

        if had_error:
            if self.parameters.error_procedure == 'pass':
                seen = True
            else:
                self.logger.error("Email report read with above errors, the report was not processed.")
        else:
            self.logger.info("Email report read.")

        return seen
Exemple #6
0
 def test_generate_reports_no_chunking(self):
     """Without a chunk size exactly one report holds the whole input."""
     feed_name = "test_generate_reports_no_chunking"
     template = Report(harmonization=HARM)
     template.add("feed.name", feed_name)

     reports = generate_reports(template, io.BytesIO(csv_test_data),
                                chunk_size=None,
                                copy_header_line=False)
     # Unpacking fails unless exactly one report was generated.
     (only_report,) = list(reports)

     self.assertEqual(only_report["feed.name"], feed_name)
     self.assertEqual(base64.b64decode(only_report["raw"]), csv_test_data)
 def test_generate_reports_no_chunking(self):
     """Check that disabled chunking yields a single, complete report."""
     expected_name = "test_generate_reports_no_chunking"
     template = Report(harmonization=HARM)
     template.add("feed.name", expected_name)

     generated = list(generate_reports(template,
                                       io.BytesIO(csv_test_data),
                                       chunk_size=None,
                                       copy_header_line=False))
     # The single-element unpacking doubles as a check on the report count.
     [single] = generated

     decoded = base64.b64decode(single["raw"])
     self.assertEqual(single["feed.name"], expected_name)
     self.assertEqual(decoded, csv_test_data)
Exemple #8
0
    def process(self):
        """Fetch reports linked from unread mails and send them on.

        Connects to the mailbox described by the ``mail_*`` parameters and
        asks for the unread messages in ``self.parameters.folder``.  Messages
        whose subject does not match ``self.parameters.subject_regex`` (when
        one is set) are skipped.  For each plain-text body part containing a
        match for ``self.parameters.url_regex`` the URL is downloaded, the
        response content is split into reports by ``generate_reports``, the
        reports are sent and the message is marked as seen.

        The mailbox is logged out even when processing raises.

        Raises:
            ValueError: if a download answers with a non-2xx status code.
        """
        mailbox = imbox.Imbox(self.parameters.mail_host,
                              self.parameters.mail_user,
                              self.parameters.mail_password,
                              self.parameters.mail_ssl)
        try:
            emails = mailbox.messages(folder=self.parameters.folder, unread=True)

            if emails:
                for uid, message in emails:

                    # CRLF followed by whitespace is collapsed to one space
                    # before matching (presumably to undo header folding).
                    if (self.parameters.subject_regex and
                            not re.search(self.parameters.subject_regex,
                                          re.sub(r"\r\n\s", " ", message.subject))):
                        continue

                    for body in message.body['plain']:
                        match = re.search(self.parameters.url_regex, str(body))
                        if not match:
                            continue

                        # Strip leading and trailing spaces, newlines and
                        # carriage returns.
                        url = match.group().strip()

                        self.logger.info("Downloading report from %r.", url)
                        resp = requests.get(url=url,
                                            auth=self.auth,
                                            proxies=self.proxy,
                                            headers=self.http_header,
                                            verify=self.http_verify_cert,
                                            cert=self.ssl_client_cert,
                                            timeout=self.http_timeout)

                        if resp.status_code // 100 != 2:
                            raise ValueError(
                                'HTTP response status code was {}.'
                                ''.format(resp.status_code))

                        self.logger.info("Report downloaded.")

                        template = self.new_report()

                        for report in generate_reports(
                                template, io.BytesIO(resp.content),
                                self.chunk_size, self.chunk_replicate_header):
                            self.send_message(report)

                        # Only mark read if message relevant to this instance,
                        # so other instances watching this mailbox will still
                        # check it.
                        mailbox.mark_seen(uid)
                    self.logger.info("Email report read.")
        finally:
            # Close the connection on every exit path, including errors.
            mailbox.logout()
Exemple #9
0
    def test_generate_reports_with_chunking_no_header(self):
        """Chunks generated without header copying must rebuild the input.

        Every report has to carry the template's feed name, and the decoded
        payloads concatenated in order must equal the original data.
        """
        feed_name = "test_generate_reports_with_chunking"
        template = Report(harmonization=HARM)
        template.add("feed.name", feed_name)

        chunk_limit = 1000

        # Chunking is only exercised when the fixture exceeds the limit.
        self.assertTrue(chunk_limit < len(csv_test_data))

        payloads = []
        reports = generate_reports(template, io.BytesIO(csv_test_data),
                                   chunk_size=chunk_limit,
                                   copy_header_line=False)
        for report in reports:
            self.assertEqual(report["feed.name"], feed_name)
            payloads.append(base64.b64decode(report["raw"]))

        self.assertEqual(b"".join(payloads), csv_test_data)
    def test_generate_reports_with_chunking_no_header(self):
        """Check chunked report generation when the header is not copied.

        The feed name of the template must appear on each report, and
        joining the decoded chunks in order must give back the input.
        """
        expected_name = "test_generate_reports_with_chunking"
        template = Report(harmonization=HARM)
        template.add("feed.name", expected_name)

        size_limit = 1000

        # Without data longer than the limit nothing would be chunked.
        self.assertTrue(size_limit < len(csv_test_data))

        pieces = []
        generated = generate_reports(template,
                                     io.BytesIO(csv_test_data),
                                     chunk_size=size_limit,
                                     copy_header_line=False)
        for report in generated:
            self.assertEqual(report["feed.name"], expected_name)
            decoded = base64.b64decode(report["raw"])
            pieces.append(decoded)

        self.assertEqual(b"".join(pieces), csv_test_data)
Exemple #11
0
    def process(self):
        """Fetch reports linked from unread mails and send them on.

        Unread messages in ``self.parameters.folder`` are requested from the
        mailbox returned by ``self.connect_mailbox()``.  Messages whose
        subject does not match ``self.parameters.subject_regex`` (when one is
        set) are skipped.  For each plain-text body part containing a match
        for ``self.parameters.url_regex`` the URL is downloaded, retrying on
        timeouts up to ``self.http_timeout_max_tries`` attempts; the response
        content is split into reports by ``generate_reports``, the reports
        are sent and the message is marked as seen.

        The mailbox is logged out even when processing raises.

        Raises:
            ValueError: if a download answers with a non-2xx status code.
        """
        mailbox = self.connect_mailbox()
        try:
            emails = mailbox.messages(folder=self.parameters.folder, unread=True)

            if emails:
                for uid, message in emails:

                    # CRLF followed by whitespace is collapsed to one space
                    # before matching (presumably to undo header folding).
                    if (self.parameters.subject_regex and
                            not re.search(self.parameters.subject_regex,
                                          re.sub(r"\r\n\s", " ", message.subject))):
                        continue

                    erroneous = False  # Set to True if a download failed.

                    for body in message.body['plain']:
                        match = re.search(self.parameters.url_regex, str(body))
                        if not match:
                            continue

                        # Strip leading and trailing spaces, newlines and
                        # carriage returns.
                        url = match.group().strip()

                        self.logger.info("Downloading report from %r.", url)
                        timeoutretries = 0
                        resp = None
                        while timeoutretries < self.http_timeout_max_tries and resp is None:
                            try:
                                resp = requests.get(url=url,
                                                    auth=self.auth, proxies=self.proxy,
                                                    headers=self.http_header,
                                                    verify=self.http_verify_cert,
                                                    cert=self.ssl_client_cert,
                                                    timeout=self.http_timeout_sec)

                            except requests.exceptions.Timeout:
                                timeoutretries += 1
                                self.logger.warning("Timeout whilst downloading the report.")

                        # The retry loop only ends without a response once
                        # all attempts are used up.
                        if resp is None:
                            self.logger.error("Request timed out %i times in a row. ",
                                              timeoutretries)
                            erroneous = True
                            # Give up on this body part; later parts of the
                            # same message are still tried.
                            continue

                        if resp.status_code // 100 != 2:
                            raise ValueError('HTTP response status code was {}.'
                                             ''.format(resp.status_code))

                        self.logger.info("Report downloaded.")

                        template = self.new_report()

                        for report in generate_reports(template, io.BytesIO(resp.content),
                                                       self.chunk_size,
                                                       self.chunk_replicate_header):
                            self.send_message(report)

                        # Only mark read if message relevant to this instance,
                        # so other instances watching this mailbox will still
                        # check it.
                        try:
                            mailbox.mark_seen(uid)
                        except imaplib.abort:
                            # Disconnect, see https://github.com/certtools/intelmq/issues/852
                            mailbox = self.connect_mailbox()
                            mailbox.mark_seen(uid)

                    if not erroneous:
                        self.logger.info("Email report read.")
                    else:
                        self.logger.error("Email report read with errors, the report was not processed.")
        finally:
            # Log out of whichever connection is current, also on errors.
            mailbox.logout()
Exemple #12
0
    def process(self):
        """Fetch reports linked from unread mails and send them on.

        Connects to the mailbox described by the ``mail_*`` parameters and
        asks for the unread messages in ``self.parameters.folder``.  Messages
        whose subject does not match ``self.parameters.subject_regex`` (when
        one is set) are skipped.  For each plain-text body part containing a
        match for ``self.parameters.url_regex`` the URL is downloaded,
        retrying on timeouts up to ``self.http_timeout_max_tries`` attempts;
        the response content is split into reports by ``generate_reports``,
        the reports are sent and the message is marked as seen.

        The mailbox is logged out even when processing raises.

        Raises:
            ValueError: if a download answers with a non-2xx status code.
        """
        mailbox = imbox.Imbox(self.parameters.mail_host,
                              self.parameters.mail_user,
                              self.parameters.mail_password,
                              self.parameters.mail_ssl)
        try:
            emails = mailbox.messages(folder=self.parameters.folder, unread=True)

            if emails:
                for uid, message in emails:

                    # CRLF followed by whitespace is collapsed to one space
                    # before matching (presumably to undo header folding).
                    if (self.parameters.subject_regex and
                            not re.search(self.parameters.subject_regex,
                                          re.sub(r"\r\n\s", " ", message.subject))):
                        continue

                    erroneous = False  # Set to True if a download failed.

                    for body in message.body['plain']:
                        match = re.search(self.parameters.url_regex, str(body))
                        if not match:
                            continue

                        # Strip leading and trailing spaces, newlines and
                        # carriage returns.
                        url = match.group().strip()

                        self.logger.info("Downloading report from %r.", url)
                        timeoutretries = 0
                        resp = None
                        while timeoutretries < self.http_timeout_max_tries and resp is None:
                            try:
                                resp = requests.get(url=url,
                                                    auth=self.auth, proxies=self.proxy,
                                                    headers=self.http_header,
                                                    verify=self.http_verify_cert,
                                                    cert=self.ssl_client_cert,
                                                    timeout=self.http_timeout_sec)

                            except requests.exceptions.Timeout:
                                timeoutretries += 1
                                self.logger.warning("Timeout whilst downloading the report.")

                        # The retry loop only ends without a response once
                        # all attempts are used up.
                        if resp is None:
                            self.logger.error("Request timed out %i times in a row. ",
                                              timeoutretries)
                            erroneous = True
                            # Give up on this body part; later parts of the
                            # same message are still tried.
                            continue

                        if resp.status_code // 100 != 2:
                            raise ValueError('HTTP response status code was {}.'
                                             ''.format(resp.status_code))

                        self.logger.info("Report downloaded.")

                        template = self.new_report()

                        for report in generate_reports(template, io.BytesIO(resp.content),
                                                       self.chunk_size,
                                                       self.chunk_replicate_header):
                            self.send_message(report)

                        # Only mark read if message relevant to this instance,
                        # so other instances watching this mailbox will still
                        # check it.
                        mailbox.mark_seen(uid)

                    if not erroneous:
                        self.logger.info("Email report read.")
                    else:
                        self.logger.error("Email report read with errors, the report was not processed.")
        finally:
            # Close the connection on every exit path, including errors.
            mailbox.logout()
Exemple #13
0
    def process(self):
        """Fetch reports linked from unread mails and send them on.

        Unread messages in ``self.parameters.folder`` are requested from the
        mailbox returned by ``self.connect_mailbox()``, filtered by the
        optional ``sent_to`` / ``sent_from`` parameters.  Messages whose
        subject does not match ``self.parameters.subject_regex`` (when one is
        set) are skipped with a debug message.  For each plain-text body part
        containing a match for ``self.parameters.url_regex`` the URL is
        downloaded, retrying on timeouts up to
        ``self.http_timeout_max_tries`` attempts.  Non-empty response content
        is split into reports by ``generate_reports`` and sent; the message
        is marked as seen in either case.

        The mailbox is logged out even when processing raises.

        Raises:
            ValueError: if a download answers with a non-2xx status code.
        """
        mailbox = self.connect_mailbox()
        try:
            emails = mailbox.messages(folder=self.parameters.folder, unread=True,
                                      sent_to=getattr(self.parameters, "sent_to", None),
                                      sent_from=getattr(self.parameters, "sent_from", None))

            if emails:
                for uid, message in emails:

                    # CRLF followed by whitespace is collapsed to one space
                    # before matching (presumably to undo header folding).
                    if (self.parameters.subject_regex and
                            not re.search(self.parameters.subject_regex,
                                          re.sub(r"\r\n\s", " ", message.subject))):
                        self.logger.debug("Message with date %s skipped because subject %r does not match.",
                                          message.date, message.subject)
                        continue

                    erroneous = False  # Set to True if a download failed.

                    for body in message.body['plain']:
                        match = re.search(self.parameters.url_regex, str(body))
                        if not match:
                            continue

                        # Strip leading and trailing spaces, newlines and
                        # carriage returns.
                        url = match.group().strip()

                        self.logger.info("Downloading report from %r.", url)
                        timeoutretries = 0
                        resp = None
                        while timeoutretries < self.http_timeout_max_tries and resp is None:
                            try:
                                resp = requests.get(url=url,
                                                    auth=self.auth, proxies=self.proxy,
                                                    headers=self.http_header,
                                                    verify=self.http_verify_cert,
                                                    cert=self.ssl_client_cert,
                                                    timeout=self.http_timeout_sec)

                            except requests.exceptions.Timeout:
                                timeoutretries += 1
                                self.logger.warning("Timeout whilst downloading the report.")

                        # The retry loop only ends without a response once
                        # all attempts are used up.
                        if resp is None:
                            self.logger.error("Request timed out %i times in a row. ",
                                              timeoutretries)
                            erroneous = True
                            # Give up on this body part; later parts of the
                            # same message are still tried.
                            continue

                        if resp.status_code // 100 != 2:
                            raise ValueError('HTTP response status code was {}.'
                                             ''.format(resp.status_code))
                        if not resp.content:
                            self.logger.warning('Got empty reponse from server.')
                        else:
                            self.logger.info("Report downloaded.")

                            template = self.new_report()

                            for report in generate_reports(template, io.BytesIO(resp.content),
                                                           self.chunk_size,
                                                           self.chunk_replicate_header):
                                self.send_message(report)

                        # Only mark read if message relevant to this instance,
                        # so other instances watching this mailbox will still
                        # check it.
                        try:
                            mailbox.mark_seen(uid)
                        except imaplib.abort:
                            # Disconnect, see https://github.com/certtools/intelmq/issues/852
                            mailbox = self.connect_mailbox()
                            mailbox.mark_seen(uid)

                    if not erroneous:
                        self.logger.info("Email report read.")
                    else:
                        self.logger.error("Email report read with errors, the report was not processed.")
            else:
                self.logger.debug("No unread mails to check.")
        finally:
            # Log out of whichever connection is current, also on errors.
            mailbox.logout()
Exemple #14
0
    def process_message(self, uid, message):
        """Download the reports linked from one mail and send them on.

        Every plain-text body part is searched with
        ``self.parameters.url_regex`` (bytes parts are decoded as UTF-8
        first).  A matching URL is downloaded, retrying on timeouts up to
        ``self.http_timeout_max_tries`` attempts.  Timeouts on every attempt
        and non-2xx answers are logged as errors and the body part is
        skipped.  Non-empty response content is split into reports by
        ``generate_reports`` and sent.

        ``uid`` is accepted but not used by this method.

        Returns:
            bool: True if at least one linked download answered with a 2xx
            status, or if an error occurred and
            ``self.parameters.error_procedure`` is ``'pass'``; otherwise
            False.
        """
        erroneous = False  # Set to True if a download failed.
        seen = False

        for body in message.body['plain']:
            text = body.decode('utf-8') if isinstance(body, bytes) else body
            match = re.search(self.parameters.url_regex, str(text))
            if not match:
                continue

            # Strip leading and trailing spaces, newlines and carriage
            # returns.
            url = match.group().strip()

            self.logger.info("Downloading report from %r.", url)
            timeoutretries = 0
            resp = None
            while timeoutretries < self.http_timeout_max_tries and resp is None:
                try:
                    resp = requests.get(url=url,
                                        auth=self.auth,
                                        proxies=self.proxy,
                                        headers=self.http_header,
                                        verify=self.http_verify_cert,
                                        cert=self.ssl_client_cert,
                                        timeout=self.http_timeout_sec)

                except requests.exceptions.Timeout:
                    timeoutretries += 1
                    self.logger.warning(
                        "Timeout whilst downloading the report.")

            # The retry loop only ends without a response once all attempts
            # are used up.
            if resp is None:
                self.logger.error("Request timed out %i times in a row. ",
                                  timeoutretries)
                erroneous = True
                # Give up on this body part; later parts are still tried.
                continue

            if resp.status_code // 100 != 2:
                self.logger.error('HTTP response status code was %s.',
                                  resp.status_code)
                erroneous = True
                continue

            if not resp.content:
                self.logger.warning('Got empty reponse from server.')
            else:
                self.logger.info("Report downloaded.")

                template = self.new_report()

                for report in generate_reports(
                        template, io.BytesIO(resp.content),
                        self.chunk_size, self.chunk_replicate_header):
                    self.send_message(report)

            seen = True

        if not erroneous:
            self.logger.info("Email report read.")
        elif self.parameters.error_procedure == 'pass':
            seen = True
        else:
            self.logger.error(
                "Email report read with above errors, the report was not processed."
            )

        return seen
    def process_message(self, uid, message):
        """Fetch the reports a mail links to and pass them on in chunks.

        Each plain-text body part (bytes are decoded as UTF-8) is searched
        with ``self.parameters.url_regex``.  A matching URL is requested,
        with up to ``self.http_timeout_max_tries`` attempts when the request
        times out.  If every attempt times out, or the answer has a non-2xx
        status, an error is logged and that body part is skipped.  Non-empty
        response content is turned into reports by ``generate_reports`` and
        sent.

        ``uid`` is accepted but not used by this method.

        Returns:
            bool: True if at least one linked download answered with a 2xx
            status, or if an error occurred and
            ``self.parameters.error_procedure`` is ``'pass'``; otherwise
            False.
        """
        erroneous = False  # Set to True if a download failed.
        seen = False

        for body in message.body['plain']:
            text = body.decode('utf-8') if isinstance(body, bytes) else body
            match = re.search(self.parameters.url_regex, str(text))
            if not match:
                continue

            # Strip leading and trailing spaces, newlines and carriage
            # returns.
            url = match.group().strip()

            self.logger.info("Downloading report from %r.", url)
            timeoutretries = 0
            resp = None
            while timeoutretries < self.http_timeout_max_tries and resp is None:
                try:
                    resp = requests.get(url=url,
                                        auth=self.auth, proxies=self.proxy,
                                        headers=self.http_header,
                                        verify=self.http_verify_cert,
                                        cert=self.ssl_client_cert,
                                        timeout=self.http_timeout_sec)

                except requests.exceptions.Timeout:
                    timeoutretries += 1
                    self.logger.warning("Timeout whilst downloading the report.")

            # Leaving the retry loop without a response means every attempt
            # timed out.
            if resp is None:
                self.logger.error("Request timed out %i times in a row. ",
                                  timeoutretries)
                erroneous = True
                # Skip this body part only; the remaining parts are still
                # examined.
                continue

            if resp.status_code // 100 != 2:
                self.logger.error('HTTP response status code was %s.',
                                  resp.status_code)
                erroneous = True
                continue

            if not resp.content:
                self.logger.warning('Got empty reponse from server.')
            else:
                self.logger.info("Report downloaded.")

                template = self.new_report()

                for report in generate_reports(template, io.BytesIO(resp.content),
                                               self.chunk_size,
                                               self.chunk_replicate_header):
                    self.send_message(report)

            seen = True

        if not erroneous:
            self.logger.info("Email report read.")
        elif self.parameters.error_procedure == 'pass':
            seen = True
        else:
            self.logger.error("Email report read with above errors, the report was not processed.")

        return seen