def process(self):
    """Download the configured report over HTTP and send one report
    message per file found in the response (or one for the raw body).
    """
    # Optional time-based templating of the URL via http_url_formatting.
    url_format = getattr(self.parameters, 'http_url_formatting', False)
    if not url_format:
        http_url = self.parameters.http_url
    else:
        try:
            http_url = self.parameters.http_url.format(time=Time(url_format))
        except TypeError:
            self.logger.error(
                "Wrongly formatted http_url_formatting parameter: %s. Should be boolean or a time-delta JSON.",
                url_format)
            raise
        except KeyError:
            self.logger.error(
                "Wrongly formatted http_url parameter: %s. Possible misspell with 'time' variable.",
                self.parameters.http_url)
            raise

    self.logger.info("Downloading report from %r.", http_url)
    resp = self.session.get(url=http_url)
    if resp.status_code // 100 != 2:
        raise ValueError('HTTP response status code was %i.' % resp.status_code)
    self.logger.info("Report downloaded.")

    if self.extract_files:
        # An explicit list of archive members was configured.
        raw_reports = unzip(resp.content, self.extract_files,
                            return_names=True, logger=self.logger)
    else:
        # Try ZIP auto-detection first; on failure fall back to treating
        # the whole response body as a single (unnamed) report.
        try:
            raw_reports = tuple(unzip(resp.content, True,
                                      try_gzip=False, try_tar=False,
                                      logger=self.logger,
                                      return_names=True))
        except ValueError:
            raw_reports = [(None, resp.text)]
        else:
            self.logger.info('Extracting files: '
                             "'%s'.",
                             "', '".join(name for name, _ in raw_reports))

    for file_name, raw_report in raw_reports:
        report = self.new_report()
        report.add("raw", raw_report)
        report.add("feed.url", http_url)
        if file_name:
            report.add("extra.file_name", file_name)
        self.send_message(report)
def test_unzip_zip(self):
    """ Test the unzip function with a zip file. """
    archive = os.path.join(os.path.dirname(__file__),
                           '../assets/two_files.zip')
    with open(archive, 'rb') as stream:
        payload = stream.read()
    extracted = utils.unzip(payload, extract_files=True)
    self.assertEqual(tuple(extracted), (b'bar text\n', b'foo text\n'))
def test_unzip_gz(self):
    """ Test the unzip function with a gz file. """
    archive = os.path.join(os.path.dirname(__file__),
                           '../assets/foobar.gz')
    with open(archive, 'rb') as stream:
        payload = stream.read()
    self.assertEqual(utils.unzip(payload, extract_files=True),
                     (b'bar text\n', ))
def test_unzip_tar_gz_return_names(self):
    """ Test the unzip function with a tar gz file and return_names. """
    archive = os.path.join(os.path.dirname(__file__),
                           '../assets/two_files.tar.gz')
    with open(archive, 'rb') as stream:
        names_and_contents = utils.unzip(stream.read(),
                                         extract_files=True,
                                         return_names=True)
    self.assertEqual(tuple(names_and_contents),
                     (('bar', b'bar text\n'), ('foo', b'foo text\n')))
def process(self):
    """Download the configured report over HTTP and send one report
    message per extracted file (or one for the plain response body).

    Fix: the ZIP auto-detection fallback previously ran unconditionally,
    so when ``extract_files`` was set its result — and the misleading
    "Extracting files" log line — were computed and then thrown away.
    The fallback now only runs when no explicit file list is configured.
    """
    formatting = getattr(self.parameters, 'http_url_formatting', False)
    if formatting:
        try:
            http_url = self.parameters.http_url.format(time=Time(formatting))
        except TypeError:
            self.logger.error(
                "Wrongly formatted http_url_formatting parameter: %s. Should be boolean or a time-delta JSON.",
                formatting)
            raise
        except KeyError:
            self.logger.error(
                "Wrongly formatted http_url parameter: %s. Possible misspell with 'time' variable.",
                self.parameters.http_url)
            raise
    else:
        http_url = self.parameters.http_url

    self.logger.info("Downloading report from %r.", http_url)
    resp = self.session.get(url=http_url)
    if resp.status_code // 100 != 2:
        raise ValueError('HTTP response status code was %i.' % resp.status_code)
    self.logger.info("Report downloaded.")

    if self.extract_files:
        # Explicit list (or comma-separated string) of archive members.
        if isinstance(self.extract_files, str) and len(self.extract_files):
            self.extract_files = self.extract_files.split(",")
            self.logger.info('Extracting files from archive: '
                             "'%s'.", "', '".join(self.extract_files))
        else:
            self.logger.info('Extracting all files from archive.')
        raw_reports = list(unzip(resp.content, self.extract_files))
    else:
        # Auto-detect a ZIP archive; otherwise treat the body as text.
        raw_reports = []
        try:
            zfp = zipfile.ZipFile(io.BytesIO(resp.content), "r")
        except zipfile.BadZipfile:
            raw_reports.append(resp.text)
        else:
            self.logger.info('Extracting files:'
                             "'%s'.", "', '".join(zfp.namelist()))
            for filename in zfp.namelist():
                raw_reports.append(zfp.read(filename))

    for raw_report in raw_reports:
        report = self.new_report()
        report.add("raw", raw_report)
        report.add("feed.url", http_url)
        self.send_message(report)
def process_message(self, uid, message):
    """Scan one e-mail for attachments matching ``attach_regex`` and
    emit each (optionally unzipped) attachment as a report message.

    :param uid: mailbox UID of the message (part of the mail-collector
        interface; not used in this method).
    :param message: imbox message object with ``attachments``,
        ``subject``, ``sent_from``, ``message_id`` and ``date``.
    :returns: True when at least one attachment matched (the caller may
        then mark the message as seen), else False.

    Fix: removed a dead ``self.new_report()`` call before the extraction
    branch — its result was never used, since the loop below builds a
    fresh report per extracted file.
    """
    seen = False
    for attach in message.attachments:
        if not attach:
            continue
        try:
            attach_filename = attach['filename']
        except KeyError:
            # https://github.com/certtools/intelmq/issues/1538
            self.logger.debug(
                'Skipping attachment because of missing filename.')
            continue
        if attach_filename.startswith('"'):
            # for imbox versions older than 0.9.5, see also above
            attach_filename = attach_filename[1:-1]
        if re.search(self.attach_regex, attach_filename):
            self.logger.debug("Found suitable attachment %s.",
                              attach_filename)
            if self.extract_files:
                raw_reports = unzip(attach['content'].read(),
                                    self.extract_files,
                                    return_names=True,
                                    logger=self.logger)
            else:
                raw_reports = ((attach_filename,
                                attach['content'].read()), )
            for file_name, raw_report in raw_reports:
                report = self.new_report()
                report.add("raw", raw_report)
                if file_name:
                    report.add("extra.file_name", file_name)
                report["extra.email_subject"] = message.subject
                report["extra.email_from"] = ','.join(
                    x['email'] for x in message.sent_from)
                report["extra.email_message_id"] = message.message_id
                report["extra.email_date"] = message.date
                self.send_message(report)
            # Only mark read if message relevant to this instance,
            # so other instances watching this mailbox will still
            # check it.
            seen = True
    self.logger.info("Email report read.")
    return seen
def process(self):
    """Poll an RT (Request Tracker) instance for matching tickets and
    emit their attachments or linked downloads as report messages.

    NOTE(review): several locals (``att_id``, ``att_name``, ``created``,
    ``url``) are bound only on specific branches; the code relies on the
    ``success``/``content`` flags to guarantee they exist before use.
    """
    RT = rt.Rt(self.parameters.uri, self.parameters.user,
               self.parameters.password)
    if not RT.login():
        raise ValueError('Login failed.')

    # Restrict the search window when configured; 'relative' recomputes
    # the cut-off from "now" on every run.
    if self.not_older_than_type:
        if self.not_older_than_type == 'relative':
            self.not_older_than = datetime.now() - self.not_older_than_relative
        kwargs = {'Created__gt': self.not_older_than.isoformat()}
        self.logger.debug('Searching for tickets newer than %r.',
                          kwargs['Created__gt'])
    else:
        kwargs = {}

    # Map configured collector parameters onto RT search fields.
    for parameter_name, rt_name in self.parameter_mapping.items():
        parameter_value = getattr(self.parameters, parameter_name, None)
        if parameter_value:
            kwargs[rt_name] = parameter_value

    query = RT.search(order='Created', **kwargs)
    self.logger.info('%s results on search query.', len(query))

    for ticket in query:
        # Ticket ids come back as e.g. 'ticket/123'.
        ticket_id = int(ticket['id'].split('/')[1])
        self.logger.debug('Process ticket %s.', ticket_id)
        content = 'attachment'
        success = False

        # Preferred source: a ticket attachment whose name matches.
        if self.parameters.attachment_regex:
            for (att_id, att_name, _, _) in RT.get_attachments(ticket_id):
                if re.search(self.parameters.attachment_regex, att_name):
                    self.logger.debug('Found attachment %s: %r.',
                                      att_id, att_name)
                    success = True
                    content = 'attachment'
                    self.extract_files = self.extract_attachment
                    break

        # Fallback: a URL in the first history entry's text body.
        if not success and self.parameters.url_regex:
            ticket = RT.get_history(ticket_id)[0]
            created = ticket['Created']
            urlmatch = re.search(self.parameters.url_regex,
                                 ticket['Content'])
            if urlmatch:
                content = 'url'
                self.extract_files = self.extract_download
                url = urlmatch.group(0)
                self.logger.debug('Matching URL found %r.', url)
                success = True

        if not success:
            self.logger.info('No matching attachment or URL found.')
            continue

        report = self.new_report()
        if content == 'attachment':
            attachment = RT.get_attachment_content(ticket_id, att_id)
            created = RT.get_attachment(ticket_id, att_id)['Created']
            raw = attachment
        else:
            resp = self.session.get(url=url)
            response_code_class = resp.status_code // 100
            if response_code_class != 2:
                self.logger.error('HTTP response status code for %r was %s. '
                                  'Skipping ticket %d.',
                                  url, resp.status_code, ticket_id)
                if response_code_class == 4:
                    # Client errors: body usually explains the rejection.
                    self.logger.debug('Server response: %r.', resp.text)
                # Still update/take the ticket so it is not retried forever.
                if self.parameters.set_status:
                    RT.edit_ticket(ticket_id,
                                   status=self.parameters.set_status)
                if self.parameters.take_ticket:
                    try:
                        RT.take(ticket_id)
                    except rt.BadRequest:
                        self.logger.exception(
                            "Could not take ticket %s.", ticket_id)
                else:
                    self.logger.info('Skipping now.')
                continue
            self.logger.info("Report #%d downloaded.", ticket_id)
            self.logger.debug("Downloaded content has %d bytes.",
                              len(resp.content))
            # extract_download decides raw bytes (archive) vs decoded text.
            if self.extract_download:
                raw = resp.content
            else:
                raw = resp.text
            report["extra.file_name"] = file_name_from_response(resp)

        report.add("rtir_id", ticket_id)
        report.add("time.observation", created + ' UTC', overwrite=True)
        """ On RT 3.8 these fields are only available on the original ticket,
        not the first history element as in 4.4 """
        if "Subject" not in ticket:
            ticket = RT.get_ticket(ticket_id)
        report.add("extra.email_subject", ticket["Subject"])
        report.add("extra.ticket_subject", ticket["Subject"])
        report.add("extra.email_from", ','.join(ticket["Requestors"]))
        report.add("extra.ticket_requestors", ','.join(ticket["Requestors"]))
        report.add("extra.ticket_queue", ticket["Queue"])
        report.add("extra.ticket_status", ticket["Status"])
        report.add("extra.ticket_owner", ticket["Owner"])

        if self.extract_files:
            try:
                unzipped = unzip(raw, self.extract_files,
                                 return_names=True, logger=self.logger)
            except ValueError:
                self.logger.error('Could not uncompress the file. '
                                  'Skipping for now.')
                continue
            for file_name, raw_report in unzipped:
                """ File name priority is:
                 * From the archive (zip, tar.gz)
                 * From the HTTP Response
                 * From the Attachment name
                For gz attachments, only the last options works
                """
                report_new = report.copy()
                report_new.add("raw", raw_report)
                report_new.add("extra.file_name", file_name, overwrite=True)
                # NOTE(review): att_name is only bound when the attachment
                # branch matched — presumably extract_files being set here
                # implies that branch ran; confirm for the URL case.
                if "extra.file_name" not in report_new and att_name.endswith(
                        '.gz'):
                    report_new["extra.file_name"] = att_name[:-3]
                self.send_message(report_new)
        else:
            report.add("raw", raw)
            self.send_message(report)

        # Ticket post-processing after successful delivery.
        if self.parameters.take_ticket:
            try:
                RT.take(ticket_id)
            except rt.BadRequest:
                self.logger.exception("Could not take ticket %s.", ticket_id)
        if self.parameters.set_status:
            RT.edit_ticket(ticket_id, status=self.parameters.set_status)
def process(self):
    """Download the configured report via HTTP, optionally verify its
    PGP signature, and send one report message per extracted file.
    """
    formatting = self.http_url_formatting
    if formatting:
        # Substitute time placeholders into the URL template.
        http_url = self.format_url(self.http_url, formatting)
    else:
        http_url = self.http_url

    self.logger.info("Downloading report from %r.", http_url)
    resp = self.http_get(http_url)

    if resp.status_code // 100 != 2:
        # Dump the whole exchange at debug level before failing.
        self.logger.debug('Request headers: %r.', resp.request.headers)
        self.logger.debug('Request body: %r.', resp.request.body)
        self.logger.debug('Response headers: %r.', resp.headers)
        self.logger.debug('Response body: %r.', resp.text)
        raise ValueError('HTTP response status code was %i.' % resp.status_code)

    self.logger.info("Report downloaded.")

    # PGP verification
    if self.use_gpg:
        result = self.verify_signature(data=resp.content)

        if not result:
            # Errors have been logged by the verify_signature function.
            return

        if not result.valid:
            # Invalid signature: reject the whole report.
            self.logger.error("Signature for key {0.key_id} is not valid: {0.status}. Report rejected.".format(result))
            return

        # Weak trust is only logged; the report is still accepted.
        if result.trust_level < 1:
            self.logger.debug("Trust level not defined for key {}.".format(result.key_id))
        elif result.trust_level < 3:
            self.logger.debug("Low trust level for key {0.key_id}: {0.trust_level}.".format(result))

        self.logger.info("PGP signature checked with key {0.key_id}: {0.status}.".format(result))

    # process reports
    raw_reports = []
    if not self.extract_files:
        # Try ZIP auto-detection; fall back to the plain response text
        # as a single unnamed report.
        try:
            raw_reports = tuple(unzip(resp.content, True,
                                      try_gzip=False, try_tar=False,
                                      logger=self.logger,
                                      return_names=True))
        except ValueError:
            raw_reports.append((None, resp.text))
        else:
            self.logger.info('Extracting files: '
                             "'%s'.",
                             "', '".join([file_name for file_name, _ in raw_reports]))
    else:
        # Explicitly configured list of archive members to extract.
        raw_reports = unzip(resp.content, self.extract_files,
                            return_names=True, logger=self.logger)

    for file_name, raw_report in raw_reports:
        report = self.new_report()
        report.add("raw", raw_report)
        report.add("feed.url", http_url)
        if file_name:
            report.add("extra.file_name", file_name)
        self.send_message(report)
def process(self):
    """Download the configured report via HTTP with timeout retries and
    send one report message per extracted file (or the plain body).

    Fixes applied:
    * deprecated ``logger.warn`` replaced by ``logger.warning``;
    * the ZIP auto-detection fallback previously ran unconditionally, so
      with ``extract_files`` set its result and log output were thrown
      away — it now runs only when no explicit file list is configured;
    * manual copy comprehension (shadowing builtin ``file``) replaced by
      ``list(...)``.
    """
    formatting = getattr(self.parameters, 'http_url_formatting', False)
    if formatting:
        try:
            http_url = self.parameters.http_url.format(time=Time(formatting))
        except TypeError:
            self.logger.error(
                "Wrongly formatted http_url_formatting parameter: %s. Should be boolean or a time-delta JSON.",
                formatting)
            raise
        except KeyError:
            self.logger.error(
                "Wrongly formatted http_url parameter: %s. Possible misspell with 'time' variable.",
                self.parameters.http_url)
            raise
    else:
        http_url = self.parameters.http_url

    self.logger.info("Downloading report from %r.", http_url)

    # Retry the download up to http_timeout_max_tries times on timeout.
    timeoutretries = 0
    resp = None
    while timeoutretries < self.http_timeout_max_tries and resp is None:
        try:
            resp = requests.get(url=http_url, auth=self.auth,
                                proxies=self.proxy,
                                headers=self.http_header,
                                verify=self.http_verify_cert,
                                cert=self.ssl_client_cert,
                                timeout=self.http_timeout_sec)
        except requests.exceptions.Timeout:
            timeoutretries += 1
            self.logger.warning("Timeout whilst downloading the report.")

    if resp is None and timeoutretries >= self.http_timeout_max_tries:
        self.logger.error("Request timed out %i times in a row.",
                          timeoutretries)
        return

    if resp.status_code // 100 != 2:
        raise ValueError('HTTP response status code was %i.'
                         % resp.status_code)

    self.logger.info("Report downloaded.")

    if self.extract_files:
        # Explicit list (or comma-separated string) of archive members.
        if isinstance(self.extract_files, str) and len(self.extract_files):
            self.extract_files = self.extract_files.split(",")
            self.logger.info('Extracting files from archive: '
                             "'%s'.", "', '".join(self.extract_files))
        else:
            self.logger.info('Extracting all files from archive.')
        raw_reports = list(unzip(resp.content, self.extract_files))
    else:
        # Auto-detect a ZIP archive; otherwise treat the body as text.
        raw_reports = []
        try:
            zfp = zipfile.ZipFile(io.BytesIO(resp.content), "r")
        except zipfile.BadZipfile:
            raw_reports.append(resp.text)
        else:
            self.logger.info('Extracting files:'
                             "'%s'.", "', '".join(zfp.namelist()))
            for filename in zfp.namelist():
                raw_reports.append(zfp.read(filename))

    for raw_report in raw_reports:
        report = self.new_report()
        report.add("raw", raw_report)
        report.add("feed.url", http_url)
        self.send_message(report)