def main(self, filename, strict_validation, database, show_info=True):
    self.show_info = show_info
    if database:
        self.database_handler = DatabaseHandler()
        self.database_handler.disable_journaling()
    if filename == '-':  # pragma: no cover
        f = sys.stdin
    else:
        f = open(filename, encoding='utf-8', errors='backslashreplace')

    for paragraph in split_paragraphs_rpsl(f):
        self.parse_object(paragraph, strict_validation)

    print(f'Processed {self.obj_parsed} objects, {self.obj_errors} with errors')
    if self.obj_unknown:
        unknown_formatted = ', '.join(self.unknown_object_classes)
        print(f'Ignored {self.obj_unknown} objects due to unknown object classes: {unknown_formatted}')

    if self.database_handler:
        self.database_handler.commit()
        self.database_handler.close()
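# All parsers in this file iterate over split_paragraphs_rpsl(), which these
# call sites suggest yields one blank-line-separated RPSL paragraph at a time
# from a file or string. A minimal standalone sketch under that assumption;
# the helper name and sample data below are illustrative, not irrd's own code.
from typing import Iterable, Iterator

def split_paragraphs_sketch(lines: Iterable[str]) -> Iterator[str]:
    paragraph = ''
    for line in lines:
        if line.strip():
            paragraph += line
        elif paragraph:
            yield paragraph
            paragraph = ''
    if paragraph:
        yield paragraph

sample = 'person: Test Person\nnic-hdl: TEST-X\n\nroute: 192.0.2.0/24\norigin: AS65530\n'
for p in split_paragraphs_sketch(sample.splitlines(keepends=True)):
    print(repr(p))  # two paragraphs: the person object, then the route object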
def run_import(self):
    f = open(self.filename, encoding='utf-8', errors='backslashreplace')
    for paragraph in split_paragraphs_rpsl(f):
        self.parse_object(paragraph)
    self.log_report()
    f.close()
def clean(self, query: str, response: Optional[str]) -> Optional[str]:
    """Clean the query response, so that the text can be compared."""
    if not response:
        return response
    irr_query = query[:2].lower()
    response = response.strip().lower()

    cleaned_result_list = None
    if irr_query in SSP_QUERIES or (irr_query == '!r' and query.lower().strip().endswith(',o')):
        cleaned_result_list = response.split(' ')
    if irr_query in ['!6', '!g'] and cleaned_result_list:
        cleaned_result_list = [str(IP(ip)) for ip in cleaned_result_list]
    if cleaned_result_list:
        return ' '.join(sorted(list(set(cleaned_result_list))))
    else:
        new_responses = []
        for paragraph in split_paragraphs_rpsl(response):
            rpsl_obj = rpsl_object_from_text(paragraph.strip(), strict_validation=False)
            new_responses.append(rpsl_obj)

        new_responses.sort(key=lambda i: i.parsed_data.get('source', '') + i.rpsl_object_class + i.pk())
        texts = [r.render_rpsl_text() for r in new_responses]
        return '\n'.join(OrderedSet(texts))
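# A hedged sketch of the set-style normalization in clean() above: responses to
# SSP_QUERIES (and '!r ...,o') are compared order-insensitively after
# deduplication, with '!6'/'!g' answers also normalized per prefix. This
# assumes IP is IPy's IP class, as the str(IP(ip)) call suggests; the sample
# response is made up.
from IPy import IP

response = '192.0.2.128/25 192.0.2.0/25 192.0.2.128/25'
normalized = ' '.join(sorted({str(IP(prefix)) for prefix in response.split(' ')}))
print(normalized)  # '192.0.2.0/25 192.0.2.128/25'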
def _split_stream(self, data: str) -> None:
    """Split a stream into individual operations."""
    paragraphs = split_paragraphs_rpsl(data, strip_comments=False)
    last_comment_seen = ''

    for paragraph in paragraphs:
        if self._handle_possible_start_line(paragraph):
            continue
        elif paragraph.startswith('%') or paragraph.startswith('#'):
            last_comment_seen = paragraph
        elif paragraph.startswith('ADD') or paragraph.startswith('DEL'):
            self._handle_operation(paragraph, paragraphs)

    if self.nrtm_source and last_comment_seen.upper().strip() != f'%END {self.source}':
        msg = f'NRTM stream error for {self.source}: last comment paragraph expected to be ' \
              f'"%END {self.source}", but is actually {last_comment_seen.upper().strip()}'
        logger.error(msg)
        self.database_handler.record_mirror_error(self.source, msg)
        raise ValueError(msg)

    if self._current_op_serial > self.last_serial and self.version != '3':
        msg = f'NRTM stream error for {self.source}: expected operations up to and including serial ' \
              f'{self.last_serial}, last operation was {self._current_op_serial}'
        logger.error(msg)
        self.database_handler.record_mirror_error(self.source, msg)
        raise ValueError(msg)

    if self.last_serial > 0:
        self.database_handler.record_serial_newest_mirror(self.source, self.last_serial)
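# For reference, a hedged sketch of the NRTM stream shape that _split_stream()
# above walks through, paragraph by paragraph. The source name and serials are
# invented. In NRTM version 3 each ADD/DEL paragraph carries its serial, and
# the object text follows as the next paragraph, which is presumably why
# _handle_operation() also receives the paragraphs iterator.
SAMPLE_NRTM_V3_STREAM = """%START Version: 3 EXAMPLE 11-12

ADD 11

person: Test Person
nic-hdl: TEST-X
source: EXAMPLE

DEL 12

person: Old Person
nic-hdl: OLD-X
source: EXAMPLE

%END EXAMPLE
"""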
def run_import(self) -> Optional[str]:
    """
    Run the actual import. If direct_error_return is set, returns an error
    string on encountering the first error. Otherwise, returns None.
    """
    f = open(self.filename, encoding='utf-8', errors='backslashreplace')
    for paragraph in split_paragraphs_rpsl(f):
        error = self._parse_object(paragraph)
        if error is not None:
            return error

    self.log_report()
    f.close()
    return None
def _split_stream(self, data: str) -> None:
    """Split a stream into individual operations."""
    paragraphs = split_paragraphs_rpsl(data, strip_comments=False)
    for paragraph in paragraphs:
        if self._handle_possible_start_line(paragraph):
            continue
        elif paragraph.startswith('%') or paragraph.startswith('#'):
            continue  # pragma: no cover -- falsely detected as not run by coverage library
        elif paragraph.startswith('ADD') or paragraph.startswith('DEL'):
            self._handle_operation(paragraph, paragraphs)

    if self._current_op_serial > self.last_serial and self.version != '3':
        msg = f'NRTM stream error for {self.source}: expected operations up to and including serial ' \
              f'{self.last_serial}, last operation was {self._current_op_serial}'
        logger.error(msg)
        self.database_handler.record_mirror_error(self.source, msg)
        raise ValueError(msg)

    if self.last_serial > 0:
        self.database_handler.force_record_serial_seen(self.source, self.last_serial)
def run_import(self) -> Optional[str]:
    """
    Run the actual import. If direct_error_return is set, returns an error
    string on encountering the first error. Otherwise, returns None.
    """
    f = open(self.filename, encoding='utf-8', errors='backslashreplace')
    for paragraph in split_paragraphs_rpsl(f):
        try:
            rpsl_obj = self._parse_object(paragraph)
        except RPSLImportError as e:
            if self.direct_error_return:
                return e.message
        else:
            if rpsl_obj:
                self.database_handler.upsert_rpsl_object(rpsl_obj, origin=JournalEntryOrigin.mirror)

    self.log_report()
    f.close()

    if self.serial:
        self.database_handler.record_serial_seen(self.source, self.serial)
    return None
def run_import(self) -> Optional[str]:
    """
    Run the actual import. If direct_error_return is set, returns an error
    string on encountering the first error. Otherwise, returns None.
    """
    # First, parse all objects from the import file, so the full set of
    # primary keys from the file is known.
    objs_from_file = []
    f = open(self.filename, encoding='utf-8', errors='backslashreplace')
    for paragraph in split_paragraphs_rpsl(f):
        try:
            rpsl_obj = self._parse_object(paragraph)
        except RPSLImportError as e:
            if self.direct_error_return:
                return e.message
        else:
            if rpsl_obj:
                objs_from_file.append(rpsl_obj)
    f.close()

    # Collect the primary keys currently in the database for this source.
    query = RPSLDatabaseQuery(
        ordered_by_sources=False, enable_ordering=False, column_names=['rpsl_pk']
    ).sources([self.source])
    current_pks = {row['rpsl_pk'] for row in self.database_handler.execute_query(query)}

    # Determine new, deleted and retained objects by set arithmetic on the PKs.
    file_objs_by_pk = {obj.pk(): obj for obj in objs_from_file}
    file_pks = set(file_objs_by_pk.keys())
    new_pks = file_pks - current_pks
    deleted_pks = current_pks - file_pks
    retained_pks = file_pks.intersection(current_pks)

    self.obj_new = len(new_pks)
    self.obj_deleted = len(deleted_pks)
    self.obj_retained = len(retained_pks)

    for rpsl_pk, file_obj in filter(lambda i: i[0] in new_pks, file_objs_by_pk.items()):
        self.database_handler.upsert_rpsl_object(file_obj, JournalEntryOrigin.synthetic_nrtm)

    for rpsl_pk in deleted_pks:
        self.database_handler.delete_rpsl_object(
            rpsl_pk=rpsl_pk, source=self.source, origin=JournalEntryOrigin.synthetic_nrtm
        )

    # This query does not filter on retained_pks. The expectation is that most
    # objects are retained, and therefore it is much faster to query the entire source.
    query = RPSLDatabaseQuery(
        ordered_by_sources=False, enable_ordering=False, column_names=['rpsl_pk', 'object_text']
    )
    query = query.sources([self.source])
    for row in self.database_handler.execute_query(query):
        try:
            file_obj = file_objs_by_pk[row['rpsl_pk']]
        except KeyError:
            continue
        if file_obj.render_rpsl_text() != remove_last_modified(row['object_text']):
            self.database_handler.upsert_rpsl_object(file_obj, JournalEntryOrigin.synthetic_nrtm)
            self.obj_modified += 1

    self.log_report()
    return None
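# A minimal standalone sketch of the set arithmetic used above to turn a full
# file import into synthetic add/delete/modify operations. The keys and values
# are illustrative stand-ins for RPSL primary keys and rendered object texts.
current = {'AS65530': 'old text', 'AS65531': 'same text'}
from_file = {'AS65531': 'same text', 'AS65532': 'new text'}

new_pks = from_file.keys() - current.keys()        # {'AS65532'}: upsert as new
deleted_pks = current.keys() - from_file.keys()    # {'AS65530'}: delete
retained_pks = from_file.keys() & current.keys()   # {'AS65531'}: compare texts
modified_pks = {pk for pk in retained_pks if from_file[pk] != current[pk]}
print(new_pks, deleted_pks, modified_pks)  # {'AS65532'} {'AS65530'} set()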