def append_error(self, error_type, model, field, message, sourceline,
                 variable='', iati_id=None):
    """Queue a ``DatasetNote`` describing a parse problem on ``self.errors``.

    Nothing happens when ``settings.ERROR_LOGS_ENABLED`` is falsy.

    The identifier stored on the note is the first usable value among:

    1. ``iati_id``, when truthy;
    2. ``iati_identifier`` of the current ``Activity`` model when
       ``self.dataset.filetype == 1``, otherwise
       ``organisation_identifier`` of the current ``Organisation`` model;
    3. ``self.identifier``, when that attribute exists;
    4. the literal ``'no-identifier'``.

    ``message`` is always stringified and cut to 255 characters; ``variable``
    gets the same treatment only when it is truthy. The note is appended to
    ``self.errors`` and is not saved by this method.
    """
    if not settings.ERROR_LOGS_ENABLED:
        return

    if iati_id:
        resolved_id = iati_id
    else:
        if self.dataset.filetype == 1:
            model_name, id_attr = 'Activity', 'iati_identifier'
        else:
            model_name, id_attr = 'Organisation', 'organisation_identifier'
        current = self.get_model(model_name)
        # No fallback to the model's own ``id``: the original code had that
        # branch commented out.
        resolved_id = getattr(current, id_attr) if current else None

    if not resolved_id:
        resolved_id = (self.identifier if hasattr(self, 'identifier')
                       else 'no-identifier')

    variable_text = str(variable)[0:255] if variable else variable

    self.errors.append(DatasetNote(
        dataset=self.dataset,
        iati_identifier=resolved_id,
        model=model,
        field=field,
        message=str(message)[0:255],
        exception_type=error_type,
        line_number=sourceline,
        variable=variable_text,
    ))
def __init__(self, dataset, root=None, force_reparse=False):
    """Prepare an IATI parser for ``dataset``.

    Raises ``ParserDisabledError`` when ``settings.IATI_PARSER_DISABLED`` is
    set.

    When ``root`` is supplied it is used as the XML root and nothing is
    downloaded. Otherwise the file at ``dataset.source_url`` is fetched with
    ``FileGrabber``, its SHA-1 is compared with ``dataset.sha1`` (setting
    ``self.hash_changed``), and the content is parsed with lxml.

    If the download fails or the content is not well-formed XML,
    ``self.valid_dataset`` becomes False, a single ``DatasetNote`` is saved,
    and the dataset is saved with ``note_count = 1`` and an empty ``sha1``.
    """
    if settings.IATI_PARSER_DISABLED:
        raise ParserDisabledError(
            "The parser is disabled on this instance of OIPA")

    self.dataset = dataset
    self.url = dataset.source_url
    self.force_reparse = force_reparse
    self.hash_changed = True
    self.valid_dataset = True

    if root is not None:
        self.root = root
        self.parser = self._prepare_parser(self.root, dataset)
        return

    response = FileGrabber().get_the_file(self.url)

    from iati_synchroniser.models import DatasetNote

    def reject(message, exception_type, purge_notes):
        # Flag the dataset as invalid and persist one note explaining why.
        self.valid_dataset = False
        if purge_notes:
            DatasetNote.objects.filter(dataset=self.dataset).delete()
        DatasetNote(
            dataset=self.dataset,
            iati_identifier="n/a",
            model="n/a",
            field="n/a",
            message=message,
            exception_type=exception_type,
            line_number=None,
        ).save()
        self.dataset.note_count = 1
        # A file that could not be fetched or parsed keeps no hash.
        self.dataset.sha1 = ''
        self.dataset.save()

    if not response or response.status_code != 200:
        reject("Cannot access the URL", 'UrlError', purge_notes=False)
        return

    # Step 1: decode the payload. Some files are not valid UTF-8, in which
    # case latin-1 is used instead (hardcoded fallback).
    try:
        decoded = smart_text(response.content, 'utf-8')
    except UnicodeDecodeError:
        decoded = smart_text(response.content, 'latin-1')

    # Step 2: hash the UTF-8 encoding of the decoded text.
    digest = hashlib.sha1(decoded.encode('utf-8')).hexdigest()

    if dataset.sha1 != digest:
        dataset.sha1 = digest
        # Store the new hash right away, before parsing starts.
        dataset.save()
    else:
        # Unchanged content: normally there is no need to reparse.
        self.hash_changed = False

    try:
        self.root = etree.fromstring(response.content)
        self.parser = self._prepare_parser(self.root, dataset)

        if settings.ERROR_LOGS_ENABLED:
            self.xsd_validate()
    # TODO: when moving error messages to frontend, create a separate error
    # for wrong file type.
    except etree.XMLSyntaxError:
        reject(
            "This file contains XML syntax errors or it's not an "
            "XML file",
            'XMLSyntaxError',
            purge_notes=True,
        )
def __init__(self, dataset, root=None, force_reparse=False):
    """Prepare an IATI parser for ``dataset``.

    Raises ``ParserDisabledError`` when ``settings.IATI_PARSER_DISABLED`` is
    set.

    When ``root`` is supplied it is used as the XML root and nothing is
    downloaded. Otherwise ``dataset.source_url`` is fetched with
    ``requests`` (30 second timeout, browser-like User-Agent). An SSL error
    or a timeout triggers one retry with certificate verification disabled.
    Any connection error, redirect loop or timeout -- on the first attempt
    or on the retry -- leaves ``response`` as ``None``.

    A missing or non-200 response, or content that is not well-formed XML,
    sets ``self.valid_dataset`` to False, saves one ``DatasetNote`` and
    saves the dataset with ``note_count = 1`` and an empty ``sha1``.

    Otherwise the SHA-1 of the decoded content is compared with
    ``dataset.sha1`` (setting ``self.hash_changed``), the content is parsed
    with lxml using ``huge_tree=True``, and ``xsd_validate`` runs when
    ``settings.ERROR_LOGS_ENABLED`` is set.
    """
    if settings.IATI_PARSER_DISABLED:
        raise ParserDisabledError(
            "The parser is disabled on this instance of OIPA")

    self.dataset = dataset
    self.url = dataset.source_url
    self.force_reparse = force_reparse
    self.hash_changed = True
    self.valid_dataset = True

    if root is not None:
        self.root = root
        self.parser = self._prepare_parser(self.root, dataset)
        return

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X '
                      '10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/50.0.2661.102 Safari/537.36'
    }

    response = None
    retry_kwargs = None
    try:
        response = requests.get(self.url, headers=headers, timeout=30)
    except requests.exceptions.SSLError:
        # SSLError subclasses ConnectionError, so it must be handled before
        # the generic connection-error clause.
        retry_kwargs = {'verify': False, 'headers': headers, 'timeout': 30}
    except requests.exceptions.Timeout:
        # NOTE(review): the original timeout retry sends no custom headers;
        # kept as-is -- confirm whether that is intentional.
        retry_kwargs = {'verify': False, 'timeout': 30}
    except (requests.exceptions.ConnectionError,
            requests.exceptions.TooManyRedirects):
        pass

    if retry_kwargs is not None:
        # SECURITY: verify=False disables TLS certificate verification, so
        # the retried download is not protected against interception.
        # The retry runs outside the except handlers above so that a second
        # failure is recorded as an inaccessible URL instead of escaping
        # the constructor.
        try:
            response = requests.get(self.url, **retry_kwargs)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.TooManyRedirects,
                requests.exceptions.Timeout):
            response = None

    from iati_synchroniser.models import DatasetNote

    if not response or response.status_code != 200:
        self.valid_dataset = False
        note = DatasetNote(
            dataset=self.dataset,
            iati_identifier="n/a",
            model="n/a",
            field="n/a",
            message="Cannot access the URL",
            exception_type='UrlError',
            line_number=None,
        )
        note.save()

        self.dataset.note_count = 1
        # Without an XML file the stored hash is blanked.
        self.dataset.sha1 = ''
        self.dataset.save()
        return

    # 1. Turn the bytestring into text. Some files contain non-UTF-8
    #    characters; the latin-1 fallback is hardcoded.
    try:
        iati_file = smart_text(response.content, 'utf-8')
    except UnicodeDecodeError:
        iati_file = smart_text(response.content, 'latin-1')

    # 2. Hash the UTF-8 encoding of that text.
    sha1 = hashlib.sha1(iati_file.encode('utf-8')).hexdigest()

    if dataset.sha1 == sha1:
        # Dataset did not change, no need to reparse normally.
        self.hash_changed = False
    else:
        dataset.sha1 = sha1
        # Store the new hash before parsing starts.
        dataset.save()

    try:
        # huge_tree=True lifts libxml2's built-in size/depth limits so very
        # large documents can be parsed.
        parser = etree.XMLParser(huge_tree=True)
        tree = etree.parse(BytesIO(response.content), parser)
        self.root = tree.getroot()
        self.parser = self._prepare_parser(self.root, dataset)

        if settings.ERROR_LOGS_ENABLED:
            self.xsd_validate()
    # TODO: when moving error messages to frontend, create a separate error
    # for wrong file type.
    except etree.XMLSyntaxError:
        self.valid_dataset = False
        DatasetNote.objects.filter(dataset=self.dataset).delete()
        note = DatasetNote(
            dataset=self.dataset,
            iati_identifier="n/a",
            model="n/a",
            field="n/a",
            message="This file contains XML syntax errors or it's not an "
                    "XML file",
            exception_type='XMLSyntaxError',
            line_number=None,
        )
        note.save()

        self.dataset.note_count = 1
        # Content that is not XML keeps no hash.
        self.dataset.sha1 = ''
        self.dataset.save()
        return
def __init__(self, dataset, root=None, force_reparse=False):
    """Prepare an IATI parser for ``dataset``.

    Raises ``ParserDisabledError`` when ``settings.IATI_PARSER_DISABLED`` is
    set.

    When ``root`` is supplied it is used as the XML root and nothing is
    downloaded. Otherwise the file at ``dataset.source_url`` is fetched with
    ``FileGrabber``, the SHA-1 of the payload is compared with
    ``dataset.sha1`` (setting ``self.hash_changed``), and the payload is
    parsed with lxml. A new hash is assigned to ``dataset.sha1`` but the
    dataset is not saved on the success path.

    If the download fails or the payload is not well-formed XML,
    ``self.valid_dataset`` becomes False, the dataset's existing notes are
    deleted, a single ``DatasetNote`` is saved, and the dataset is saved
    with ``note_count = 1``.
    """
    if settings.IATI_PARSER_DISABLED:
        raise ParserDisabledError(
            "The parser is disabled on this instance of OIPA")

    self.dataset = dataset
    self.url = dataset.source_url
    self.force_reparse = force_reparse
    self.hash_changed = True
    self.valid_dataset = True

    if root is not None:
        self.root = root
        self.parser = self._prepare_parser(self.root, dataset)
        return

    file_grabber = FileGrabber()
    response = file_grabber.get_the_file(self.url)

    from iati_synchroniser.models import DatasetNote

    if not response or response.code != 200:
        self.valid_dataset = False
        DatasetNote.objects.filter(dataset=self.dataset).delete()
        note = DatasetNote(
            dataset=self.dataset,
            iati_identifier="n/a",
            model="n/a",
            field="n/a",
            message="URL down or does not exist",
            exception_type='UrlError',
            line_number=None,
        )
        note.save()

        self.dataset.note_count = 1
        self.dataset.save()
        return

    iati_file = response.read()

    # Hash and parse the payload exactly as received. Wrapping it in str()
    # is a no-op on Python 2, but on Python 3 it yields the "b'...'" repr of
    # the bytes, which hashlib rejects with TypeError and which is not the
    # document lxml should parse.
    sha1 = hashlib.sha1(iati_file).hexdigest()

    if dataset.sha1 == sha1:
        # Dataset did not change, no need to reparse normally.
        self.hash_changed = False
    else:
        dataset.sha1 = sha1

    try:
        self.root = etree.fromstring(iati_file)
        self.parser = self._prepare_parser(self.root, dataset)
    except etree.XMLSyntaxError:
        self.valid_dataset = False
        DatasetNote.objects.filter(dataset=self.dataset).delete()
        note = DatasetNote(
            dataset=self.dataset,
            iati_identifier="n/a",
            model="n/a",
            field="n/a",
            message="This file contains XML syntax errors",
            exception_type='XMLSyntaxError',
            line_number=None,
        )
        note.save()

        self.dataset.note_count = 1
        self.dataset.save()
        return
def __init__(self, dataset, root=None, force_reparse=False):
    """Prepare an IATI parser for ``dataset``.

    Raises ``ParserDisabledError`` when ``settings.IATI_PARSER_DISABLED`` is
    set.

    When ``root`` is supplied it is used as the XML root and nothing is
    downloaded. Otherwise the file at ``dataset.source_url`` is fetched with
    ``FileGrabber``, the SHA-1 of its decoded content is compared with
    ``dataset.sha1`` (setting ``self.hash_changed``), the content is parsed
    with lxml, and ``xsd_validate`` runs when
    ``settings.ERROR_LOGS_ENABLED`` is set. A new hash is assigned to
    ``dataset.sha1`` but the dataset is not saved on the success path.

    Failure handling:

    * download failed / non-200: one ``UrlError`` note is saved and the
      dataset is saved with ``note_count = 1`` and ``sha1 = 'none'``;
    * XML syntax error: existing notes are deleted, one ``XMLSyntaxError``
      note is saved and the dataset is saved with ``note_count = 1``.

    In both cases ``self.valid_dataset`` is set to False.
    """
    if settings.IATI_PARSER_DISABLED:
        raise ParserDisabledError(
            "The parser is disabled on this instance of OIPA")

    self.dataset = dataset
    self.url = dataset.source_url
    self.force_reparse = force_reparse
    self.hash_changed = True
    self.valid_dataset = True

    if root is not None:
        self.root = root
        self.parser = self._prepare_parser(self.root, dataset)
        return

    grabber = FileGrabber()
    response = grabber.get_the_file(self.url)

    from iati_synchroniser.models import DatasetNote

    # Fields shared by both dataset-level failure notes below.
    note_defaults = {
        'dataset': self.dataset,
        'iati_identifier': "n/a",
        'model': "n/a",
        'field': "n/a",
        'line_number': None,
    }

    fetched_ok = response and response.status_code == 200
    if not fetched_ok:
        self.valid_dataset = False
        DatasetNote(
            message="Cannot access the URL",
            exception_type='UrlError',
            **note_defaults
        ).save()

        self.dataset.note_count = 1
        self.dataset.sha1 = 'none'
        self.dataset.save()
        return

    # Decode the payload; files that are not valid UTF-8 fall back to
    # latin-1 (hardcoded).
    raw = response.content
    try:
        text = smart_text(raw, 'utf-8')
    except UnicodeDecodeError:
        text = smart_text(raw, 'latin-1')

    # Hash the UTF-8 encoding of the decoded text.
    digest = hashlib.sha1(text.encode('utf-8')).hexdigest()

    # An unchanged hash means the dataset normally needs no reparse.
    self.hash_changed = dataset.sha1 != digest
    if self.hash_changed:
        dataset.sha1 = digest

    try:
        self.root = etree.fromstring(response.content)
        self.parser = self._prepare_parser(self.root, dataset)

        if settings.ERROR_LOGS_ENABLED:
            self.xsd_validate()
    # TODO: when moving error messages to frontend, create a separate error
    # for wrong file type.
    except etree.XMLSyntaxError:
        self.valid_dataset = False
        DatasetNote.objects.filter(dataset=self.dataset).delete()
        DatasetNote(
            message="This file contains XML syntax errors or it's not an "
                    "XML file",
            exception_type='XMLSyntaxError',
            **note_defaults
        ).save()

        self.dataset.note_count = 1
        self.dataset.save()