예제 #1
0
    def append_error(self,
                     error_type,
                     model,
                     field,
                     message,
                     sourceline,
                     variable='',
                     iati_id=None):
        """
        Queue a ``DatasetNote`` describing an error on ``self.errors``.

        Does nothing when ``settings.ERROR_LOGS_ENABLED`` is falsy. The note
        is only appended to the list; it is not saved by this method.

        The identifier stored on the note is resolved in this order:

        1. ``iati_id`` when it is truthy;
        2. otherwise the ``iati_identifier`` of ``get_model('Activity')``
           when ``self.dataset.filetype == 1``, or else the
           ``organisation_identifier`` of ``get_model('Organisation')``;
        3. if still empty, ``self.identifier`` when that attribute exists,
           or the literal ``'no-identifier'`` when it does not.

        :param error_type: stored as the note's ``exception_type``.
        :param model: stored as the note's ``model``.
        :param field: stored as the note's ``field``.
        :param message: stringified and cut to 255 characters.
        :param sourceline: stored as the note's ``line_number``.
        :param variable: when truthy, stringified and cut to 255 characters;
            falsy values are passed through unchanged.
        :param iati_id: explicit identifier overriding the lookup above.
        """
        if not settings.ERROR_LOGS_ENABLED:
            return

        identifier = iati_id or None
        if identifier is None:
            # No explicit id given: ask the model currently being parsed.
            if self.dataset.filetype == 1:
                record = self.get_model('Activity')
                candidate = record.iati_identifier if record else None
            else:
                record = self.get_model('Organisation')
                candidate = (record.organisation_identifier
                             if record else None)
            if candidate:
                identifier = candidate

        if not identifier:
            if hasattr(self, 'identifier'):
                identifier = self.identifier
            else:
                identifier = 'no-identifier'

        trimmed_variable = str(variable)[:255] if variable else variable

        note_fields = {
            'dataset': self.dataset,
            'iati_identifier': identifier,
            'model': model,
            'field': field,
            'message': str(message)[:255],
            'exception_type': error_type,
            'line_number': sourceline,
            'variable': trimmed_variable,
        }
        note = DatasetNote(**note_fields)

        self.errors.append(note)
예제 #2
0
    def __init__(self, dataset, root=None, force_reparse=False):
        """
        Set up an IATI parser for ``dataset``.

        With an explicit ``root`` the element is used directly and nothing is
        downloaded. Otherwise the file behind ``dataset.source_url`` is
        fetched through ``FileGrabber``, a SHA-1 of its text is compared with
        ``dataset.sha1`` to fill ``self.hash_changed``, and the content is
        parsed with lxml.

        If the download fails (falsy response or status other than 200) or
        the content is not well-formed XML, ``self.valid_dataset`` becomes
        False, a single ``DatasetNote`` is saved, ``dataset.note_count`` is
        set to 1, ``dataset.sha1`` is blanked, the dataset is saved and the
        constructor returns early.

        :param dataset: dataset record to parse.
        :param root: optional already-parsed XML root element.
        :param force_reparse: stored unchanged on the instance.
        :raises ParserDisabledError: if ``settings.IATI_PARSER_DISABLED``.
        """
        if settings.IATI_PARSER_DISABLED:
            raise ParserDisabledError(
                "The parser is disabled on this instance of OIPA")

        self.dataset = dataset
        self.url = dataset.source_url
        self.force_reparse = force_reparse
        self.hash_changed = True
        self.valid_dataset = True

        # A caller-supplied tree short-circuits the download entirely.
        if root is not None:
            self.root = root
            self.parser = self._prepare_parser(root, dataset)
            return

        response = FileGrabber().get_the_file(self.url)

        from iati_synchroniser.models import DatasetNote

        # Fields that carry no meaningful value for file-level errors.
        placeholders = dict.fromkeys(
            ('iati_identifier', 'model', 'field'), "n/a")

        if not (response and response.status_code == 200):
            self.valid_dataset = False
            url_note = DatasetNote(dataset=self.dataset,
                                   message="Cannot access the URL",
                                   exception_type='UrlError',
                                   line_number=None,
                                   **placeholders)
            url_note.save()
            self.dataset.note_count = 1

            # Nothing usable was downloaded, so no hash is kept.
            self.dataset.sha1 = ''

            self.dataset.save()
            return

        # Decode the payload; utf-8 first, latin-1 as the hardcoded fallback
        # for files that contain non utf-8 characters.
        try:
            iati_file = smart_text(response.content, 'utf-8')
        except UnicodeDecodeError:
            iati_file = smart_text(response.content, 'latin-1')

        # Hash the utf-8 encoding of the decoded text.
        sha1 = hashlib.sha1(iati_file.encode('utf-8')).hexdigest()

        if dataset.sha1 != sha1:
            # Persist the new hash right away, before parsing.
            dataset.sha1 = sha1
            dataset.save()
        else:
            # Same content as last time; normally no reparse is needed.
            self.hash_changed = False

        try:
            self.root = etree.fromstring(response.content)
            self.parser = self._prepare_parser(self.root, dataset)

            if settings.ERROR_LOGS_ENABLED:
                self.xsd_validate()

        # TODO: when moving error messages to frontend, create a separate
        # error for wrong file type.
        except etree.XMLSyntaxError:
            self.valid_dataset = False
            DatasetNote.objects.filter(dataset=self.dataset).delete()
            syntax_note = DatasetNote(
                dataset=self.dataset,
                message="This file contains XML syntax errors or it's not "
                        "an XML file",
                exception_type='XMLSyntaxError',
                line_number=None,
                **placeholders)
            syntax_note.save()
            self.dataset.note_count = 1

            # A file that is not valid XML should not keep a hash.
            self.dataset.sha1 = ''

            self.dataset.save()
예제 #3
0
    def __init__(self, dataset, root=None, force_reparse=False):
        """
        Given a IATI dataset, prepare an IATI parser.

        With an explicit ``root`` the element is used directly and nothing is
        downloaded. Otherwise ``dataset.source_url`` is fetched with
        ``requests`` (30 second timeout), a SHA-1 of the decoded text is
        compared with ``dataset.sha1`` to fill ``self.hash_changed``, and the
        content is parsed with lxml (``huge_tree=True``).

        If the download fails or the content is not well-formed XML,
        ``self.valid_dataset`` becomes False, a single ``DatasetNote`` is
        saved, ``dataset.note_count`` is set to 1, ``dataset.sha1`` is
        blanked, the dataset is saved and the constructor returns early.

        :param dataset: dataset record to parse.
        :param root: optional already-parsed XML root element.
        :param force_reparse: stored unchanged on the instance.
        :raises ParserDisabledError: if ``settings.IATI_PARSER_DISABLED``.
        """

        if settings.IATI_PARSER_DISABLED:
            raise ParserDisabledError(
                "The parser is disabled on this instance of OIPA")

        self.dataset = dataset
        self.url = dataset.source_url
        self.force_reparse = force_reparse
        self.hash_changed = True
        self.valid_dataset = True

        if root is not None:
            self.root = root
            self.parser = self._prepare_parser(self.root, dataset)
            return

        headers = {
            'User-Agent': (
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/50.0.2661.102 Safari/537.36'
            )
        }

        response = None
        # Keyword arguments for a single fallback attempt; None = no retry.
        retry_kwargs = None

        try:
            response = requests.get(self.url, headers=headers, timeout=30)
        except requests.exceptions.SSLError:
            # Certificate problem: try once more without verification.
            retry_kwargs = {
                'verify': False,
                'headers': headers,
                'timeout': 30,
            }
        except requests.exceptions.Timeout:
            # NOTE(review): this retry drops the User-Agent header and turns
            # off certificate verification, as the code always did - confirm
            # that is intended for a plain timeout.
            retry_kwargs = {'verify': False, 'timeout': 30}
        except (requests.exceptions.ConnectionError,
                requests.exceptions.TooManyRedirects):
            # Leave response as None; reported below as a UrlError note.
            pass

        if retry_kwargs is not None:
            # The retry runs outside the except handlers above so that its
            # own failure is caught here instead of escaping the constructor.
            try:
                response = requests.get(self.url, **retry_kwargs)
            except (requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout,
                    requests.exceptions.TooManyRedirects):
                response = None

        from iati_synchroniser.models import DatasetNote
        if not response or response.status_code != 200:
            self.valid_dataset = False
            note = DatasetNote(dataset=self.dataset,
                               iati_identifier="n/a",
                               model="n/a",
                               field="n/a",
                               message="Cannot access the URL",
                               exception_type='UrlError',
                               line_number=None)
            note.save()
            self.dataset.note_count = 1

            # Nothing usable was downloaded, so the sha1 is blanked.
            self.dataset.sha1 = ''

            self.dataset.save()
            return

        # 1. Turn bytestring into string (treat it using specified encoding):
        try:
            iati_file = smart_text(response.content, 'utf-8')
        # XXX: some files contain non utf-8 characters:
        # FIXME: this is hardcoded:
        except UnicodeDecodeError:
            iati_file = smart_text(response.content, 'latin-1')

        # 2. Encode the string to use for hashing:
        hasher = hashlib.sha1()
        hasher.update(iati_file.encode('utf-8'))
        sha1 = hasher.hexdigest()

        if dataset.sha1 == sha1:
            # dataset did not change, no need to reparse normally
            self.hash_changed = False
        else:
            dataset.sha1 = sha1

            # Save the new sha1 immediately, before parsing starts.
            dataset.save()

        try:
            parser = etree.XMLParser(huge_tree=True)
            tree = etree.parse(BytesIO(response.content), parser)
            self.root = tree.getroot()
            self.parser = self._prepare_parser(self.root, dataset)

            if settings.ERROR_LOGS_ENABLED:
                self.xsd_validate()

        # TODO: when moving error messages to frontend, create a separate error
        # for wrong file type:
        except etree.XMLSyntaxError:
            self.valid_dataset = False
            DatasetNote.objects.filter(dataset=self.dataset).delete()
            note = DatasetNote(
                dataset=self.dataset,
                iati_identifier="n/a",
                model="n/a",
                field="n/a",
                message="This file contains XML syntax errors or it's not an "
                "XML file",
                exception_type='XMLSyntaxError',
                line_number=None)
            note.save()
            self.dataset.note_count = 1

            # A file that is not valid XML should not keep a sha1.
            self.dataset.sha1 = ''

            self.dataset.save()
            return
예제 #4
0
    def __init__(self, dataset, root=None, force_reparse=False):
        """
        Given a IATI dataset, prepare an IATI parser.

        With an explicit ``root`` the element is used directly and nothing is
        downloaded. Otherwise the file behind ``dataset.source_url`` is
        fetched through ``FileGrabber``, its SHA-1 is compared with
        ``dataset.sha1`` to fill ``self.hash_changed``, and the content is
        parsed with lxml.

        If the download fails (falsy response or ``code`` other than 200) or
        the content is not well-formed XML, ``self.valid_dataset`` becomes
        False, existing notes for the dataset are deleted, a single
        ``DatasetNote`` is saved, ``dataset.note_count`` is set to 1, the
        dataset is saved and the constructor returns early.

        :param dataset: dataset record to parse.
        :param root: optional already-parsed XML root element.
        :param force_reparse: stored unchanged on the instance.
        :raises ParserDisabledError: if ``settings.IATI_PARSER_DISABLED``.
        """

        if settings.IATI_PARSER_DISABLED:
            raise ParserDisabledError(
                "The parser is disabled on this instance of OIPA")

        self.dataset = dataset
        self.url = dataset.source_url
        self.force_reparse = force_reparse
        self.hash_changed = True
        self.valid_dataset = True

        if root is not None:
            self.root = root
            self.parser = self._prepare_parser(self.root, dataset)
            return

        file_grabber = FileGrabber()
        response = file_grabber.get_the_file(self.url)
        from iati_synchroniser.models import DatasetNote
        if not response or response.code != 200:
            self.valid_dataset = False
            DatasetNote.objects.filter(dataset=self.dataset).delete()
            note = DatasetNote(dataset=self.dataset,
                               iati_identifier="n/a",
                               model="n/a",
                               field="n/a",
                               message="URL down or does not exist",
                               exception_type='UrlError',
                               line_number=None)
            note.save()
            self.dataset.note_count = 1
            self.dataset.save()
            return

        # Keep the payload as bytes. Calling str() on bytes is a no-op on
        # Python 2 but yields the "b'...'" repr on Python 3, which can be
        # neither hashed (hashlib rejects text) nor parsed as XML.
        # NOTE(review): assumes response.read() returns the raw body, as a
        # urllib-style response does - confirm against FileGrabber.
        iati_file = response.read()
        if not isinstance(iati_file, bytes):
            iati_file = iati_file.encode('utf-8')

        hasher = hashlib.sha1()
        hasher.update(iati_file)
        sha1 = hasher.hexdigest()

        if dataset.sha1 == sha1:
            # dataset did not change, no need to reparse normally
            self.hash_changed = False
        else:
            # Only set in memory here; this branch does not save the dataset.
            dataset.sha1 = sha1

        try:
            self.root = etree.fromstring(iati_file)
            self.parser = self._prepare_parser(self.root, dataset)
        except etree.XMLSyntaxError:
            self.valid_dataset = False
            DatasetNote.objects.filter(dataset=self.dataset).delete()
            note = DatasetNote(dataset=self.dataset,
                               iati_identifier="n/a",
                               model="n/a",
                               field="n/a",
                               message="This file contains XML syntax errors",
                               exception_type='XMLSyntaxError',
                               line_number=None)
            note.save()
            self.dataset.note_count = 1
            self.dataset.save()
            return
예제 #5
0
    def __init__(self, dataset, root=None, force_reparse=False):
        """
        Build an IATI parser for the given dataset.

        When ``root`` is passed in, it becomes ``self.root`` and no download
        happens. Otherwise the file at ``dataset.source_url`` is fetched via
        ``FileGrabber``, hashed with SHA-1 to decide ``self.hash_changed``,
        and parsed with lxml.

        Failure handling, as implemented here:

        * download failed (falsy response or status other than 200): a
          'UrlError' note is saved, ``note_count`` is set to 1, ``sha1`` is
          set to the string 'none', and the dataset is saved;
        * XML syntax error: existing notes for the dataset are deleted, an
          'XMLSyntaxError' note is saved, ``note_count`` is set to 1, and the
          dataset is saved.

        In both cases ``self.valid_dataset`` is False afterwards.

        :param dataset: dataset record to parse.
        :param root: optional already-parsed XML root element.
        :param force_reparse: stored unchanged on the instance.
        :raises ParserDisabledError: if ``settings.IATI_PARSER_DISABLED``.
        """
        if settings.IATI_PARSER_DISABLED:
            raise ParserDisabledError(
                "The parser is disabled on this instance of OIPA")

        self.dataset = dataset
        self.url = dataset.source_url
        self.force_reparse = force_reparse
        self.hash_changed = True
        self.valid_dataset = True

        # Pre-parsed tree supplied by the caller: nothing to fetch.
        if root is not None:
            self.root = root
            self.parser = self._prepare_parser(root, dataset)
            return

        response = FileGrabber().get_the_file(self.url)

        from iati_synchroniser.models import DatasetNote

        download_ok = bool(response) and response.status_code == 200
        if not download_ok:
            self.valid_dataset = False
            note_fields = {
                'dataset': self.dataset,
                'iati_identifier': "n/a",
                'model': "n/a",
                'field': "n/a",
                'message': "Cannot access the URL",
                'exception_type': 'UrlError',
                'line_number': None,
            }
            DatasetNote(**note_fields).save()
            self.dataset.note_count = 1
            self.dataset.sha1 = 'none'
            self.dataset.save()
            return

        # Step 1: bytes -> text. utf-8 is tried first; latin-1 is the
        # hardcoded fallback for files with non utf-8 characters.
        try:
            iati_file = smart_text(response.content, 'utf-8')
        except UnicodeDecodeError:
            iati_file = smart_text(response.content, 'latin-1')

        # Step 2: hash the utf-8 encoding of that text.
        digest = hashlib.sha1(iati_file.encode('utf-8')).hexdigest()

        if dataset.sha1 != digest:
            # Content differs from the stored hash; remember the new one
            # (it is not saved at this point).
            dataset.sha1 = digest
        else:
            # Unchanged dataset: normally no reparse is needed.
            self.hash_changed = False

        try:
            self.root = etree.fromstring(response.content)
            self.parser = self._prepare_parser(self.root, dataset)

            if settings.ERROR_LOGS_ENABLED:
                self.xsd_validate()

        # TODO: when moving error messages to frontend, create a separate
        # error for wrong file type.
        except etree.XMLSyntaxError:
            self.valid_dataset = False
            DatasetNote.objects.filter(dataset=self.dataset).delete()
            note_fields = {
                'dataset': self.dataset,
                'iati_identifier': "n/a",
                'model': "n/a",
                'field': "n/a",
                'message': ("This file contains XML syntax errors or it's "
                            "not an XML file"),
                'exception_type': 'XMLSyntaxError',
                'line_number': None,
            }
            DatasetNote(**note_fields).save()
            self.dataset.note_count = 1
            self.dataset.save()