Beispiel #1
0
    def parse(self, filename, provider=None):
        """Parse a ZCZC formatted file into a news item dictionary.

        The file is read line by line as latin-1 text:

        * A line containing ``self.START_OF_MESSAGE`` assigns the item a guid
          (the filename plus a random UUID) and switches to header mode.
        * In header mode blank lines are skipped and the first character of
          each line selects the field to populate (``self.header_map``,
          category, format, genre or IPTC subject).  The first line that
          matches none of those becomes the first line of the body.
        * Outside header mode a line containing ``self.END_OF_MESSAGE`` stops
          the parse; any other line is appended to the body once the body has
          started.

        If the format header marked the item as preserved, the body is HTML
        escaped and wrapped in ``<pre>`` tags.

        :param filename: path of the file to parse
        :param provider: passed through to ``set_item_defaults``,
            ``post_process_item`` and to the error raised on failure
        :return: the value returned by ``self.post_process_item(item, provider)``
        :raises AAPParserError.ZCZCParserError: wraps any exception raised
            while reading or parsing the file
        """
        try:
            item = {}
            self.set_item_defaults(item, provider)

            with open(filename, 'r', encoding='latin-1') as f:
                in_header = False
                in_body = False
                for line in f:
                    if self.START_OF_MESSAGE in line and not in_header:
                        item['guid'] = filename + str(uuid.uuid4())
                        in_header = True
                        continue

                    if not in_header:
                        if self.END_OF_MESSAGE in line:
                            break
                        if in_body:
                            # 'body_html' always exists here: it is assigned when the body starts
                            item['body_html'] += line
                        continue

                    if line == '\n':
                        continue

                    marker = line[0]
                    # Strip only the line terminator; slicing with [1:-1] would drop the last real
                    # character of a final line that has no trailing newline.
                    value = line[1:].rstrip('\n')

                    if marker in self.header_map:
                        field = self.header_map[marker]
                        # A falsy mapping means the header is recognised but not stored
                        if field:
                            item[field] = value
                        continue

                    if marker == self.CATEGORY:
                        item[self.ITEM_ANPA_CATEGORY] = [{'qcode': line[1]}]
                        continue

                    if marker == self.FORMAT:
                        if line[1] == self.TEXT:
                            item[ITEM_TYPE] = CONTENT_TYPE.TEXT
                        elif line[1] == self.TABULAR:
                            item[FORMAT] = FORMATS.PRESERVED
                        continue

                    if marker == self.GENRE:
                        if value:
                            genre_map = get_resource_service('vocabularies').find_one(req=None, _id='genre')
                            # The lookup may return nothing; leave the genre unset in that case
                            # instead of failing the whole item with an AttributeError.
                            if genre_map is not None:
                                item['genre'] = [x for x in genre_map.get('items', []) if
                                                 x['qcode'] == value and x['is_active']]
                        continue

                    if marker == self.IPTC:
                        if value.isdigit():
                            item[self.ITEM_SUBJECT] = [{'qcode': value, 'name': subject_codes[value]}]
                        continue

                    # Not a recognised header line: the header is over and this line starts the body
                    in_header = False
                    in_body = True
                    item['body_html'] = line

            if item.get(FORMAT) == FORMATS.PRESERVED:
                item['body_html'] = '<pre>' + html.escape(item['body_html']) + '</pre>'

            return self.post_process_item(item, provider)

        except Exception as ex:
            raise AAPParserError.ZCZCParserError(exception=ex, provider=provider) from ex
            item['body_html'] = '<pre>' + '\n'.join(lines[lines_to_remove:])

            # if the concatenation of the slugline and take key contain the phrase 'Brief Form' change the category to
            # h
            if (item.get(self.ITEM_SLUGLINE, '') + item.get(self.ITEM_TAKE_KEY, '')).lower().find('brief form') >= 0:
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}]
            # Another exception
            if 'NZ/AUST FIELDS' in item.get('body_html', ''):
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}]

            # if the item has been marked as convert to HTML then we need to use the racing reformat macro
            # to convert it.
            if lines[0] and lines[0].find('HH ') != -1:
                racing_reformat_macro(item)

            genre_map = get_resource_service('vocabularies').find_one(req=None, _id='genre')
            if genre_map:
                item['genre'] = [x for x in genre_map.get('items', []) if
                                 x['qcode'] == 'Racing Data' and x['is_active']]
            return item

        except Exception as ex:
            logger.exception(ex)


# Register the racing parser under its NAME. AlreadyExistsError is deliberately
# ignored (presumably raised when that name is already registered - confirm
# against register_feed_parser).
try:
    register_feed_parser(ZCZCRacingParser.NAME, ZCZCRacingParser())
except AlreadyExistsError:
    pass
# Register the ZCZC parser error description for the 'file' feeding service.
register_feeding_service_error('file', AAPParserError.ZCZCParserError().get_error_description())
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}]
            # Another exception
            if 'NZ/AUST FIELDS' in item.get('body_html', ''):
                item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}]

            # if the item has been marked as convert to HTML then we need to use the racing reformat macro
            # to convert it.
            if lines[0] and lines[0].find('HH ') != -1:
                racing_reformat_macro(item)

            genre_map = get_resource_service('vocabularies').find_one(
                req=None, _id='genre')
            if genre_map:
                item['genre'] = [
                    x for x in genre_map.get('items', [])
                    if x['qcode'] == 'Racing Data' and x['is_active']
                ]
            return item

        except Exception as ex:
            logger.exception(ex)


# Module-level registration of the racing parser under its NAME.
try:
    register_feed_parser(ZCZCRacingParser.NAME, ZCZCRacingParser())
except AlreadyExistsError:
    # Deliberately ignored so that the module can still finish loading.
    pass
# Make the ZCZC parser error description known to the 'file' feeding service.
register_feeding_service_error('file', AAPParserError.ZCZCParserError().get_error_description())