def parse(self, filename, provider=None):
    """Parse the message file ``filename`` into an item dictionary.

    The file is decoded as latin-1 and scanned line by line in three phases:

    * Lines before the one containing ``self.START_OF_MESSAGE`` are ignored
      (a line containing ``self.END_OF_MESSAGE`` still stops the scan).
    * Header lines are dispatched on their first character: entries of
      ``self.header_map`` are copied to the mapped item key, and the
      category, format, genre and IPTC markers get dedicated handling.
      Blank lines are skipped.
    * The first line that is not a recognised header starts the body; every
      following line is appended to ``item['body_html']`` until a line
      containing ``self.END_OF_MESSAGE`` is found.

    :param filename: path of the file to parse; also used as the prefix of
        the generated ``guid``.
    :param provider: passed through to ``set_item_defaults``,
        ``post_process_item`` and the raised error.
    :return: whatever ``self.post_process_item(item, provider)`` returns.
    :raises AAPParserError.ZCZCParserError: wraps any exception raised while
        reading or parsing the file.
    """
    try:
        item = {}
        self.set_item_defaults(item, provider)

        with open(filename, 'r', encoding='latin-1') as f:
            header = False
            body = False
            # Stream the file: nothing after the end marker needs reading.
            for line in f:
                if self.START_OF_MESSAGE in line and not header:
                    item['guid'] = filename + str(uuid.uuid4())
                    header = True
                    continue

                if not header:
                    if self.END_OF_MESSAGE in line:
                        break
                    if body:
                        item['body_html'] = item.get('body_html', '') + line
                    continue

                if line == '\n':
                    continue

                marker = line[0]
                # Strip only the line terminator, so a final header line
                # without a newline keeps its last character.
                value = line[1:].rstrip('\n')

                if marker in self.header_map:
                    # A falsy mapping means the header is known but unused.
                    if self.header_map[marker]:
                        item[self.header_map[marker]] = value
                    continue

                if marker == self.CATEGORY:
                    item[self.ITEM_ANPA_CATEGORY] = [{'qcode': line[1]}]
                    continue

                if marker == self.FORMAT:
                    if line[1] == self.TEXT:
                        item[ITEM_TYPE] = CONTENT_TYPE.TEXT
                    elif line[1] == self.TABULAR:
                        item[FORMAT] = FORMATS.PRESERVED
                    continue

                if marker == self.GENRE:
                    if value:
                        genre_map = get_resource_service('vocabularies').find_one(req=None, _id='genre')
                        # The vocabulary may be missing; leave the genre
                        # unset in that case instead of failing the file.
                        if genre_map:
                            item['genre'] = [
                                x for x in genre_map.get('items', [])
                                if x['qcode'] == value and x['is_active']
                            ]
                    continue

                if marker == self.IPTC:
                    if value.isdigit():
                        item[self.ITEM_SUBJECT] = [{'qcode': value, 'name': subject_codes[value]}]
                    continue

                # Not a recognised header line: the body starts here.
                header = False
                body = True
                item['body_html'] = line

        if item.get(FORMAT) == FORMATS.PRESERVED:
            item['body_html'] = '<pre>' + html.escape(item['body_html']) + '</pre>'

        return self.post_process_item(item, provider)

    except Exception as ex:
        raise AAPParserError.ZCZCParserError(exception=ex, provider=provider) from ex
item['body_html'] = '<pre>' + '\n'.join(lines[lines_to_remove:]) # if the concatenation of the slugline and take key contain the phrase 'Brief Form' change the category to # h if (item.get(self.ITEM_SLUGLINE, '') + item.get(self.ITEM_TAKE_KEY, '')).lower().find('brief form') >= 0: item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}] # Another exception if 'NZ/AUST FIELDS' in item.get('body_html', ''): item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}] # if the item has been marked as convert to HTML then we need to use the racing reformat macro # to convert it. if lines[0] and lines[0].find('HH ') != -1: racing_reformat_macro(item) genre_map = get_resource_service('vocabularies').find_one(req=None, _id='genre') if genre_map: item['genre'] = [x for x in genre_map.get('items', []) if x['qcode'] == 'Racing Data' and x['is_active']] return item except Exception as ex: logger.exception(ex) try: register_feed_parser(ZCZCRacingParser.NAME, ZCZCRacingParser()) except AlreadyExistsError as ex: pass register_feeding_service_error('file', AAPParserError.ZCZCParserError().get_error_description())
item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}] # Another exception if 'NZ/AUST FIELDS' in item.get('body_html', ''): item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}] # if the item has been marked as convert to HTML then we need to use the racing reformat macro # to convert it. if lines[0] and lines[0].find('HH ') != -1: racing_reformat_macro(item) genre_map = get_resource_service('vocabularies').find_one( req=None, _id='genre') if genre_map: item['genre'] = [ x for x in genre_map.get('items', []) if x['qcode'] == 'Racing Data' and x['is_active'] ] return item except Exception as ex: logger.exception(ex) try: register_feed_parser(ZCZCRacingParser.NAME, ZCZCRacingParser()) except AlreadyExistsError: pass register_feeding_service_error( 'file', AAPParserError.ZCZCParserError().get_error_description())