item['body_html'] = '<pre>' + '\n'.join(lines[lines_to_remove:]) # if the concatenation of the slugline and take key contain the phrase 'Brief Form' change the category to # h if (item.get(self.ITEM_SLUGLINE, '') + item.get(self.ITEM_TAKE_KEY, '')).lower().find('brief form') >= 0: item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}] # Another exception if 'NZ/AUST FIELDS' in item.get('body_html', ''): item[self.ITEM_ANPA_CATEGORY] = [{'qcode': 'h'}] # if the item has been marked as convert to HTML then we need to use the racing reformat macro # to convert it. if lines[0] and lines[0].find('HH ') != -1: racing_reformat_macro(item) genre_map = get_resource_service('vocabularies').find_one(req=None, _id='genre') if genre_map: item['genre'] = [x for x in genre_map.get('items', []) if x['qcode'] == 'Racing Data' and x['is_active']] return item except Exception as ex: logger.exception(ex) try: register_feed_parser(ZCZCRacingParser.NAME, ZCZCRacingParser()) except AlreadyExistsError as ex: pass register_feeding_service_error('file', AAPParserError.ZCZCParserError().get_error_description())
date = date_parser(dateline, fuzzy=True).replace(tzinfo=utc) item['dateline']['date'] = date item['dateline']['source'] = source[:-4].strip() item['dateline']['text'] = dateline.strip() # Attempt to set the city data to the dateline.location key cities = app.locators.find_cities() for city in dateline.replace(' and ', ',').split(','): located = [c for c in cities if c['city'].lower() == city.strip().lower()] if len(located) > 0: item['dateline']['located'] = located[0] break if 'located' not in item['dateline']: city = dateline.split(',')[0] item['dateline']['located'] = { 'city_code': city, 'city': city, 'tz': 'UTC', 'dateline': 'city' } try: register_feed_parser(AsiaNetFeedParser.NAME, AsiaNetFeedParser()) except AlreadyExistsError as ex: pass register_feeding_service_error('file', AAPParserError.AsiaNetParserError().get_error_description())
# NOTE(review): the two functions below take ``self`` and appear to be methods of the parser
# class that is registered at the bottom; the class header is outside this view -- confirm and
# restore the class-level indentation. At module level ``def datetime`` would shadow the
# ``datetime`` module that its own body relies on.
def set_item_defaults(self, item, filename):
    """
    Fill the item with the fixed default values used for every parsed story.

    The item is modified in place and nothing is returned.

    :param item: dictionary-like item under construction
    :param filename: name of the source file, used as the prefix of the generated guid
    """
    # A random UUID suffix keeps the guid unique even if the same file name is seen again.
    unique_suffix = str(uuid.uuid4())
    item['guid'] = filename + ':' + unique_suffix

    item['urgency'] = 5
    item['pubstatus'] = 'usable'
    item['versioncreated'] = utcnow()
    item[ITEM_TYPE] = CONTENT_TYPE.TEXT
    item['anpa_category'] = [{'qcode': 'f'}]

    default_subject = '04000000'
    item['subject'] = [{'qcode': default_subject, 'name': subject_codes[default_subject]}]

    item[FORMAT] = FORMATS.HTML


def datetime(self, string):
    """
    Turn a timestamp read from the source file into a timezone-aware UTC datetime.

    The timestamp is interpreted as local time in Sydney, Australia.

    :param string: timestamp text such as ``06 June 2016 14:00:00``
    :return: the same instant expressed in UTC
    """
    naive_local = datetime.datetime.strptime(string, '%d %B %Y %H:%M:%S')
    sydney = pytz.timezone('Australia/Sydney')
    # is_dst=None asks pytz to raise for ambiguous or non-existent local times
    # (daylight-saving transitions) instead of silently picking one.
    return sydney.localize(naive_local, is_dst=None).astimezone(pytz.utc)


try:
    register_feed_parser(NewsBitesFeedParser.NAME, NewsBitesFeedParser())
except AlreadyExistsError:
    # Presumably raised when a parser with this name is already registered; ignored on purpose.
    pass

register_feeding_service_error('file', AAPParserError.NewsBitesParserError().get_error_description())