def get_areas():
    """Load the named polygons that bound the fuel-price extraction areas.

    Reads the GeoJson file ``fuel_geojson.json``. Any failure is logged and
    an empty list is returned so callers can proceed best-effort.

    :return: list of area definitions (empty when the file cannot be read)
    """
    loaded = []
    geojson_path = get_filepath('fuel_geojson.json')
    try:
        with geojson_path.open('r') as geojson_file:
            loaded = json.load(geojson_file)
    except Exception as ex:
        logger.error('Exception loading fuel_geojson.json : {}'.format(ex))
    return loaded
def _get_topics(self, formatted_article, topic_set, topic_index): """ Using the Reuters catalog map the IPTC codes to the Reuters topics :param formatted_article: :param topic_set: :param topic_index: :return: """ # Some AAP IPTC codes are spcificaly mapped to N2000 codes aap_map = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='reuters_iptc_n2000_map') path = get_filepath('topicset-reuters-3rdParty_news2000.xml') tree = etree.parse(str(path)) for subject in formatted_article.get('subject', []): if aap_map: aap_mapped = [x for x in aap_map.get('items', []) if x.get('qcode', '') == subject.get('qcode', '')] if len(aap_mapped) == 1: n2000_code = aap_mapped[0].get('name') topic = tree.xpath( './NewsItem/TopicSet/Topic/FormalName[text()="' + n2000_code + '"]') if topic and len(topic) == 1: topic_type = str(topic[0].xpath('../TopicType/@FormalName')[0]) topic_index = topic_index + 1 topic = SubElement(topic_set, 'Topic', attrib={'Duid': 'T{num:04d}'.format(num=topic_index)}) SubElement(topic, 'TopicType', attrib={'FormalName': topic_type, 'Scheme': 'RTT'}) SubElement(topic, 'FormalName', attrib={'Scheme': 'N2000'}).text = n2000_code break topics = tree.xpath('./NewsItem/TopicSet/Topic/FormalName[text()="iptc:' + subject.get('qcode', '') + '"]') if len(topics) == 1: topic_type = str(topics[0].xpath('../TopicType/@FormalName')[0]) other_thing = topics[0].xpath('../FormalName[@Scheme="N2000"]')[0].text topic_index = topic_index + 1 topic = SubElement(topic_set, 'Topic', attrib={'Duid': 'T{num:04d}'.format(num=topic_index)}) SubElement(topic, 'TopicType', attrib={'FormalName': topic_type, 'Scheme': 'RTT'}) SubElement(topic, 'FormalName', attrib={'Scheme': 'N2000'}).text = other_thing return topic_index
def run(self, id):
    """Import sport fixtures from the shared Google spreadsheet.

    Authorizes against the Google Sheets API with a service-account key,
    opens the "Sport Fixtures for Superdesk Planning Tool" workbook and
    processes every worksheet after the first one.

    :param id:
    """
    self.eocstat_map = superdesk.get_resource_service('vocabularies').find_one(
        req=None, _id='eventoccurstatus')
    # A service-account credentials file must have been generated beforehand.
    key_file = str(get_filepath('Quickstart-44dd51f59d5a.json'))
    credentials = ServiceAccountCredentials.from_json_keyfile_name(key_file, self.scope)
    client = gspread.authorize(credentials)
    workbook = client.open("Sport Fixtures for Superdesk Planning Tool")
    cities = app.locators.find_cities()
    # Scan the worksheets, skipping the first one.
    for worksheet in workbook.worksheets()[1:]:
        dates = worksheet.col_values(1, value_render_option='UNFORMATTED_VALUE')
        rows = worksheet.get_all_values()
        for row in rows:
            self.get_time_zone(cities, row, worksheet.title)
        self._process_sheet(worksheet.title, dates, rows, worksheet.id)
        # Pause between worksheets — presumably to respect the Sheets API
        # rate limit (confirm).
        time.sleep(2)
def _get_topics(self, formatted_article, topic_set, topic_index):
    """Map the article's IPTC subject codes to Reuters N2000 topics.

    Each subject qcode is resolved first through an AAP-specific override
    vocabulary and then through the Reuters topic catalog; every hit is
    appended to ``topic_set`` as a ``Topic`` element.

    :param formatted_article: article whose ``subject`` list is scanned
    :param topic_set: XML element that receives the generated ``Topic`` nodes
    :param topic_index: running counter used to build each topic's ``Duid``
    :return: the updated topic index
    """
    # Some AAP IPTC codes are specifically mapped to N2000 codes.
    aap_map = superdesk.get_resource_service('vocabularies').find_one(
        req=None, _id='reuters_iptc_n2000_map')
    catalog = etree.parse(str(get_filepath('topicset-reuters-3rdParty_news2000.xml')))

    def _append_topic(type_name, code, index):
        # Emit a single Topic element carrying the Reuters topic type (RTT)
        # and the N2000 formal name.
        node = SubElement(topic_set, 'Topic',
                          attrib={'Duid': 'T{num:04d}'.format(num=index)})
        SubElement(node, 'TopicType',
                   attrib={'FormalName': type_name, 'Scheme': 'RTT'})
        SubElement(node, 'FormalName', attrib={'Scheme': 'N2000'}).text = code

    for subject in formatted_article.get('subject', []):
        qcode = subject.get('qcode', '')
        if aap_map:
            # Only act on an unambiguous (single) override entry.
            mapped = [entry for entry in aap_map.get('items', [])
                      if entry.get('qcode', '') == qcode]
            if len(mapped) == 1:
                n2000_code = mapped[0].get('name')
                hits = catalog.xpath(
                    './NewsItem/TopicSet/Topic/FormalName[text()="' + n2000_code + '"]')
                if hits and len(hits) == 1:
                    topic_index = topic_index + 1
                    _append_topic(str(hits[0].xpath('../TopicType/@FormalName')[0]),
                                  n2000_code, topic_index)
                    break
        # Fall back to the catalog lookup by the "iptc:<qcode>" formal name.
        hits = catalog.xpath(
            './NewsItem/TopicSet/Topic/FormalName[text()="iptc:' + qcode + '"]')
        if len(hits) == 1:
            topic_index = topic_index + 1
            _append_topic(str(hits[0].xpath('../TopicType/@FormalName')[0]),
                          hits[0].xpath('../FormalName[@Scheme="N2000"]')[0].text,
                          topic_index)
    return topic_index
def get_reference_data():
    """Get the schedule data for Sydney trains so that routes and departure
    times can be identified.

    Downloads the GTFS schedule bundle from the NSW transport API when a new
    version is available (otherwise reuses the previously extracted files)
    and populates the module-level ``trips``, ``routes``, ``stops`` and
    ``times`` lookup tables.

    :return:
    """
    # Get the path where the reference data is stored
    path = get_filepath('sydney_trains')
    # HEAD request first: the response headers carry the name of the
    # currently published schedule file without downloading the payload.
    response = requests.head('https://api.transport.nsw.gov.au/v1/gtfs/schedule/sydneytrains',
                             headers={'Authorization': 'apikey {}'.format(app.config.get('SYDNEY_TRANSPORT_API_KEY', '')),
                                      'Accept': 'application/octet-stream'})
    response.raise_for_status()
    # Get the name of the currently available file
    fname = re.findall("filename=(.+)", response.headers["Content-Disposition"])[0]
    # Get the path to the new file
    zpath = Path(str(path) + '/' + fname)
    # If the current file already exists locally we are up to date
    up_to_date = zpath.exists()
    # Need to retrieve the current file
    if not up_to_date:
        response = requests.get('https://api.transport.nsw.gov.au/v1/gtfs/schedule/sydneytrains',
                                headers={'Authorization': 'apikey {}'.format(app.config.get('SYDNEY_TRANSPORT_API_KEY', '')),
                                         'Accept': 'application/octet-stream'})
        response.raise_for_status()
        # Remove the old version of the file
        for f in glob.glob(str(path) + '/*.zip'):
            os.remove(f)
        # Extract the contents from the new one.
        # NOTE(review): the local name `zipfile` shadows the stdlib module of
        # the same name; it is only defined on this branch, but later uses are
        # guarded by `if not up_to_date` so they never evaluate it otherwise.
        zipfile = ZipFile(BytesIO(response.content))
        zipfile.extractall(str(path))
        # Keep a copy of the downloaded archive so the next run can detect
        # that it is up to date.
        with open(str(path) + '/' + fname, 'wb') as f:
            f.write(response.content)
            f.close()  # redundant: the `with` statement already closes f
    # agency.txt: collected into a local list (not used elsewhere here).
    agency_file = zipfile.open('agency.txt') if not up_to_date else open(str(path) + '/agency.txt', 'rb')
    file = io.TextIOWrapper(agency_file, encoding='utf-8')
    agency_list = []
    for row in csv.DictReader(file):
        agency_list.append(row)
    agency_file.close()
    # trips.txt: index each trip row by its trip_id (module-level dict).
    trips_file = zipfile.open('trips.txt') if not up_to_date else open(str(path) + '/trips.txt', 'rb')
    file = io.TextIOWrapper(trips_file, encoding='utf-8')
    for row in csv.DictReader(file):
        trips[row.get('trip_id')] = row
    trips_file.close()
    # routes.txt: index each route row by its route_id (module-level dict).
    routes_file = zipfile.open('routes.txt') if not up_to_date else open(str(path) + '/routes.txt', 'rb')
    file = io.TextIOWrapper(routes_file, encoding='utf-8')
    for row in csv.DictReader(file):
        routes[row.get('route_id')] = row
    routes_file.close()
    # stops.txt: index each stop row by its stop_id (module-level dict).
    stops_file = zipfile.open('stops.txt') if not up_to_date else open(str(path) + '/stops.txt', 'rb')
    file = io.TextIOWrapper(stops_file, encoding='utf-8')
    for row in csv.DictReader(file):
        stops[row.get('stop_id')] = row
    stops_file.close()
    # stop_times.txt is large, so a filtered one-entry-per-trip version is
    # cached as JSON; rebuild it only when a new bundle was downloaded.
    if not up_to_date:
        stop_time_file = zipfile.open('stop_times.txt') if not up_to_date \
            else open(str(path) + '/stop_times.txt', 'rb')
        file = io.TextIOWrapper(stop_time_file, encoding='utf-8')
        stop_times = csv.DictReader(file)
        for row in stop_times:
            # save only the first stop
            if times.get(row.get('trip_id')):
                # NOTE(review): stop_sequence values come out of csv as
                # strings, so this is a lexicographic comparison; and despite
                # the "first stop" comment it replaces the stored entry with
                # the *greater* sequence — confirm intent.
                if times.get(row.get('trip_id')).get('stop_sequence') < row.get('stop_sequence'):
                    times[row.get('trip_id')] = {'stop_sequence': row.get('stop_sequence'),
                                                 'departure_time': row.get('departure_time')}
            else:
                times[row.get('trip_id')] = {'stop_sequence': row.get('stop_sequence'),
                                             'departure_time': row.get('departure_time')}
        with open(str(path) + '/filtered_stop_times.json', 'w') as times_out:
            json.dump(times, times_out)
            times_out.close()  # redundant: the `with` statement closes it
        stop_time_file.close()
    else:
        # NOTE(review): this handle is never closed.
        times_out = open(str(path) + '/filtered_stop_times.json', 'r')
        times.update(json.load(times_out))
def get_reference_data():
    """Get the schedule data for Sydney trains so that routes and departure
    times can be identified.

    Fetches the GTFS schedule bundle from the NSW transport API when a newer
    version is published, otherwise reads the previously extracted files,
    and fills the module-level ``trips``, ``routes``, ``stops`` and ``times``
    lookup tables.

    :return:
    """
    # Get the path where the reference data is stored
    path = get_filepath('sydney_trains')
    # HEAD request: the headers reveal the current published file name
    # without downloading the payload.
    response = requests.head(
        'https://api.transport.nsw.gov.au/v1/gtfs/schedule/sydneytrains',
        headers={
            'Authorization': 'apikey {}'.format(
                app.config.get('SYDNEY_TRANSPORT_API_KEY', '')),
            'Accept': 'application/octet-stream'
        })
    response.raise_for_status()
    # Get the name of the currently available file
    fname = re.findall("filename=(.+)",
                       response.headers["Content-Disposition"])[0]
    # Get the path to the new file
    zpath = Path(str(path) + '/' + fname)
    # If that file already exists locally we are up to date
    up_to_date = zpath.exists()
    # Need to retrieve the current file
    if not up_to_date:
        response = requests.get(
            'https://api.transport.nsw.gov.au/v1/gtfs/schedule/sydneytrains',
            headers={
                'Authorization': 'apikey {}'.format(
                    app.config.get('SYDNEY_TRANSPORT_API_KEY', '')),
                'Accept': 'application/octet-stream'
            })
        response.raise_for_status()
        # Remove the old version of the file
        for f in glob.glob(str(path) + '/*.zip'):
            os.remove(f)
        # Extract the contents from the new archive.
        # NOTE(review): the local name `zipfile` shadows the stdlib module;
        # it only exists on this branch, but later references are guarded by
        # `if not up_to_date` and never evaluate otherwise.
        zipfile = ZipFile(BytesIO(response.content))
        zipfile.extractall(str(path))
        # Keep the archive on disk so the next run can detect up-to-dateness.
        with open(str(path) + '/' + fname, 'wb') as f:
            f.write(response.content)
            f.close()  # redundant: the `with` block closes the file
    # agency.txt: accumulated into a local list (unused beyond this loop).
    agency_file = zipfile.open('agency.txt') if not up_to_date else open(
        str(path) + '/agency.txt', 'rb')
    file = io.TextIOWrapper(agency_file, encoding='utf-8')
    agency_list = []
    for row in csv.DictReader(file):
        agency_list.append(row)
    agency_file.close()
    # trips.txt: module-level dict keyed by trip_id.
    trips_file = zipfile.open('trips.txt') if not up_to_date else open(
        str(path) + '/trips.txt', 'rb')
    file = io.TextIOWrapper(trips_file, encoding='utf-8')
    for row in csv.DictReader(file):
        trips[row.get('trip_id')] = row
    trips_file.close()
    # routes.txt: module-level dict keyed by route_id.
    routes_file = zipfile.open('routes.txt') if not up_to_date else open(
        str(path) + '/routes.txt', 'rb')
    file = io.TextIOWrapper(routes_file, encoding='utf-8')
    for row in csv.DictReader(file):
        routes[row.get('route_id')] = row
    routes_file.close()
    # stops.txt: module-level dict keyed by stop_id.
    stops_file = zipfile.open('stops.txt') if not up_to_date else open(
        str(path) + '/stops.txt', 'rb')
    file = io.TextIOWrapper(stops_file, encoding='utf-8')
    for row in csv.DictReader(file):
        stops[row.get('stop_id')] = row
    stops_file.close()
    # stop_times.txt is large; a filtered per-trip version is cached as JSON
    # and rebuilt only when a new bundle was downloaded.
    if not up_to_date:
        stop_time_file = zipfile.open('stop_times.txt') if not up_to_date \
            else open(str(path) + '/stop_times.txt', 'rb')
        file = io.TextIOWrapper(stop_time_file, encoding='utf-8')
        stop_times = csv.DictReader(file)
        for row in stop_times:
            # save only the first stop
            if times.get(row.get('trip_id')):
                # NOTE(review): csv values are strings, so this comparison is
                # lexicographic, and it keeps the row with the *greater*
                # sequence despite the "first stop" comment — confirm intent.
                if times.get(row.get('trip_id')).get(
                        'stop_sequence') < row.get('stop_sequence'):
                    times[row.get('trip_id')] = {
                        'stop_sequence': row.get('stop_sequence'),
                        'departure_time': row.get('departure_time')
                    }
            else:
                times[row.get('trip_id')] = {
                    'stop_sequence': row.get('stop_sequence'),
                    'departure_time': row.get('departure_time')
                }
        with open(str(path) + '/filtered_stop_times.json', 'w') as times_out:
            json.dump(times, times_out)
            times_out.close()  # redundant: the `with` block closes the file
        stop_time_file.close()
    else:
        # NOTE(review): this handle is never closed.
        times_out = open(str(path) + '/filtered_stop_times.json', 'r')
        times.update(json.load(times_out))
def golf_collation(item, **kwargs):
    """
    Collates a number of Golf results into a single story.

    It uses the location of the input item to filter the included stories. It
    expects the name of the golf course (links) to be in the slugline.
    Stories will be included based on the order of the slugline.
    If grouping results into regions it expects the region name to be in the
    anpa_take_key of the input item.

    :param item: the story the macro is run against; must have exactly one place
    :param kwargs:
    :return: the input item with the collated body, slugline and take key set
    :raises Exception: if the item has no single place, the Copytakers desk or
        its hold stages cannot be found
    """

    def get_desk():
        """
        Search for a desk on the system with the name "Copytakers".

        :return: the desk document
        :raises Exception: if no such desk exists
        """
        logger.info('Fetching the ObjectID for the desk "Copytakers".')
        query = {'name': 'Copytakers'}
        req = ParsedRequest()
        req.where = json.dumps(query)
        desk_service = get_resource_service('desks')
        desk_item = list(desk_service.get_from_mongo(req=req, lookup=None))
        if not desk_item:
            # Fixed: this was `raise ('...')`, which raises a TypeError
            # because a plain string is not an exception.
            raise Exception('Failed to find the desk called "Copytakers".')
        desk_id = desk_item[0]['_id']
        logger.info('ObjectID for the desk Copytakers is {}.'.format(desk_id))
        return desk_item[0]

    def get_hold_stages(desk_id):
        """
        Get any stages on the passed desk that have the word Hold in their name.

        :param desk_id:
        :return: cursor over the matching stages
        """
        lookup = {
            '$and': [{
                'name': {
                    '$regex': 'Hold',
                    '$options': 'i'
                }
            }, {
                'desk': str(desk_id)
            }]
        }
        return get_resource_service('stages').get(req=None, lookup=lookup)

    def get_result_items(location, desk_id, stage_ids, midnight_utc):
        """
        Find all stories that need to be collated.

        The subject must be golf (15027000), the place must match the input
        story, the slugline must not start with 'Golf Results' (the output
        story carries that slugline), the story must have been updated or
        created since local midnight, must sit on one of the hold stages of
        the Copytakers desk and must not be spiked.

        :param location:
        :param desk_id:
        :param stage_ids:
        :param midnight_utc:
        :return: cursor over the matching stories, sorted by slugline
        """
        query = {
            "query": {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [{
                                "term": {"place.qcode": location.get("qcode")}
                            }, {
                                "term": {"subject.qcode": "15027000"}
                            }, {
                                "term": {"task.desk": str(desk_id)}
                            }, {
                                "terms": {"task.stage": stage_ids}
                            }, {
                                "range": {"versioncreated": {"gte": midnight_utc}}
                            }],
                            "must_not": [{
                                "term": {"state": "spiked"}
                            }, {
                                "query": {
                                    "match_phrase_prefix": {
                                        "slugline": "Golf Results"
                                    }
                                }
                            }]
                        }
                    }
                }
            },
            "sort": [{"slugline": "asc"}],
            "size": 200
        }
        req = ParsedRequest()
        req.args = {'source': json.dumps(query), 'repo': 'archive'}
        return get_resource_service('search').get(req=req, lookup=None)

    if 'place' not in item or len(item.get('place')) != 1:
        # Fixed message: the original adjacent string literals concatenated
        # to "The story youre running ...".
        raise Exception(
            "The story you're running the macro on must have a single place defined")
    location = item.get('place')[0]

    # Read the file that groups golf courses into regions. Default to an
    # empty grouping so a failed load is logged rather than causing a
    # NameError further down (the original left `regions` unbound on error).
    regions = {'states': []}
    path = get_filepath('golf_links.json')
    try:
        with path.open('r') as f:
            regions = json.load(f)
    except Exception as ex:
        logger.error('Exception loading golf_links.json : {}'.format(ex))

    copytakers_desk = get_desk()

    # Attempt to get the hold stages for the Copytakers desk
    stages = get_hold_stages(copytakers_desk.get('_id'))
    stage_ids = [str(s.get('_id')) for s in stages]
    if len(stage_ids) == 0:
        raise Exception('No hold stages found on desk "{}"'.format(
            copytakers_desk.get('name')))

    # Local midnight expressed in UTC; lower bound of the search window.
    midnight_utc = datetime.now(pytz.timezone(app.config['DEFAULT_TIMEZONE']))\
        .replace(hour=0, minute=0, second=0, microsecond=0).astimezone(pytz.utc).isoformat()[:19] + 'z'

    # List of golf courses to include, if grouping by region
    links = None
    # A flag that indicates if all regions are to be included
    collated_grouped = False
    state_region = None
    # Get any entry from the golf links file for the state defined in the
    # location of the item story.
    state_regions = [
        s for s in regions.get('states')
        if s.get('state') == location.get('qcode')
    ]
    if len(state_regions):
        state_region = state_regions[0]
        # Match the value in the take key to any region in the links file
        region = [
            r for r in state_region.get('regions')
            if item.get('anpa_take_key', '') and
            r.get('name', '').lower() == item.get('anpa_take_key', '').lower()
        ]
        if len(region):
            links = region[0].get('links', [])
        else:
            # If no match is found then a collated story of all regions is
            # to be produced.
            collated_grouped = True

    items = sorted(list(
        get_result_items(location, copytakers_desk.get('_id'), stage_ids,
                         midnight_utc)),
        key=lambda s: s.get('slugline', '').lower())

    body = ''
    if collated_grouped:
        # Keep a set of the golf links already included so a link is not
        # emitted multiple times.
        include_links = set()
        for region in state_region.get('regions'):
            body += '<p>' + region.get('name') + '</p>'
            for i in items:
                for link in region.get('links'):
                    # Require a non-empty slugline: startswith('') is always
                    # true and would pull every story into every region.
                    if i.get('slugline') and \
                            link.lower().startswith(i.get('slugline', '').lower()) and \
                            link not in include_links:
                        body += i.get('body_html')
                        include_links.add(link)
    else:
        for i in items:
            if links:
                for link in links:
                    # Same empty-slugline guard as the grouped branch.
                    if i.get('slugline') and \
                            link.lower().startswith(i.get('slugline', '').lower()):
                        body += i.get('body_html')
            else:
                body += i.get('body_html')

    if not links:
        dayname = datetime.now(pytz.timezone(
            app.config['DEFAULT_TIMEZONE'])).strftime('%A')
        item['anpa_take_key'] = location.get('state', '') + ' ' + dayname
    item['body_html'] = body
    item['slugline'] = 'Golf Results'
    return item