Code example #1
def get_areas():
    """
    Read the GeoJSON file that outlines a list of named polygons describing the areas from which prices can be extracted.
    :return: the parsed GeoJSON structure, or an empty list if the file could not be read
    """
    areas = []
    path = get_filepath('fuel_geojson.json')
    try:
        with path.open('r') as f:
            areas = json.load(f)
    except Exception as ex:
        logger.error('Exception loading fuel_geojson.json : {}'.format(ex))
    return areas
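
All of these examples call a get_filepath helper that is not shown anywhere in the collection. A minimal sketch of what such a helper might look like, assuming the data files ship in a 'data' directory beside the calling module (the name and layout are assumptions, not the project's actual implementation):

from pathlib import Path


def get_filepath(filename):
    """Return a Path to a bundled data file (hypothetical implementation).

    Assumes the data files live in a 'data' directory beside this module.
    """
    return Path(__file__).parent / 'data' / filename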
Code example #2
    def _get_topics(self, formatted_article, topic_set, topic_index):
        """
        Using the Reuters catalog map the IPTC codes to the Reuters topics

        :param formatted_article:
        :param topic_set:
        :param topic_index:
        :return:
        """
        # Some AAP IPTC codes are specifically mapped to N2000 codes
        aap_map = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='reuters_iptc_n2000_map')
        path = get_filepath('topicset-reuters-3rdParty_news2000.xml')
        tree = etree.parse(str(path))
        for subject in formatted_article.get('subject', []):
            if aap_map:
                aap_mapped = [x for x in aap_map.get('items', []) if x.get('qcode', '') == subject.get('qcode', '')]
                if len(aap_mapped) == 1:
                    n2000_code = aap_mapped[0].get('name')
                    topic = tree.xpath(
                        './NewsItem/TopicSet/Topic/FormalName[text()="' + n2000_code + '"]')
                    if topic and len(topic) == 1:
                        topic_type = str(topic[0].xpath('../TopicType/@FormalName')[0])

                        topic_index += 1
                        topic = SubElement(topic_set, 'Topic', attrib={'Duid': 'T{num:04d}'.format(num=topic_index)})
                        SubElement(topic, 'TopicType', attrib={'FormalName': topic_type, 'Scheme': 'RTT'})
                        SubElement(topic, 'FormalName', attrib={'Scheme': 'N2000'}).text = n2000_code
                    # the special AAP mapping replaces the generic IPTC lookup for this subject
                    continue

            topics = tree.xpath('./NewsItem/TopicSet/Topic/FormalName[text()="iptc:' + subject.get('qcode', '') + '"]')
            if len(topics) == 1:
                topic_type = str(topics[0].xpath('../TopicType/@FormalName')[0])
                n2000_code = topics[0].xpath('../FormalName[@Scheme="N2000"]')[0].text
                topic_index += 1
                topic = SubElement(topic_set, 'Topic', attrib={'Duid': 'T{num:04d}'.format(num=topic_index)})
                SubElement(topic, 'TopicType', attrib={'FormalName': topic_type, 'Scheme': 'RTT'})
                SubElement(topic, 'FormalName', attrib={'Scheme': 'N2000'}).text = n2000_code
        return topic_index
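
The XPath lookups above assume a topic-set document shaped roughly like the sketch below; the element values are invented, and the real topicset-reuters-3rdParty_news2000.xml is far larger.

from lxml import etree

sample = b"""<NewsML>
  <NewsItem>
    <TopicSet>
      <Topic>
        <TopicType FormalName="Sport"/>
        <FormalName Scheme="N2000">SPO</FormalName>
        <FormalName>iptc:15000000</FormalName>
      </Topic>
    </TopicSet>
  </NewsItem>
</NewsML>"""

root = etree.fromstring(sample)
# Same query shape as _get_topics: find the Topic whose FormalName carries the IPTC code...
matches = root.xpath('./NewsItem/TopicSet/Topic/FormalName[text()="iptc:15000000"]')
# ...then read the sibling N2000 FormalName and the TopicType off the parent Topic
print(matches[0].xpath('../FormalName[@Scheme="N2000"]')[0].text)  # SPO
print(matches[0].xpath('../TopicType/@FormalName')[0])  # Sport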
Code example #3
    def run(self, id):
        self.eocstat_map = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='eventoccurstatus')

        # You will need to generate a service account credentials (JSON key) file
        creds = str(get_filepath('Quickstart-44dd51f59d5a.json'))

        credentials = ServiceAccountCredentials.from_json_keyfile_name(creds, self.scope)

        gc = gspread.authorize(credentials)

        sheet = gc.open("Sport Fixtures for Superdesk Planning Tool")

        # Scan the worksheets, skipping the first
        cities = app.locators.find_cities()
        for wks in sheet.worksheets()[1:]:
            dates = wks.col_values(1, value_render_option='UNFORMATTED_VALUE')
            all_vals = wks.get_all_values()
            for v in all_vals:
                self.get_time_zone(cities, v, wks.title)

            self._process_sheet(wks.title, dates, all_vals, wks.id)
            time.sleep(2)  # pause between worksheets (presumably to stay under the Sheets API rate limit)
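
A side note on the authorization: oauth2client, the source of ServiceAccountCredentials, has been deprecated in favour of google-auth. A minimal sketch of the equivalent login using gspread's built-in helper, assuming a gspread release of 3.6 or later and the same key file:

import gspread

# service_account() wraps google-auth and replaces the oauth2client flow above
gc = gspread.service_account(filename='Quickstart-44dd51f59d5a.json')
sheet = gc.open("Sport Fixtures for Superdesk Planning Tool")
print([wks.title for wks in sheet.worksheets()])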
Code example #4
    def get_reference_data():
        """Get the schedule data for Sydney trains so that routes and departure times can be identified

        :return:
        """

        # Get the path where the reference data is stored
        path = get_filepath('sydney_trains')

        # Issue a HEAD request to get the current response headers
        response = requests.head('https://api.transport.nsw.gov.au/v1/gtfs/schedule/sydneytrains',
                                 headers={'Authorization':
                                          'apikey {}'.format(app.config.get('SYDNEY_TRANSPORT_API_KEY', '')),
                                          'Accept': 'application/octet-stream'})
        response.raise_for_status()

        # Get the name of the currently available file
        fname = re.findall("filename=(.+)", response.headers["Content-Disposition"])[0]

        # Get the path to the new file
        zpath = path / fname

        # If the current file exists we are up to date
        up_to_date = zpath.exists()

        # Need to retrieve the current file
        if not up_to_date:
            response = requests.get('https://api.transport.nsw.gov.au/v1/gtfs/schedule/sydneytrains',
                                    headers={'Authorization':
                                             'apikey {}'.format(app.config.get('SYDNEY_TRANSPORT_API_KEY', '')),
                                             'Accept': 'application/octet-stream'})
            response.raise_for_status()

            # Remove the old version of the file
            for f in glob.glob(str(path) + '/*.zip'):
                os.remove(f)

            # extract the contents from the new one
            zf = ZipFile(BytesIO(response.content))
            zf.extractall(str(path))

            # keep a copy of the zip so the next run can detect that it is up to date
            with open(zpath, 'wb') as f:
                f.write(response.content)

        agency_file = zf.open('agency.txt') if not up_to_date else open(path / 'agency.txt', 'rb')
        file = io.TextIOWrapper(agency_file, encoding='utf-8')
        agency_list = []
        for row in csv.DictReader(file):
            agency_list.append(row)
        agency_file.close()

        trips_file = zf.open('trips.txt') if not up_to_date else open(path / 'trips.txt', 'rb')
        file = io.TextIOWrapper(trips_file, encoding='utf-8')
        for row in csv.DictReader(file):
            trips[row.get('trip_id')] = row
        trips_file.close()

        routes_file = zf.open('routes.txt') if not up_to_date else open(path / 'routes.txt', 'rb')
        file = io.TextIOWrapper(routes_file, encoding='utf-8')
        for row in csv.DictReader(file):
            routes[row.get('route_id')] = row
        routes_file.close()

        stops_file = zf.open('stops.txt') if not up_to_date else open(path / 'stops.txt', 'rb')
        file = io.TextIOWrapper(stops_file, encoding='utf-8')
        for row in csv.DictReader(file):
            stops[row.get('stop_id')] = row
        stops_file.close()

        if not up_to_date:
            stop_time_file = zf.open('stop_times.txt')
            file = io.TextIOWrapper(stop_time_file, encoding='utf-8')
            for row in csv.DictReader(file):
                # save only the first stop of each trip (the lowest stop_sequence);
                # compare the sequence numbers as integers, the CSV values are strings
                entry = {'stop_sequence': row.get('stop_sequence'),
                         'departure_time': row.get('departure_time')}
                current = times.get(row.get('trip_id'))
                if current is None or int(current['stop_sequence']) > int(row['stop_sequence']):
                    times[row.get('trip_id')] = entry
            with open(path / 'filtered_stop_times.json', 'w') as times_out:
                json.dump(times, times_out)
            stop_time_file.close()
        else:
            with open(path / 'filtered_stop_times.json', 'r') as times_in:
                times.update(json.load(times_in))
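
A hypothetical usage sketch of the module-level dictionaries this function populates; the field names follow the GTFS spec, but the trip id value is invented:

# after get_reference_data() has run, a trip's route and first departure can be joined
trip_id = '100.A.sydneytrains'  # invented GTFS trip_id
trip = trips.get(trip_id, {})
route = routes.get(trip.get('route_id'), {})
first_departure = times.get(trip_id, {}).get('departure_time')
print(route.get('route_long_name'), first_departure)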
Code example #5
def golf_collation(item, **kwargs):
    """
    Collates a number of Golf results into a single story.
    It uses the location of the input item to filter the included stories.
    It expects the name of the golf course (links) to be in the slugline.
    Stories will be included based on the order of the slugline.
    If grouping results into regions, it expects the region name to be in the anpa_take_key of the input item.
    :param item:
    :param kwargs:
    :return: the input item with the collated body_html and slugline set
    """
    def get_desk():
        """
        Search for a desk on the system with the name "Copytakers"
        :return:
        """
        logger.info('Fetching the ObjectID for the desk "Copytakers".')
        query = {'name': 'Copytakers'}
        req = ParsedRequest()
        req.where = json.dumps(query)

        desk_service = get_resource_service('desks')
        desk_item = list(desk_service.get_from_mongo(req=req, lookup=None))
        if not desk_item:
            raise Exception('Failed to find a desk called "Copytakers".')

        desk_id = desk_item[0]['_id']
        logger.info('ObjectID for the desk Copytakers is {}.'.format(desk_id))
        return desk_item[0]

    def get_hold_stages(desk_id):
        """
        Get any stages on the passed desk that have the word Hold in their name
        :param desk_id:
        :return:
        """
        lookup = {
            '$and': [{
                'name': {
                    '$regex': 'Hold',
                    '$options': 'i'
                }
            }, {
                'desk': str(desk_id)
            }]
        }
        stages = get_resource_service('stages').get(req=None, lookup=lookup)
        return stages

    def get_result_items(location, desk_id, stage_ids, midnight_utc):
        """
        Need to find all stories that need to be collated:
        The subject should be golf
        The place should match that of the story the macro is being run against
        The slugline should not start with 'Golf Results' (the output story will have this slugline)
        The story should be updated/created since midnight
        Should be on a hold stage of the Copytakers desk
        Not spiked
        Not already a collated story
        :param location:
        :param desk_id:
        :param stage_ids:
        :param midnight_utc:
        :return:
        """
        query = {
            "query": {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [{
                                "term": {
                                    "place.qcode": location.get("qcode")
                                }
                            }, {
                                "term": {
                                    "subject.qcode": "15027000"
                                }
                            }, {
                                "term": {
                                    "task.desk": str(desk_id)
                                }
                            }, {
                                "terms": {
                                    "task.stage": stage_ids
                                }
                            }, {
                                "range": {
                                    "versioncreated": {
                                        "gte": midnight_utc
                                    }
                                }
                            }],
                            "must_not": [{
                                "term": {
                                    "state": "spiked"
                                }
                            }, {
                                "query": {
                                    "match_phrase_prefix": {
                                        "slugline": "Golf Results"
                                    }
                                }
                            }]
                        }
                    }
                }
            },
            "sort": [{
                "slugline": "asc"
            }],
            "size": 200
        }

        req = ParsedRequest()
        repos = 'archive'
        req.args = {'source': json.dumps(query), 'repo': repos}
        return get_resource_service('search').get(req=req, lookup=None)

    if 'place' not in item or len(item.get('place')) != 1:
        raise Exception(
            "The story you're running the macro on must have a single place defined")
    location = item.get('place')[0]

    # Read the file that groups golf courses into regions
    path = get_filepath('golf_links.json')
    regions = {}
    try:
        with path.open('r') as f:
            regions = json.load(f)
    except Exception as ex:
        logger.error('Exception loading golf_links.json : {}'.format(ex))

    copytakers_desk = get_desk()

    # Attempt to get the hold stages for the Copytakers desk
    stages = get_hold_stages(copytakers_desk.get('_id'))
    stage_ids = [str(s.get('_id')) for s in stages]
    if len(stage_ids) == 0:
        raise Exception('No hold stages found on desk "{}"'.format(
            copytakers_desk.get('name')))

    # Get the local midnight in UTC
    midnight_utc = datetime.now(pytz.timezone(app.config['DEFAULT_TIMEZONE']))\
        .replace(hour=0, minute=0, second=0, microsecond=0).astimezone(pytz.utc).isoformat()[:19] + 'Z'

    # List of golf courses to include, if grouping by region
    links = None
    # A flag that indicates if all regions are to be included
    collated_grouped = False

    # Get any entry from the golf links file for the state defined in the location of the item story
    state_regions = [
        s for s in regions.get('states', [])
        if s.get('state') == location.get('qcode')
    ]
    if len(state_regions):
        state_region = state_regions[0]
        # Match the value in the take key to any region in the links file
        region = [
            r for r in state_region.get('regions')
            if item.get('anpa_take_key', '') and r.get('name', '').lower() ==
            item.get('anpa_take_key', '').lower()
        ]
        if len(region):
            links = region[0].get('links', [])
        else:
            # If no match is found then it is assumed that a collated story of all regions is to be produced.
            collated_grouped = True

    items = sorted(list(
        get_result_items(location, copytakers_desk.get('_id'), stage_ids,
                         midnight_utc)),
                   key=lambda s: s.get('slugline', '').lower())
    body = ''
    if collated_grouped:
        # keep a set of the golf links that have been included so as not to include them multiple times
        included_links = set()
        for region in state_region.get('regions'):
            body += '<p>' + region.get('name') + '</p>'
            for i in items:
                for link in region.get('links'):
                    if link.lower().startswith(
                            i.get('slugline', '').lower()) and link not in included_links:
                        body += i.get('body_html')
                        included_links.add(link)
    else:
        for i in items:
            if links:
                for link in links:
                    if link.lower().startswith(i.get('slugline', '').lower()):
                        body += i.get('body_html')
            else:
                body += i.get('body_html')

    if not links:
        dayname = datetime.now(pytz.timezone(
            app.config['DEFAULT_TIMEZONE'])).strftime('%A')
        item['anpa_take_key'] = location.get('state', '') + ' ' + dayname

    item['body_html'] = body
    item['slugline'] = 'Golf Results'

    return item
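
Finally, a hypothetical sketch of the golf_links.json structure implied by the lookups above (states -> regions -> links); every value shown is invented:

# hypothetical shape of golf_links.json, inferred from the lookups in golf_collation
golf_links = {
    'states': [
        {
            'state': 'NSW',  # matched against the item's place qcode
            'regions': [
                {
                    'name': 'Sydney Metropolitan',  # matched against anpa_take_key
                    'links': ['Bondi Golf Club', 'Moore Park Golf Course']
                }
            ]
        }
    ]
}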