Example 1
    def post(self):
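        """Handles an incoming webmention: fetches and parses the source page, then delivers it via ActivityPub or Salmon."""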
        logging.info('(Params: %s )', self.request.params.items())

        # fetch source page
        source = util.get_required_param(self, 'source')
        source_resp = common.requests_get(source)
        self.source_url = source_resp.url or source
        self.source_domain = urlparse.urlparse(self.source_url).netloc.split(':')[0]
        self.source_mf2 = mf2py.parse(source_resp.text, url=self.source_url, img_with_alt=True)
        # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(self.source_mf2, indent=2))

        # check for backlink to bridgy fed (for webmention spec and to confirm
        # source's intent to federate to mastodon)
        if (self.request.host_url not in source_resp.text and
            urllib.quote(self.request.host_url, safe='') not in source_resp.text):
            common.error(self, "Couldn't find link to %s" % self.request.host_url)

        # convert source page to ActivityStreams
        entry = mf2util.find_first_entry(self.source_mf2, ['h-entry'])
        if not entry:
            common.error(self, 'No microformats2 found on %s' % self.source_url)

        logging.info('First entry: %s', json.dumps(entry, indent=2))
        # make sure it has url, since we use that for AS2 id, which is required
        # for ActivityPub.
        props = entry.setdefault('properties', {})
        if not props.get('url'):
            props['url'] = [self.source_url]

        self.source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
        logging.info('Converted to AS1: %s', json.dumps(self.source_obj, indent=2))

        self.try_activitypub() or self.try_salmon()
Example 2
    def get(self, domain):
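        """Serves a domain's ActivityPub actor object, built from its representative h-card."""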
        url = 'http://%s/' % domain
        resp = common.requests_get(url)
        mf2 = mf2py.parse(resp.text, url=resp.url, img_with_alt=True)
        # logging.info('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2))

        hcard = mf2util.representative_hcard(mf2, resp.url)
        logging.info('Representative h-card: %s', json.dumps(hcard, indent=2))
        if not hcard:
            common.error(
                self, """\
Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on %s"""
                % resp.url)

        key = MagicKey.get_or_create(domain)
        obj = common.postprocess_as2(
            as2.from_as1(microformats2.json_to_object(hcard)), key=key)
        obj.update({
            'inbox': '%s/%s/inbox' % (appengine_config.HOST_URL, domain),
            'outbox': '%s/%s/outbox' % (appengine_config.HOST_URL, domain),
            'following': '%s/%s/following' % (appengine_config.HOST_URL, domain),
            'followers': '%s/%s/followers' % (appengine_config.HOST_URL, domain),
        })
        logging.info('Returning: %s', json.dumps(obj, indent=2))

        self.response.headers.update({
            'Content-Type': common.CONTENT_TYPE_AS2,
            'Access-Control-Allow-Origin': '*',
        })
        self.response.write(json.dumps(obj, indent=2))
Example 3
def call_fieldclimate_api(apiRoute, publicKey, privateKey, filename):
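    """Calls the given FieldClimate API route with HMAC auth, writes the prettified JSON response to a file, and returns it along with a flattened DataFrame."""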
    auth = AuthHmacMetosGet(apiRoute, publicKey, privateKey)
    response = common.requests_get(url=apiURI + apiRoute, headers={'Accept': 'application/json'}, auth=auth)
    parsed = json.loads(response.text)
    # print(response.json())
    pretty_resp = json.dumps(parsed, indent=4, sort_keys=True)
    # print(pretty_resp)
    with open(f'{credentials.path}json/{filename}.json', 'w+') as resp_file:
        resp_file.write(pretty_resp)

    normalized = pd.json_normalize(parsed)
    return pretty_resp, normalized
Example 4
def wait_for_idle(harvester_id):
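    """Polls the ODS management API every 10 seconds until the given harvester reports status 'idle'."""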
    while True:
        print(f'Checking status of harvester "{harvester_id}"...')
        resp = common.requests_get(
            url=f'https://basel-stadt.opendatasoft.com/api/management/v2/harvesters/{harvester_id}/',
            auth=(credentials.ods_user, credentials.ods_password),
            proxies={'https': credentials.proxy})
        handle_http_errors(resp)
        status = resp.json()['status']
        print(f'Harvester "{harvester_id}" is "{status}".')
        if status == 'idle':
            break
        else:
            seconds = 10
            print(f'Waiting {seconds} seconds before trying again...')
            time.sleep(seconds)
Example 5
def add_wm(url=None):
    """Proxies HTTP requests and adds Link header to our webmention endpoint."""
    url = urllib.parse.unquote(url)
    if not url.startswith('http://') and not url.startswith('https://'):
        error('URL must start with http:// or https://')

    try:
        got = common.requests_get(url)
    except requests.exceptions.Timeout as e:
        error(str(e), status=504, exc_info=True)
    except requests.exceptions.RequestException as e:
        error(str(e), status=502, exc_info=True)

    resp = flask.make_response(got.content, got.status_code, dict(got.headers))
    resp.headers.add(
        'Link', LINK_HEADER %
        (request.args.get('endpoint') or request.host_url + 'webmention'))
    return resp
Example 6
    def get(self, url):
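        """Proxies the given URL and adds a Link header pointing to our webmention endpoint."""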
        url = urllib.parse.unquote(url)
        if not url.startswith('http://') and not url.startswith('https://'):
            self.error('URL must start with http:// or https://')

        try:
            resp = common.requests_get(url)
        except requests.exceptions.Timeout as e:
            self.error(str(e), status=504, exc_info=True)
        except requests.exceptions.RequestException as e:
            self.error(str(e), status=502, exc_info=True)

        self.response.status_int = resp.status_code
        self.response.write(resp.content)

        endpoint = LINK_HEADER % (str(self.request.get('endpoint'))
                                  or self.request.host_url + '/webmention')
        self.response.headers.clear()
        self.response.headers.update(resp.headers)
        self.response.headers.add('Link', endpoint)
Example 7
    def dispatch_request(self):
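        """Handles an incoming webmention: fetches and parses the source page, then delivers it via ActivityPub or Salmon."""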
        logging.info(f'Params: {list(request.form.items())}')

        # fetch source page
        source = flask_util.get_required_param('source')
        source_resp = common.requests_get(source)
        self.source_url = source_resp.url or source
        self.source_domain = urllib.parse.urlparse(
            self.source_url).netloc.split(':')[0]
        self.source_mf2 = util.parse_mf2(source_resp)

        # logging.debug(f'Parsed mf2 for {source_resp.url}: {json_dumps(self.source_mf2, indent=2)}')

        # check for backlink to bridgy fed (for webmention spec and to confirm
        # source's intent to federate to mastodon)
        if (request.host_url not in source_resp.text and urllib.parse.quote(
                request.host_url, safe='') not in source_resp.text):
            error("Couldn't find link to {request.host_url}")

        # convert source page to ActivityStreams
        entry = mf2util.find_first_entry(self.source_mf2, ['h-entry'])
        if not entry:
            error(f'No microformats2 found on {self.source_url}')

        logging.info(f'First entry: {json_dumps(entry, indent=2)}')
        # make sure it has url, since we use that for AS2 id, which is required
        # for ActivityPub.
        props = entry.setdefault('properties', {})
        if not props.get('url'):
            props['url'] = [self.source_url]

        self.source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
        logging.info(
            f'Converted to AS1: {json_dumps(self.source_obj, indent=2)}')

        for method in self.try_activitypub, self.try_salmon:
            ret = method()
            if ret:
                return ret

        return ''
Example 8
def retrieve_vmdl_data():
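    """Fetches an OAuth2 access token, downloads the VMDL export with it, writes the data to a local file, and returns the file path."""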
    print(f'Retrieving vmdl data...')
    payload_token = f'client_id={credentials.vmdl_client_id}&scope={credentials.vmdl_scope}&username={credentials.vmdl_user}&password={credentials.vmdl_password}&grant_type=password'
    headers_token = {'Content-Type': 'application/x-www-form-urlencoded'}
    print(f'Getting OAUTH2 access token...')
    resp_token = requests.request("POST", credentials.vmdl_url_token, headers=headers_token, data=payload_token)
    resp_token.raise_for_status()
    # token_type = resp_token.json()['token_type']
    auth_string = f'Bearer {resp_token.json()["access_token"]}'

    payload_download = {}
    headers_download = {'Authorization': auth_string}
    print(f'Downloading data...')
    resp_download = common.requests_get(credentials.vmdl_url_download, headers=headers_download, data=payload_download)
    resp_download.raise_for_status()
    file_path = os.path.join(credentials.vmdl_path, credentials.vmdl_file)
    print(f'Writing data to file {file_path}...')
    resp_download.encoding = 'utf-8'
    with open(file_path, "w") as f:
        f.write(resp_download.text)
    return file_path
Example 9
    def convert_to_as2(self, url):
        """Fetch a URL as HTML, convert it to AS2, and return it.

        Currently mainly for Pixelfed.
        https://github.com/snarfed/bridgy-fed/issues/39
        """
        resp = common.requests_get(url)
        mf2 = mf2py.parse(resp.text, url=resp.url, img_with_alt=True)
        entry = mf2util.find_first_entry(mf2, ['h-entry'])
        logging.info('Parsed mf2 for %s: %s', resp.url,
                     json.dumps(entry, indent=2))

        obj = common.postprocess_as2(
            as2.from_as1(microformats2.json_to_object(entry)))
        logging.info('Returning: %s', json.dumps(obj, indent=2))

        self.response.headers.update({
            'Content-Type': common.CONTENT_TYPE_AS2,
            'Access-Control-Allow-Origin': '*',
        })
        self.response.write(json.dumps(obj, indent=2))
Example 10
from bag_coronavirus import credentials
import os
import common
import pandas as pd

print(f"Getting today's data url...")
context_json = common.requests_get(
    url='https://www.covid19.admin.ch/api/data/context').json()
csv_daily_tests_url = context_json['sources']['individual']['csv']['daily']['test']
print(f'Reading current csv from {csv_daily_tests_url} into data frame...')
df = pd.read_csv(csv_daily_tests_url)
print('Filtering for BS rows, keeping selected columns, and renaming them...')
df_bs = df.query('geoRegion == "BS"')
df_bs = df_bs.filter(items=['datum', 'entries_neg', 'entries_pos', 'entries'])
df_bs['positivity_rate_percent'] = df_bs['entries_pos'] / df_bs['entries'] * 100
df_bs['positivity_rate'] = df_bs['entries_pos'] / df_bs['entries']
df_bs = df_bs.rename(
    columns={
        'entries_neg': 'negative_tests',
        'entries_pos': 'positive_tests',
        'entries': 'total_tests'
    })
print(f'Calculating columns...')
df_bs['dayofweek'] = pd.to_datetime(df_bs['datum']).dt.dayofweek + 1
df_bs['weekday_nr'] = pd.to_datetime(df_bs['datum']).dt.dayofweek
df_bs['woche'] = pd.to_datetime(df_bs['datum']).dt.isocalendar().week

export_file_name = os.path.join(credentials.path, credentials.file_name)
print(f'Exporting to file {export_file_name}...')
Example 11
import common
from xml.etree import ElementTree
import requests

from bafu_hydrodaten import credentials

print(f'Connecting to HTTPS Server to read data...')

local_path = 'bafu_hydrodaten/data'
r = common.requests_get(url=credentials.https_live_url,
                        auth=(credentials.https_user, credentials.https_pass))

print(f'Parsing response XML...')
root = ElementTree.fromstring(r.content)
# for child in root:
#     print(child.tag, child.attrib)

timestamp = root.find(
    ".//*[@number='2289']/parameter[@type='2']/datetime").text
pegelstand = root.find(".//*[@number='2289']/parameter[@type='2']/value").text
abfluss = root.find(".//*[@number='2289']/parameter[@type='10']/value").text

print(f'current data: ')
print(f'Timestamp: {timestamp}')
print(f'Pegelstand: {pegelstand}')
print(f'Abfluss: {abfluss}')

print(f'Posting data to ods...')
payload = {
    'zeitstempel': timestamp,
    'pegel': pegelstand,
Example 12
    def template_vars(self, domain, url=None):
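        """Fetches the domain's representative h-card and builds its WebFinger data."""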
        assert domain

        if domain.split('.')[-1] in NON_TLDS:
            common.error(self,
                         "%s doesn't look like a domain" % domain,
                         status=404)

        # find representative h-card. try url, then url's home page, then domain
        urls = ['http://%s/' % domain]
        if url:
            urls = [url, urlparse.urljoin(url, '/')] + urls

        for candidate in urls:
            resp = common.requests_get(candidate)
            parsed = common.beautifulsoup_parse(resp.content,
                                                from_encoding=resp.encoding)
            mf2 = mf2py.parse(parsed, url=resp.url, img_with_alt=True)
            # logging.debug('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2))
            hcard = mf2util.representative_hcard(mf2, resp.url)
            if hcard:
                logging.info('Representative h-card: %s',
                             json.dumps(hcard, indent=2))
                break
        else:
            common.error(
                self, """\
Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on %s"""
                % resp.url)

        logging.info('Generating WebFinger data for %s', domain)
        key = models.MagicKey.get_or_create(domain)
        props = hcard.get('properties', {})
        urls = util.dedupe_urls(props.get('url', []) + [resp.url])
        canonical_url = urls[0]

        acct = '%s@%s' % (domain, domain)
        for url in urls:
            if url.startswith('acct:'):
                urluser, urldomain = util.parse_acct_uri(url)
                if urldomain == domain:
                    acct = '%s@%s' % (urluser, domain)
                    logging.info('Found custom username: acct:%s', acct)
                    break

        # discover atom feed, if any
        atom = parsed.find('link',
                           rel='alternate',
                           type=common.CONTENT_TYPE_ATOM)
        if atom and atom['href']:
            atom = urlparse.urljoin(resp.url, atom['href'])
        else:
            atom = 'https://granary.io/url?' + urllib.urlencode(
                {
                    'input': 'html',
                    'output': 'atom',
                    'url': resp.url,
                    'hub': resp.url,
                })

        # discover PuSH, if any
        hub = 'https://bridgy-fed.superfeedr.com/'
        for link in resp.headers.get('Link', '').split(','):
            match = common.LINK_HEADER_RE.match(link)
            if match and match.group(2) == 'hub':
                hub = match.group(1)
                break

        # generate webfinger content
        data = util.trim_nulls({
            'subject':
            'acct:' + acct,
            'aliases':
            urls,
            'magic_keys': [{
                'value': key.href()
            }],
            'links':
            sum(([{
                'rel': 'http://webfinger.net/rel/profile-page',
                'type': 'text/html',
                'href': url,
            }] for url in urls if url.startswith("http")), []) + [{
                'rel': 'http://webfinger.net/rel/avatar',
                'href': url,
            } for url in props.get('photo', [])] + [
                {
                    'rel': 'canonical_uri',
                    'type': 'text/html',
                    'href': canonical_url,
                },

                # ActivityPub
                {
                    'rel': 'self',
                    'type': 'application/activity+json',
                    # use HOST_URL instead of e.g. request.host_url because it
                    # sometimes lost port, e.g. http://localhost:8080 would become
                    # just http://localhost. no clue how or why.
                    'href': '%s/%s' % (appengine_config.HOST_URL, domain),
                },
                {
                    'rel': 'inbox',
                    'type': 'application/activity+json',
                    'href': '%s/%s/inbox' %
                    (appengine_config.HOST_URL, domain),
                },

                # OStatus
                {
                    'rel': 'http://schemas.google.com/g/2010#updates-from',
                    'type': common.CONTENT_TYPE_ATOM,
                    'href': atom,
                },
                {
                    'rel': 'hub',
                    'href': hub,
                },
                {
                    'rel': 'magic-public-key',
                    'href': key.href(),
                },
                {
                    'rel': 'salmon',
                    'href': '%s/%s/salmon' %
                    (appengine_config.HOST_URL, domain),
                }
            ]
        })
        logging.info('Returning WebFinger data: %s', json.dumps(data,
                                                                indent=2))
        return data
Example 13
                # Load metadata from geocat.ch
                # See documentation at https://www.geocat.admin.ch/de/dokumentation/csw.html
                # For unknown reasons (probably proxy-related), requests always returns http error 404, so we have to revert to launching curl in a subprocess
                # curl -X GET "https://www.geocat.ch/geonetwork/srv/api/0.1/records/289b9c0c-a1bb-4ffc-ba09-c1e41dc7138a" -H "accept: application/json" -H "Accept: application/xml" -H "X-XSRF-TOKEN: a1284e46-b378-42a4-ac6a-d48069e05494"
                # resp = requests.get('https://www.geocat.ch/geonetwork/srv/api/0.1/records/2899c0c-a1bb-4ffc-ba09-c1e41dc7138a', params={'accept': 'application/json'}, proxies={'https': credentials.proxy})
                # resp = requests.get('https://www.geocat.ch/geonetwork/srv/api/0.1/records/2899c0c-a1bb-4ffc-ba09-c1e41dc7138a', headers={'accept': 'application/xml, application/json'}, proxies={'https': credentials.proxy})
                # cmd = 'curl -X GET "https://www.geocat.ch/geonetwork/srv/api/0.1/records/289b9c0c-a1bb-4ffc-ba09-c1e41dc7138a" -H "accept: application/json" -H "accept: application/json" -k'
                # args = shlex.split(cmd)

                # In some geocat URLs there's a tab character, remove it.
                geocat_uid = row['geocat'].rsplit('/', 1)[-1].replace('\t', '')
                geocat_url = f'https://www.geocat.ch/geonetwork/srv/api/0.1/records/{geocat_uid}'
                print(f'Getting metadata from {geocat_url}...')
                r = common.requests_get(
                    geocat_url,
                    headers={'accept': 'application/xml, application/json'},
                    proxies={'https': credentials.proxy})
                r.raise_for_status()
                metadata = r.json()

                # metadata_file = os.path.join(credentials.path_root, 'metadata', geocat_uid + '.json')
                # cmd = '/usr/bin/curl --proxy ' + credentials.proxy + ' "https://www.geocat.ch/geonetwork/srv/api/0.1/records/' + geocat_uid + '" -H "accept: application/json" -s -k > ' + metadata_file
                # print('Running curl to get geocat.ch metadata: ')
                # resp = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE)
                # print('Processing geocat.ch metadata file ' + metadata_file + '...')
                # with open(metadata_file, 'r') as json_file:
                #     print('Adding shape ' + shpfilename_noext + ' to harvester csv...')
                #     json_string = json_file.read()
                #     metadata = json.loads(json_string)
                #     # ...continue code on this level...
Example 14
    def send_salmon(self, source_obj, target_resp=None):
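        """Fetches the target post's Atom entry, wraps the source object in a signed magic envelope, and posts it to the discovered Salmon endpoint."""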
        self.resp.protocol = 'ostatus'

        # fetch target HTML page, extract Atom rel-alternate link
        if not target_resp:
            target_resp = common.requests_get(self.resp.target())

        parsed = BeautifulSoup(target_resp.content, from_encoding=target_resp.encoding)
        atom_url = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
        if not atom_url or not atom_url.get('href'):
            common.error(self, 'Target post %s has no Atom link' % self.resp.target(),
                         status=400)

        # fetch Atom target post, extract and inject id into source object
        feed = common.requests_get(atom_url['href']).text
        parsed = feedparser.parse(feed)
        logging.info('Parsed: %s', json.dumps(parsed, indent=2,
                                              default=lambda key: '-'))
        entry = parsed.entries[0]
        target_id = entry.id
        in_reply_to = source_obj.get('inReplyTo')
        source_obj_obj = source_obj.get('object')
        if in_reply_to:
            in_reply_to[0]['id'] = target_id
        elif isinstance(source_obj_obj, dict):
            source_obj_obj['id'] = target_id

        # Mastodon (and maybe others?) require a rel-mentioned link to the
        # original post's author to make it show up as a reply:
        #   app/services/process_interaction_service.rb
        # ...so add them as a tag, which atom renders as a rel-mention link.
        authors = entry.get('authors', None)
        if authors:
            url = entry.authors[0].get('href')
            if url:
                source_obj.setdefault('tags', []).append({'url': url})

        # extract and discover salmon endpoint
        logging.info('Discovering Salmon endpoint in %s', atom_url['href'])
        endpoint = django_salmon.discover_salmon_endpoint(feed)

        if not endpoint:
            # try webfinger
            parsed = urlparse.urlparse(self.resp.target())
            # TODO: test missing email
            email = entry.author_detail.get('email') or '@'.join(
                (entry.author_detail.name, parsed.netloc))
            try:
                # TODO: always https?
                resp = common.requests_get(
                    '%s://%s/.well-known/webfinger?resource=acct:%s' %
                    (parsed.scheme, parsed.netloc, email), verify=False)
                endpoint = django_salmon.get_salmon_replies_link(resp.json())
            except requests.HTTPError as e:
                pass

        if not endpoint:
            common.error(self, 'No salmon endpoint found!', status=400)
        logging.info('Discovered Salmon endpoint %s', endpoint)

        # construct reply Atom object
        source_url = self.resp.source()
        activity = (source_obj if source_obj.get('verb') in source.VERBS_WITH_OBJECT
                    else {'object': source_obj})
        entry = atom.activity_to_atom(activity, xml_base=source_url)
        logging.info('Converted %s to Atom:\n%s', source_url, entry)

        # sign reply and wrap in magic envelope
        domain = urlparse.urlparse(source_url).netloc
        key = MagicKey.get_or_create(domain)
        logging.info('Using key for %s: %s', domain, key)
        magic_envelope = magicsigs.magic_envelope(
            entry, common.CONTENT_TYPE_ATOM, key)

        logging.info('Sending Salmon slap to %s', endpoint)
        common.requests_post(
            endpoint, data=common.XML_UTF8 + magic_envelope,
            headers={'Content-Type': common.CONTENT_TYPE_MAGIC_ENVELOPE})
Example 15
import common
import os
from ods_catalog import credentials
url = 'https://data.bs.ch/explore/dataset/100055/download/?format=csv&use_labels_for_header=true&refine.visibility=domain&refine.publishing_published=True'
file = os.path.join(credentials.path, credentials.filename)
print(f'Downloading {file} from {url}...')
r = common.requests_get(url, auth=(credentials.ods_user, credentials.ods_password))
with open(file, 'wb') as f:
    f.write(r.content)
common.upload_ftp(filename=file, server=credentials.ftp_server, user=credentials.ftp_user, password=credentials.ftp_pass, remote_path=credentials.ftp_path)
Example 16
                    ftp.retrbinary(f"RETR {file_name}", f.write)
                local_files[(station, date_string)] = local_file
    ftp.quit()


print(f'Connecting to FTP Server to read data...')
download_data_files()
dfs = {}
all_data = pd.DataFrame(
    columns=['LocalDateTime', 'Value', 'Latitude', 'Longitude', 'EUI'])
print('Reading csv files into data frames...')
urllib3.disable_warnings()
for station in stations:
    print(f'Retrieving latest timestamp for station "{station}" from ODS...')
    r = common.requests_get(
        url=f'https://data.bs.ch/api/records/1.0/search/?dataset=100087&q=&rows=1&sort=timestamp&refine.station_id={station}',
        verify=False)
    r.raise_for_status()
    latest_ods_timestamp = r.json()['records'][0]['fields']['timestamp']
    print(f'Latest timestamp is {latest_ods_timestamp}.')
    for date_string in [yesterday_string, today_string]:
        try:
            print(f"Reading {local_files[(station, date_string)]}...")
            df = pd.read_csv(local_files[(station, date_string)],
                             sep=';',
                             na_filter=False)
            print(f'Calculating ISO8601 time string...')
            df['timestamp'] = pd.to_datetime(
                df.LocalDateTime,
                format='%d.%m.%Y %H:%M').dt.tz_localize('Europe/Zurich',
                                                        ambiguous='infer')
Example 17
from datetime import datetime
import urllib3
import os
import pandas as pd
import common
import json
from requests.auth import HTTPBasicAuth
from functools import reduce
from bafu_hydrodaten import credentials

print('Loading data into data frames...')
dfs = []
for file in credentials.files:
    response = common.requests_get(f'{credentials.https_url}/{file}',
                                   auth=HTTPBasicAuth(credentials.https_user,
                                                      credentials.https_pass),
                                   stream=True)
    df = pd.read_csv(response.raw,
                     parse_dates=True,
                     infer_datetime_format=True)
    dfs.append(df)

print(f'Merging data frames...')
all_df = reduce(
    lambda left, right: pd.merge(left, right, on=['Time'], how='outer'), dfs)
all_filename = f"{os.path.join(credentials.path, 'bafu_hydrodaten/data/')}hydrodata_{datetime.today().strftime('%Y-%m-%d')}.csv"
all_df.to_csv(all_filename, index=False)
common.upload_ftp(all_filename, credentials.ftp_server, credentials.ftp_user,
                  credentials.ftp_pass, credentials.ftp_dir_all)

print('Processing data...')
Example 18
from datetime import datetime, timedelta
import os
import pandas as pd
import common
from tba_wildedeponien import credentials
from io import StringIO

# Subsequently get only data since yesterday
from_timestamp = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
api_url = f'https://tba-bs.ch/export?object=sr_wilde_deponien_ogd&from={from_timestamp}&format=csv'

# Or: get all data once
# from_timestamp = 'ever'
# api_url = f'https://tba-bs.ch/export?object=sr_wilde_deponien_ogd&format=csv'

print(
    f'Retrieving data since {from_timestamp} from API call to "{api_url}"...')
r = common.requests_get(url=api_url,
                        auth=(credentials.api_user, credentials.api_password))

if r.status_code == 200:
    if len(r.text) == 0:
        print('No data retrieved from API. Job successful!')
    else:
        data = StringIO(r.text)
        df = pd.read_csv(data, sep=';')
        print('Retrieving lat and lon from column "koordinaten"...')
        df['coords'] = df.koordinaten.str.replace('POINT(', '', regex=False)
        df['coords'] = df.coords.str.replace(')', '', regex=False)
        # df['coords'] = df.coords.str.replace(' ', ',', regex=False)
        df2 = df['coords'].str.split(' ', expand=True)
        df = df.assign(lon=df2[[0]], lat=df2[[1]])
        df.lat = pd.to_numeric(df.lat)
        df.lon = pd.to_numeric(df.lon)
Example 19
    def try_activitypub(self):
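        """Fetches the source page, converts it to ActivityStreams, and delivers it to the target's ActivityPub inbox, falling back to Salmon where the target isn't ActivityPub."""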
        source = util.get_required_param(self, 'source')

        # fetch source page, convert to ActivityStreams
        source_resp = common.requests_get(source)
        source_url = source_resp.url or source
        source_mf2 = mf2py.parse(source_resp.text, url=source_url)
        # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(source_mf2, indent=2))

        entry = mf2util.find_first_entry(source_mf2, ['h-entry'])
        logging.info('First entry: %s', json.dumps(entry, indent=2))
        # make sure it has url, since we use that for AS2 id, which is required
        # for ActivityPub.
        props = entry.setdefault('properties', {})
        if not props.get('url'):
            props['url'] = [source_url]

        source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
        logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))

        # fetch target page as AS object. target is first in-reply-to, like-of,
        # or repost-of, *not* the target query param.
        target = util.get_url(util.get_first(source_obj, 'inReplyTo') or
                              util.get_first(source_obj, 'object'))
        if not target:
            common.error(self, 'No u-in-reply-to, u-like-of, or u-repost-of '
                         'found in %s' % source_url)

        try:
            target_resp = common.get_as2(target)
        except (requests.HTTPError, exc.HTTPBadGateway) as e:
            if (e.response.status_code // 100 == 2 and
                common.content_type(e.response).startswith('text/html')):
                self.resp = Response.get_or_create(
                    source=source_url, target=e.response.url or target,
                    direction='out', source_mf2=json.dumps(source_mf2))
                return self.send_salmon(source_obj, target_resp=e.response)
            raise

        target_url = target_resp.url or target
        self.resp = Response.get_or_create(
            source=source_url, target=target_url, direction='out',
            protocol='activitypub', source_mf2=json.dumps(source_mf2))

        # find actor's inbox
        target_obj = target_resp.json()
        inbox_url = target_obj.get('inbox')

        if not inbox_url:
            # TODO: test actor/attributedTo and not, with/without inbox
            actor = target_obj.get('actor') or target_obj.get('attributedTo')
            if isinstance(actor, dict):
                inbox_url = actor.get('inbox')
                actor = actor.get('url')
            if not inbox_url and not actor:
                common.error(self, 'Target object has no actor or attributedTo URL')

        if not inbox_url:
            # fetch actor as AS object
            actor = common.get_as2(actor).json()
            inbox_url = actor.get('inbox')

        if not inbox_url:
            # TODO: probably need a way to save errors like this so that we can
            # return them if ostatus fails too.
            # common.error(self, 'Target actor has no inbox')
            return self.send_salmon(source_obj, target_resp=target_resp)

        # convert to AS2
        source_domain = urlparse.urlparse(source_url).netloc
        key = MagicKey.get_or_create(source_domain)
        source_activity = common.postprocess_as2(
            as2.from_as1(source_obj), target=target_obj, key=key)

        if self.resp.status == 'complete':
            source_activity['type'] = 'Update'

        # prepare HTTP Signature (required by Mastodon)
        # https://w3c.github.io/activitypub/#authorization-lds
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07
        # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
        acct = 'acct:%s@%s' % (source_domain, source_domain)
        auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                                 algorithm='rsa-sha256')

        # deliver source object to target actor's inbox.
        headers = {
            'Content-Type': common.CONTENT_TYPE_AS2,
            # required for HTTP Signature
            # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
            'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
        }
        inbox_url = urlparse.urljoin(target_url, inbox_url)
        resp = common.requests_post(inbox_url, json=source_activity, auth=auth,
                                    headers=headers)
        self.response.status_int = resp.status_code
        if resp.status_code == 202:
            self.response.write('202 response! If this is Mastodon 1.x, their '
                                'signature verification probably failed. :(\n')
        self.response.write(resp.text)
Example 20
import json
from datetime import datetime
from parkendd import credentials
import pandas as pd
import common

apiUrl = 'https://api.parkendd.de/Basel'
print(f'Getting latest data from {apiUrl}...')
response = common.requests_get(url=apiUrl)


print(f'Parsing json...')
parsed = json.loads(response.text)
pretty_resp = json.dumps(parsed, indent=4, sort_keys=True)
# json_file_name = f'{credentials.path}json/parkendd-{str(datetime.now()).replace(":", "")}.json'
# resp_file = open(json_file_name, 'w+')
# resp_file.write(pretty_resp)
# resp_file.close()

lots_file_name = f'{credentials.path}csv/lots/parkendd-lots.csv'
print(f'Processing data...')
for lot in parsed['lots']:
    lot['last_downloaded'] = parsed['last_downloaded']
    lot['last_updated'] = parsed['last_updated']

normalized = pd.json_normalize(parsed, record_path='lots')
normalized['title'] = "Parkhaus " + normalized['name']
normalized['id2'] = normalized['id'].str.replace('baselparkhaus', '')
normalized['link'] = "https://www.parkleitsystem-basel.ch/parkhaus/" + normalized['id2']
normalized['description'] = 'Anzahl freie Parkplätze: ' + normalized['free'].astype(str)
normalized['published'] = normalized['last_downloaded']
Example 21
    def _try_salmon(self, resp):
        """
        Args:
          resp: Response
        """
        # fetch target HTML page, extract Atom rel-alternate link
        target = resp.target()
        if not self.target_resp:
            self.target_resp = common.requests_get(target)

        parsed = util.parse_html(self.target_resp)
        atom_url = parsed.find('link',
                               rel='alternate',
                               type=common.CONTENT_TYPE_ATOM)
        if not atom_url or not atom_url.get('href'):
            self.error('Target post %s has no Atom link' % resp.target(),
                       status=400)

        # fetch Atom target post, extract and inject id into source object
        base_url = ''
        base = parsed.find('base')
        if base and base.get('href'):
            base_url = base['href']
        atom_link = parsed.find('link',
                                rel='alternate',
                                type=common.CONTENT_TYPE_ATOM)
        atom_url = urllib.parse.urljoin(
            resp.target(), urllib.parse.urljoin(base_url, atom_link['href']))

        feed = common.requests_get(atom_url).text
        parsed = feedparser.parse(feed)
        logging.info('Parsed: %s', json_dumps(parsed, indent=2))
        entry = parsed.entries[0]
        target_id = entry.id
        in_reply_to = self.source_obj.get('inReplyTo')
        source_obj_obj = self.source_obj.get('object')
        if in_reply_to:
            for elem in in_reply_to:
                if elem.get('url') == target:
                    elem['id'] = target_id
        elif isinstance(source_obj_obj, dict):
            source_obj_obj['id'] = target_id

        # Mastodon (and maybe others?) require a rel-mentioned link to the
        # original post's author to make it show up as a reply:
        #   app/services/process_interaction_service.rb
        # ...so add them as a tag, which atom renders as a rel-mention link.
        authors = entry.get('authors', None)
        if authors:
            url = entry.authors[0].get('href')
            if url:
                self.source_obj.setdefault('tags', []).append({'url': url})

        # extract and discover salmon endpoint
        logging.info('Discovering Salmon endpoint in %s', atom_url)
        endpoint = django_salmon.discover_salmon_endpoint(feed)

        if not endpoint:
            # try webfinger
            parsed = urllib.parse.urlparse(resp.target())
            # TODO: test missing email
            author = entry.get('author_detail', {})
            email = author.get('email') or '@'.join(
                (author.get('name', ''), parsed.netloc))
            try:
                # TODO: always https?
                profile = common.requests_get(
                    '%s://%s/.well-known/webfinger?resource=acct:%s' %
                    (parsed.scheme, parsed.netloc, email),
                    verify=False)
                endpoint = django_salmon.get_salmon_replies_link(
                    profile.json())
            except requests.HTTPError as e:
                pass

        if not endpoint:
            self.error('No salmon endpoint found!', status=400)
        logging.info('Discovered Salmon endpoint %s', endpoint)

        # construct reply Atom object
        self.source_url = resp.source()
        activity = self.source_obj
        if self.source_obj.get('verb') not in source.VERBS_WITH_OBJECT:
            activity = {'object': self.source_obj}
        entry = atom.activity_to_atom(activity, xml_base=self.source_url)
        logging.info('Converted %s to Atom:\n%s', self.source_url, entry)

        # sign reply and wrap in magic envelope
        domain = urllib.parse.urlparse(self.source_url).netloc
        key = MagicKey.get_or_create(domain)
        logging.info('Using key for %s: %s', domain, key)
        magic_envelope = magicsigs.magic_envelope(entry,
                                                  common.CONTENT_TYPE_ATOM,
                                                  key).decode()

        logging.info('Sending Salmon slap to %s', endpoint)
        common.requests_post(
            endpoint,
            data=common.XML_UTF8 + magic_envelope,
            headers={'Content-Type': common.CONTENT_TYPE_MAGIC_ENVELOPE})
        return True
Example 22
    def template_vars(self, domain=None, url=None):
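        """Fetches the domain's representative h-card and builds its WebFinger data."""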
        logging.debug(f'Headers: {list(request.headers.items())}')

        if domain.split('.')[-1] in NON_TLDS:
            error(f"{domain} doesn't look like a domain", status=404)

        # find representative h-card. try url, then url's home page, then domain
        urls = [f'http://{domain}/']
        if url:
            urls = [url, urllib.parse.urljoin(url, '/')] + urls

        for candidate in urls:
            resp = common.requests_get(candidate)
            parsed = util.parse_html(resp)
            mf2 = util.parse_mf2(parsed, url=resp.url)
            # logging.debug(f'Parsed mf2 for {resp.url}: {json_dumps(mf2, indent=2)}')
            hcard = mf2util.representative_hcard(mf2, resp.url)
            if hcard:
                logging.info(
                    f'Representative h-card: {json_dumps(hcard, indent=2)}')
                break
        else:
            error(
                f"didn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {resp.url}"
            )

        logging.info(f'Generating WebFinger data for {domain}')
        key = models.MagicKey.get_or_create(domain)
        props = hcard.get('properties', {})
        urls = util.dedupe_urls(props.get('url', []) + [resp.url])
        canonical_url = urls[0]

        acct = f'{domain}@{domain}'
        for url in urls:
            if url.startswith('acct:'):
                urluser, urldomain = util.parse_acct_uri(url)
                if urldomain == domain:
                    acct = f'{urluser}@{domain}'
                    logging.info(f'Found custom username: acct:{acct}')
                    break

        # discover atom feed, if any
        atom = parsed.find('link',
                           rel='alternate',
                           type=common.CONTENT_TYPE_ATOM)
        if atom and atom['href']:
            atom = urllib.parse.urljoin(resp.url, atom['href'])
        else:
            atom = 'https://granary.io/url?' + urllib.parse.urlencode(
                {
                    'input': 'html',
                    'output': 'atom',
                    'url': resp.url,
                    'hub': resp.url,
                })

        # discover PuSH, if any
        hub = 'https://bridgy-fed.superfeedr.com/'
        for link in resp.headers.get('Link', '').split(','):
            match = common.LINK_HEADER_RE.match(link)
            if match and match.group(2) == 'hub':
                hub = match.group(1)
                break

        # generate webfinger content
        data = util.trim_nulls({
            'subject':
            'acct:' + acct,
            'aliases':
            urls,
            'magic_keys': [{
                'value': key.href()
            }],
            'links':
            sum(([{
                'rel': 'http://webfinger.net/rel/profile-page',
                'type': 'text/html',
                'href': url,
            }] for url in urls if url.startswith("http")), []) +
            [{
                'rel': 'http://webfinger.net/rel/avatar',
                'href': get_text(url),
            } for url in props.get('photo', [])] + [
                {
                    'rel': 'canonical_uri',
                    'type': 'text/html',
                    'href': canonical_url,
                },

                # ActivityPub
                {
                    'rel': 'self',
                    'type': common.CONTENT_TYPE_AS2,
                    # WARNING: in python 2 sometimes request.host_url lost port,
                    # http://localhost:8080 would become just http://localhost. no
                    # clue how or why. pay attention here if that happens again.
                    'href': f'{request.host_url}{domain}',
                },
                {
                    'rel': 'inbox',
                    'type': common.CONTENT_TYPE_AS2,
                    'href': f'{request.host_url}{domain}/inbox',
                },

                # OStatus
                {
                    'rel': 'http://schemas.google.com/g/2010#updates-from',
                    'type': common.CONTENT_TYPE_ATOM,
                    'href': atom,
                },
                {
                    'rel': 'hub',
                    'href': hub,
                },
                {
                    'rel': 'magic-public-key',
                    'href': key.href(),
                },
                {
                    'rel': 'salmon',
                    'href': f'{request.host_url}{domain}/salmon',
                }
            ]
        })
        logging.info(f'Returning WebFinger data: {json_dumps(data, indent=2)}')
        return data