def post(self):
    logging.info('(Params: %s )', self.request.params.items())

    # fetch source page
    source = util.get_required_param(self, 'source')
    source_resp = common.requests_get(source)
    self.source_url = source_resp.url or source
    self.source_domain = urlparse.urlparse(self.source_url).netloc.split(':')[0]
    self.source_mf2 = mf2py.parse(source_resp.text, url=self.source_url,
                                  img_with_alt=True)
    # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(self.source_mf2, indent=2))

    # check for backlink to bridgy fed (for webmention spec and to confirm
    # source's intent to federate to mastodon)
    if (self.request.host_url not in source_resp.text and
            urllib.quote(self.request.host_url, safe='') not in source_resp.text):
        common.error(self, "Couldn't find link to %s" % self.request.host_url)

    # convert source page to ActivityStreams
    entry = mf2util.find_first_entry(self.source_mf2, ['h-entry'])
    if not entry:
        common.error(self, 'No microformats2 found on %s' % self.source_url)

    logging.info('First entry: %s', json.dumps(entry, indent=2))
    # make sure it has url, since we use that for AS2 id, which is required
    # for ActivityPub.
    props = entry.setdefault('properties', {})
    if not props.get('url'):
        props['url'] = [self.source_url]

    self.source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
    logging.info('Converted to AS1: %s', json.dumps(self.source_obj, indent=2))

    self.try_activitypub() or self.try_salmon()
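# Every snippet in this section goes through a shared `common.requests_get`
# helper that is not shown here. This is a minimal, hypothetical sketch of
# such a wrapper, assuming it only adds a default timeout and User-Agent;
# the real helper may also do retries, logging, or error handling:
import requests

DEFAULT_TIMEOUT = 15                         # assumption: seconds
DEFAULT_USER_AGENT = 'example-fetcher/1.0'   # assumption: placeholder UA

def requests_get(url, **kwargs):
    """Thin requests.get wrapper with a default timeout and User-Agent."""
    kwargs.setdefault('timeout', DEFAULT_TIMEOUT)
    kwargs.setdefault('headers', {}).setdefault('User-Agent', DEFAULT_USER_AGENT)
    return requests.get(url, **kwargs)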
def get(self, domain):
    url = 'http://%s/' % domain
    resp = common.requests_get(url)
    mf2 = mf2py.parse(resp.text, url=resp.url, img_with_alt=True)
    # logging.info('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2))

    hcard = mf2util.representative_hcard(mf2, resp.url)
    logging.info('Representative h-card: %s', json.dumps(hcard, indent=2))
    if not hcard:
        common.error(self, """\
Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on %s""" % resp.url)

    key = MagicKey.get_or_create(domain)
    obj = common.postprocess_as2(
        as2.from_as1(microformats2.json_to_object(hcard)), key=key)
    obj.update({
        'inbox': '%s/%s/inbox' % (appengine_config.HOST_URL, domain),
        'outbox': '%s/%s/outbox' % (appengine_config.HOST_URL, domain),
        'following': '%s/%s/following' % (appengine_config.HOST_URL, domain),
        'followers': '%s/%s/followers' % (appengine_config.HOST_URL, domain),
    })
    logging.info('Returning: %s', json.dumps(obj, indent=2))

    self.response.headers.update({
        'Content-Type': common.CONTENT_TYPE_AS2,
        'Access-Control-Allow-Origin': '*',
    })
    self.response.write(json.dumps(obj, indent=2))
def call_fieldclimate_api(apiRoute, publicKey, privateKey, filename):
    auth = AuthHmacMetosGet(apiRoute, publicKey, privateKey)
    response = common.requests_get(url=apiURI + apiRoute,
                                   headers={'Accept': 'application/json'},
                                   auth=auth)
    parsed = json.loads(response.text)
    pretty_resp = json.dumps(parsed, indent=4, sort_keys=True)
    # write the raw response to a json file for reference; use a context
    # manager so the file handle is closed reliably
    with open(f'{credentials.path}json/{filename}.json', 'w+') as resp_file:
        resp_file.write(pretty_resp)
    normalized = pd.json_normalize(parsed)
    return pretty_resp, normalized
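# Example invocation of the helper above. The route, keys, and filename are
# placeholders for illustration, not values from the original code:
pretty, stations_df = call_fieldclimate_api(
    '/user/stations', 'YOUR_PUBLIC_KEY', 'YOUR_PRIVATE_KEY', 'stations')
print(stations_df.head())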
def wait_for_idle(harvester_id):
    while True:
        print(f'Checking status of harvester "{harvester_id}"...')
        resp = common.requests_get(
            url=f'https://basel-stadt.opendatasoft.com/api/management/v2/harvesters/{harvester_id}/',
            auth=(credentials.ods_user, credentials.ods_password),
            proxies={'https': credentials.proxy})
        handle_http_errors(resp)
        status = resp.json()['status']
        print(f'Harvester "{harvester_id}" is "{status}".')
        if status == 'idle':
            break
        else:
            seconds = 10
            print(f'Waiting {seconds} seconds before trying again...')
            time.sleep(seconds)
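# `handle_http_errors` is defined elsewhere. A plausible minimal stand-in,
# assuming it simply raises on 4xx/5xx responses (the real helper may also
# log or retry):
def handle_http_errors(resp):
    resp.raise_for_status()  # assumption: raise requests.HTTPError on failure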
def add_wm(url=None):
    """Proxies HTTP requests and adds Link header to our webmention endpoint."""
    url = urllib.parse.unquote(url)
    if not url.startswith('http://') and not url.startswith('https://'):
        error('URL must start with http:// or https://')

    try:
        got = common.requests_get(url)
    except requests.exceptions.Timeout as e:
        error(str(e), status=504, exc_info=True)
    except requests.exceptions.RequestException as e:
        error(str(e), status=502, exc_info=True)

    resp = flask.make_response(got.content, got.status_code, dict(got.headers))
    resp.headers.add(
        'Link', LINK_HEADER % (request.args.get('endpoint')
                               or request.host_url + 'webmention'))
    return resp
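# A sketch of how a proxy view like add_wm() might be registered in Flask.
# The route pattern here is hypothetical; the original app's URL map is not
# shown in this section:
import flask

app = flask.Flask(__name__)
app.add_url_rule('/wm/<path:url>', view_func=add_wm)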
def get(self, url):
    url = urllib.parse.unquote(url)
    if not url.startswith('http://') and not url.startswith('https://'):
        self.error('URL must start with http:// or https://')

    try:
        resp = common.requests_get(url)
    except requests.exceptions.Timeout as e:
        self.error(str(e), status=504, exc_info=True)
    except requests.exceptions.RequestException as e:
        self.error(str(e), status=502, exc_info=True)

    self.response.status_int = resp.status_code
    self.response.write(resp.content)

    endpoint = LINK_HEADER % (str(self.request.get('endpoint'))
                              or self.request.host_url + '/webmention')
    self.response.headers.clear()
    self.response.headers.update(resp.headers)
    self.response.headers.add('Link', endpoint)
def dispatch_request(self):
    logging.info(f'Params: {list(request.form.items())}')

    # fetch source page
    source = flask_util.get_required_param('source')
    source_resp = common.requests_get(source)
    self.source_url = source_resp.url or source
    self.source_domain = urllib.parse.urlparse(self.source_url).netloc.split(':')[0]
    self.source_mf2 = util.parse_mf2(source_resp)
    # logging.debug(f'Parsed mf2 for {source_resp.url}: {json_dumps(self.source_mf2, indent=2)}')

    # check for backlink to bridgy fed (for webmention spec and to confirm
    # source's intent to federate to mastodon)
    if (request.host_url not in source_resp.text and
            urllib.parse.quote(request.host_url, safe='') not in source_resp.text):
        error(f"Couldn't find link to {request.host_url}")

    # convert source page to ActivityStreams
    entry = mf2util.find_first_entry(self.source_mf2, ['h-entry'])
    if not entry:
        error(f'No microformats2 found on {self.source_url}')

    logging.info(f'First entry: {json_dumps(entry, indent=2)}')
    # make sure it has url, since we use that for AS2 id, which is required
    # for ActivityPub.
    props = entry.setdefault('properties', {})
    if not props.get('url'):
        props['url'] = [self.source_url]

    self.source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
    logging.info(f'Converted to AS1: {json_dumps(self.source_obj, indent=2)}')

    for method in self.try_activitypub, self.try_salmon:
        ret = method()
        if ret:
            return ret

    return ''
def retrieve_vmdl_data():
    print('Retrieving vmdl data...')
    payload_token = (f'client_id={credentials.vmdl_client_id}'
                     f'&scope={credentials.vmdl_scope}'
                     f'&username={credentials.vmdl_user}'
                     f'&password={credentials.vmdl_password}'
                     f'&grant_type=password')
    headers_token = {'Content-Type': 'application/x-www-form-urlencoded'}
    print('Getting OAUTH2 access token...')
    resp_token = requests.request('POST', credentials.vmdl_url_token,
                                  headers=headers_token, data=payload_token)
    resp_token.raise_for_status()
    # token_type = resp_token.json()['token_type']
    auth_string = f'Bearer {resp_token.json()["access_token"]}'

    payload_download = {}
    headers_download = {'Authorization': auth_string}
    print('Downloading data...')
    resp_download = common.requests_get(credentials.vmdl_url_download,
                                        headers=headers_download,
                                        data=payload_download)
    resp_download.raise_for_status()

    file_path = os.path.join(credentials.vmdl_path, credentials.vmdl_file)
    print(f'Writing data to file {file_path}...')
    resp_download.encoding = 'utf-8'
    with open(file_path, "w") as f:
        f.write(resp_download.text)
    return file_path
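# Example invocation (assumes the vmdl credentials module is configured):
if __name__ == '__main__':
    path = retrieve_vmdl_data()
    print(f'VMDL export written to {path}')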
def convert_to_as2(self, url):
    """Fetch a URL as HTML, convert it to AS2, and return it.

    Currently mainly for Pixelfed.
    https://github.com/snarfed/bridgy-fed/issues/39
    """
    resp = common.requests_get(url)
    mf2 = mf2py.parse(resp.text, url=resp.url, img_with_alt=True)
    entry = mf2util.find_first_entry(mf2, ['h-entry'])
    logging.info('Parsed mf2 for %s: %s', resp.url, json.dumps(entry, indent=2))

    obj = common.postprocess_as2(
        as2.from_as1(microformats2.json_to_object(entry)))
    logging.info('Returning: %s', json.dumps(obj, indent=2))

    self.response.headers.update({
        'Content-Type': common.CONTENT_TYPE_AS2,
        'Access-Control-Allow-Origin': '*',
    })
    self.response.write(json.dumps(obj, indent=2))
from bag_coronavirus import credentials
import os
import common
import pandas as pd

print("Getting today's data url...")
context_json = common.requests_get(
    url='https://www.covid19.admin.ch/api/data/context').json()
csv_daily_tests_url = context_json['sources']['individual']['csv']['daily']['test']

print(f'Reading current csv from {csv_daily_tests_url} into data frame...')
df = pd.read_csv(csv_daily_tests_url)

print('Filtering for BS rows, selecting columns, and renaming them...')
df_bs = df.query('geoRegion == "BS"')
df_bs = df_bs.filter(items=['datum', 'entries_neg', 'entries_pos', 'entries'])
df_bs['positivity_rate_percent'] = df_bs['entries_pos'] / df_bs['entries'] * 100
df_bs['positivity_rate'] = df_bs['entries_pos'] / df_bs['entries']
df_bs = df_bs.rename(columns={
    'entries_neg': 'negative_tests',
    'entries_pos': 'positive_tests',
    'entries': 'total_tests',
})

print('Calculating columns...')
df_bs['dayofweek'] = pd.to_datetime(df_bs['datum']).dt.dayofweek + 1
df_bs['weekday_nr'] = pd.to_datetime(df_bs['datum']).dt.dayofweek
df_bs['woche'] = pd.to_datetime(df_bs['datum']).dt.isocalendar().week

export_file_name = os.path.join(credentials.path, credentials.file_name)
print(f'Exporting to file {export_file_name}...')
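# Quick worked example of the derived columns above on a tiny frame
# (the values are made up for illustration):
demo = pd.DataFrame({'datum': ['2021-01-04'], 'entries_pos': [25], 'entries': [500]})
demo['positivity_rate_percent'] = demo['entries_pos'] / demo['entries'] * 100
demo['woche'] = pd.to_datetime(demo['datum']).dt.isocalendar().week
print(demo[['positivity_rate_percent', 'woche']])  # 5.0, ISO week 1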
import common
from xml.etree import ElementTree
import requests
from bafu_hydrodaten import credentials

print('Connecting to HTTPS Server to read data...')
local_path = 'bafu_hydrodaten/data'
r = common.requests_get(url=credentials.https_live_url,
                        auth=(credentials.https_user, credentials.https_pass))

print('Parsing response XML...')
root = ElementTree.fromstring(r.content)
# for child in root:
#     print(child.tag, child.attrib)
timestamp = root.find(".//*[@number='2289']/parameter[@type='2']/datetime").text
pegelstand = root.find(".//*[@number='2289']/parameter[@type='2']/value").text
abfluss = root.find(".//*[@number='2289']/parameter[@type='10']/value").text
print('Current data:')
print(f'Timestamp: {timestamp}')
print(f'Pegelstand: {pegelstand}')
print(f'Abfluss: {abfluss}')

print('Posting data to ods...')
payload = {
    'zeitstempel': timestamp,
    'pegel': pegelstand,
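# The XPath queries above imply roughly this XML shape. The element names and
# values here are illustrative, not the real BAFU payload:
from xml.etree import ElementTree

demo_xml = """
<data>
  <station number="2289">
    <parameter type="2">
      <datetime>2021-05-01T12:00:00</datetime>
      <value>245.3</value>
    </parameter>
    <parameter type="10">
      <value>1100.0</value>
    </parameter>
  </station>
</data>"""
demo_root = ElementTree.fromstring(demo_xml)
assert demo_root.find(".//*[@number='2289']/parameter[@type='2']/value").text == '245.3'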
def template_vars(self, domain, url=None):
    assert domain
    if domain.split('.')[-1] in NON_TLDS:
        common.error(self, "%s doesn't look like a domain" % domain, status=404)

    # find representative h-card. try url, then url's home page, then domain
    urls = ['http://%s/' % domain]
    if url:
        urls = [url, urlparse.urljoin(url, '/')] + urls

    for candidate in urls:
        resp = common.requests_get(candidate)
        parsed = common.beautifulsoup_parse(resp.content, from_encoding=resp.encoding)
        mf2 = mf2py.parse(parsed, url=resp.url, img_with_alt=True)
        # logging.debug('Parsed mf2 for %s: %s', resp.url, json.dumps(mf2, indent=2))
        hcard = mf2util.representative_hcard(mf2, resp.url)
        if hcard:
            logging.info('Representative h-card: %s', json.dumps(hcard, indent=2))
            break
    else:
        common.error(self, """\
Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on %s""" % resp.url)

    logging.info('Generating WebFinger data for %s', domain)
    key = models.MagicKey.get_or_create(domain)
    props = hcard.get('properties', {})
    urls = util.dedupe_urls(props.get('url', []) + [resp.url])
    canonical_url = urls[0]

    acct = '%s@%s' % (domain, domain)
    for url in urls:
        if url.startswith('acct:'):
            urluser, urldomain = util.parse_acct_uri(url)
            if urldomain == domain:
                acct = '%s@%s' % (urluser, domain)
                logging.info('Found custom username: acct:%s', acct)
                break

    # discover atom feed, if any
    atom = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
    if atom and atom['href']:
        atom = urlparse.urljoin(resp.url, atom['href'])
    else:
        atom = 'https://granary.io/url?' + urllib.urlencode({
            'input': 'html',
            'output': 'atom',
            'url': resp.url,
            'hub': resp.url,
        })

    # discover PuSH, if any. (the original flattened text had no break here,
    # which would let later Link headers overwrite an earlier hub match; a
    # for/else with break gives the intended first-match-wins behavior.)
    for link in resp.headers.get('Link', '').split(','):
        match = common.LINK_HEADER_RE.match(link)
        if match and match.group(2) == 'hub':
            hub = match.group(1)
            break
    else:
        hub = 'https://bridgy-fed.superfeedr.com/'

    # generate webfinger content
    data = util.trim_nulls({
        'subject': 'acct:' + acct,
        'aliases': urls,
        'magic_keys': [{'value': key.href()}],
        'links': sum(([{
            'rel': 'http://webfinger.net/rel/profile-page',
            'type': 'text/html',
            'href': url,
        }] for url in urls if url.startswith("http")), []) + [{
            'rel': 'http://webfinger.net/rel/avatar',
            'href': url,
        } for url in props.get('photo', [])] + [{
            'rel': 'canonical_uri',
            'type': 'text/html',
            'href': canonical_url,
        },

        # ActivityPub
        {
            'rel': 'self',
            'type': 'application/activity+json',
            # use HOST_URL instead of e.g. request.host_url because it
            # sometimes lost port, e.g. http://localhost:8080 would become
            # just http://localhost. no clue how or why.
            'href': '%s/%s' % (appengine_config.HOST_URL, domain),
        }, {
            'rel': 'inbox',
            'type': 'application/activity+json',
            'href': '%s/%s/inbox' % (appengine_config.HOST_URL, domain),
        },

        # OStatus
        {
            'rel': 'http://schemas.google.com/g/2010#updates-from',
            'type': common.CONTENT_TYPE_ATOM,
            'href': atom,
        }, {
            'rel': 'hub',
            'href': hub,
        }, {
            'rel': 'magic-public-key',
            'href': key.href(),
        }, {
            'rel': 'salmon',
            'href': '%s/%s/salmon' % (appengine_config.HOST_URL, domain),
        }]
    })
    logging.info('Returning WebFinger data: %s', json.dumps(data, indent=2))
    return data
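# `common.LINK_HEADER_RE` is defined elsewhere. This is a hypothetical pattern
# with the behavior the PuSH-discovery loop above relies on (group 1 = URL,
# group 2 = rel value); the real regex may be more permissive:
import re

LINK_HEADER_RE = re.compile(r'\s*<([^>]+)>;\s*rel="([^"]+)"')

m = LINK_HEADER_RE.match('<https://hub.example/>; rel="hub"')
assert m.group(1) == 'https://hub.example/' and m.group(2) == 'hub'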
# Load metadata from geocat.ch
# See documentation at https://www.geocat.admin.ch/de/dokumentation/csw.html
# For unknown reasons (probably proxy-related), plain requests.get() calls
# used to return HTTP 404 here, so earlier versions launched curl in a
# subprocess instead. The failed attempts are kept below for reference:
# resp = requests.get('https://www.geocat.ch/geonetwork/srv/api/0.1/records/2899c0c-a1bb-4ffc-ba09-c1e41dc7138a', params={'accept': 'application/json'}, proxies={'https': credentials.proxy})
# resp = requests.get('https://www.geocat.ch/geonetwork/srv/api/0.1/records/2899c0c-a1bb-4ffc-ba09-c1e41dc7138a', headers={'accept': 'application/xml, application/json'}, proxies={'https': credentials.proxy})

# In some geocat URLs there's a tab character, remove it.
geocat_uid = row['geocat'].rsplit('/', 1)[-1].replace('\t', '')
geocat_url = f'https://www.geocat.ch/geonetwork/srv/api/0.1/records/{geocat_uid}'
print(f'Getting metadata from {geocat_url}...')
r = common.requests_get(geocat_url,
                        headers={'accept': 'application/xml, application/json'},
                        proxies={'https': credentials.proxy})
r.raise_for_status()
metadata = r.json()
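# Worked example of the UID extraction above (hypothetical row value with a
# stray tab character; the UID is the one from the reference comments):
demo_row = {'geocat': 'https://www.geocat.ch/geonetwork/srv/api/0.1/records/289b9c0c-a1bb-4ffc-ba09-c1e41dc7138a\t'}
demo_uid = demo_row['geocat'].rsplit('/', 1)[-1].replace('\t', '')
assert demo_uid == '289b9c0c-a1bb-4ffc-ba09-c1e41dc7138a'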
def send_salmon(self, source_obj, target_resp=None):
    self.resp.protocol = 'ostatus'

    # fetch target HTML page, extract Atom rel-alternate link
    if not target_resp:
        target_resp = common.requests_get(self.resp.target())
    parsed = BeautifulSoup(target_resp.content, from_encoding=target_resp.encoding)
    atom_url = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
    if not atom_url or not atom_url.get('href'):
        common.error(self, 'Target post %s has no Atom link' % self.resp.target(),
                     status=400)

    # fetch Atom target post, extract and inject id into source object
    feed = common.requests_get(atom_url['href']).text
    parsed = feedparser.parse(feed)
    logging.info('Parsed: %s', json.dumps(parsed, indent=2, default=lambda key: '-'))
    entry = parsed.entries[0]
    target_id = entry.id
    in_reply_to = source_obj.get('inReplyTo')
    source_obj_obj = source_obj.get('object')
    if in_reply_to:
        in_reply_to[0]['id'] = target_id
    elif isinstance(source_obj_obj, dict):
        source_obj_obj['id'] = target_id

    # Mastodon (and maybe others?) require a rel-mentioned link to the
    # original post's author to make it show up as a reply:
    #   app/services/process_interaction_service.rb
    # ...so add them as a tag, which atom renders as a rel-mention link.
    authors = entry.get('authors', None)
    if authors:
        url = entry.authors[0].get('href')
        if url:
            source_obj.setdefault('tags', []).append({'url': url})

    # extract and discover salmon endpoint
    logging.info('Discovering Salmon endpoint in %s', atom_url['href'])
    endpoint = django_salmon.discover_salmon_endpoint(feed)

    if not endpoint:
        # try webfinger
        parsed = urlparse.urlparse(self.resp.target())
        # TODO: test missing email
        email = entry.author_detail.get('email') or '@'.join(
            (entry.author_detail.name, parsed.netloc))
        try:
            # TODO: always https?
            resp = common.requests_get(
                '%s://%s/.well-known/webfinger?resource=acct:%s' %
                (parsed.scheme, parsed.netloc, email),
                verify=False)
            endpoint = django_salmon.get_salmon_replies_link(resp.json())
        except requests.HTTPError:
            pass

    if not endpoint:
        common.error(self, 'No salmon endpoint found!', status=400)
    logging.info('Discovered Salmon endpoint %s', endpoint)

    # construct reply Atom object
    source_url = self.resp.source()
    activity = (source_obj if source_obj.get('verb') in source.VERBS_WITH_OBJECT
                else {'object': source_obj})
    entry = atom.activity_to_atom(activity, xml_base=source_url)
    logging.info('Converted %s to Atom:\n%s', source_url, entry)

    # sign reply and wrap in magic envelope
    domain = urlparse.urlparse(source_url).netloc
    key = MagicKey.get_or_create(domain)
    logging.info('Using key for %s: %s', domain, key)
    magic_envelope = magicsigs.magic_envelope(entry, common.CONTENT_TYPE_ATOM, key)

    logging.info('Sending Salmon slap to %s', endpoint)
    common.requests_post(
        endpoint, data=common.XML_UTF8 + magic_envelope,
        headers={'Content-Type': common.CONTENT_TYPE_MAGIC_ENVELOPE})
import common
import os
from ods_catalog import credentials

url = ('https://data.bs.ch/explore/dataset/100055/download/'
       '?format=csv&use_labels_for_header=true&refine.visibility=domain'
       '&refine.publishing_published=True')
file = os.path.join(credentials.path, credentials.filename)
print(f'Downloading {file} from {url}...')
r = common.requests_get(url, auth=(credentials.ods_user, credentials.ods_password))
with open(file, 'wb') as f:
    f.write(r.content)
common.upload_ftp(filename=file, server=credentials.ftp_server,
                  user=credentials.ftp_user, password=credentials.ftp_pass,
                  remote_path=credentials.ftp_path)
            # (tail of download_data_files(); the beginning of the function
            # is not included in this excerpt)
            ftp.retrbinary(f'RETR {file_name}', f.write)
        local_files[(station, date_string)] = local_file
    ftp.quit()

print('Connecting to FTP Server to read data...')
download_data_files()
dfs = {}
all_data = pd.DataFrame(
    columns=['LocalDateTime', 'Value', 'Latitude', 'Longitude', 'EUI'])

print('Reading csv files into data frames...')
urllib3.disable_warnings()
for station in stations:
    print(f'Retrieving latest timestamp for station "{station}" from ODS...')
    r = common.requests_get(
        url=f'https://data.bs.ch/api/records/1.0/search/?dataset=100087&q=&rows=1&sort=timestamp&refine.station_id={station}',
        verify=False)
    r.raise_for_status()
    latest_ods_timestamp = r.json()['records'][0]['fields']['timestamp']
    print(f'Latest timestamp is {latest_ods_timestamp}.')

    for date_string in [yesterday_string, today_string]:
        try:
            print(f'Reading {local_files[(station, date_string)]}...')
            df = pd.read_csv(local_files[(station, date_string)],
                             sep=';', na_filter=False)
            print('Calculating ISO8601 time string...')
            df['timestamp'] = pd.to_datetime(
                df.LocalDateTime,
                format='%d.%m.%Y %H:%M').dt.tz_localize('Europe/Zurich',
                                                        ambiguous='infer')
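# Illustrative shape of the ODS Search API v1 response parsed above
# (hypothetical values, not real data):
demo_resp = {'records': [{'fields': {'timestamp': '2021-03-31T23:50:00+00:00'}}]}
assert demo_resp['records'][0]['fields']['timestamp'].startswith('2021-03-31')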
from datetime import datetime
import urllib3
import os
import pandas as pd
import common
import json
from requests.auth import HTTPBasicAuth
from functools import reduce
from bafu_hydrodaten import credentials

print('Loading data into data frames...')
dfs = []
for file in credentials.files:
    response = common.requests_get(f'{credentials.https_url}/{file}',
                                   auth=HTTPBasicAuth(credentials.https_user,
                                                      credentials.https_pass),
                                   stream=True)
    df = pd.read_csv(response.raw, parse_dates=True, infer_datetime_format=True)
    dfs.append(df)

print('Merging data frames...')
all_df = reduce(
    lambda left, right: pd.merge(left, right, on=['Time'], how='outer'), dfs)
all_filename = (f"{os.path.join(credentials.path, 'bafu_hydrodaten/data/')}"
                f"hydrodata_{datetime.today().strftime('%Y-%m-%d')}.csv")
all_df.to_csv(all_filename, index=False)
common.upload_ftp(all_filename, credentials.ftp_server, credentials.ftp_user,
                  credentials.ftp_pass, credentials.ftp_dir_all)
print('Processing data...')
import os
import common
import pandas as pd                        # used below; missing from the original imports
from datetime import datetime, timedelta   # used below; missing from the original imports
from tba_wildedeponien import credentials
from io import StringIO

# Subsequently get only data since yesterday
from_timestamp = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
api_url = f'https://tba-bs.ch/export?object=sr_wilde_deponien_ogd&from={from_timestamp}&format=csv'
# Or: get all data once
# from_timestamp = 'ever'
# api_url = f'https://tba-bs.ch/export?object=sr_wilde_deponien_ogd&format=csv'

print(f'Retrieving data since {from_timestamp} from API call to "{api_url}"...')
r = common.requests_get(url=api_url,
                        auth=(credentials.api_user, credentials.api_password))
if r.status_code == 200:
    if len(r.text) == 0:
        print('No data retrieved from API. Job successful!')
    else:
        data = StringIO(r.text)
        df = pd.read_csv(data, sep=';')
        print('Retrieving lat and lon from column "koordinaten"...')
        df['coords'] = df.koordinaten.str.replace('POINT(', '', regex=False)
        df['coords'] = df.coords.str.replace(')', '', regex=False)
        df2 = df['coords'].str.split(' ', expand=True)
        df = df.assign(lon=df2[[0]], lat=df2[[1]])
        df.lat = pd.to_numeric(df.lat)
        df.lon = pd.to_numeric(df.lon)
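# Worked example of the coordinate parsing above on an illustrative WKT value:
demo = pd.DataFrame({'koordinaten': ['POINT(7.589 47.561)']})
demo['coords'] = (demo.koordinaten.str.replace('POINT(', '', regex=False)
                  .str.replace(')', '', regex=False))
parts = demo['coords'].str.split(' ', expand=True)
demo = demo.assign(lon=pd.to_numeric(parts[0]), lat=pd.to_numeric(parts[1]))
print(demo[['lon', 'lat']])  # lon=7.589, lat=47.561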
def try_activitypub(self):
    source = util.get_required_param(self, 'source')

    # fetch source page, convert to ActivityStreams
    source_resp = common.requests_get(source)
    source_url = source_resp.url or source
    source_mf2 = mf2py.parse(source_resp.text, url=source_url)
    # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(source_mf2, indent=2))

    entry = mf2util.find_first_entry(source_mf2, ['h-entry'])
    logging.info('First entry: %s', json.dumps(entry, indent=2))
    # make sure it has url, since we use that for AS2 id, which is required
    # for ActivityPub.
    props = entry.setdefault('properties', {})
    if not props.get('url'):
        props['url'] = [source_url]

    source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
    logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))

    # fetch target page as AS object. target is first in-reply-to, like-of,
    # or repost-of, *not* the target query param.
    target = util.get_url(util.get_first(source_obj, 'inReplyTo') or
                          util.get_first(source_obj, 'object'))
    if not target:
        common.error(self, 'No u-in-reply-to, u-like-of, or u-repost-of '
                     'found in %s' % source_url)

    try:
        target_resp = common.get_as2(target)
    except (requests.HTTPError, exc.HTTPBadGateway) as e:
        if (e.response.status_code // 100 == 2 and
                common.content_type(e.response).startswith('text/html')):
            self.resp = Response.get_or_create(
                source=source_url, target=e.response.url or target,
                direction='out', source_mf2=json.dumps(source_mf2))
            return self.send_salmon(source_obj, target_resp=e.response)
        raise
    target_url = target_resp.url or target

    self.resp = Response.get_or_create(
        source=source_url, target=target_url, direction='out',
        protocol='activitypub', source_mf2=json.dumps(source_mf2))

    # find actor's inbox
    target_obj = target_resp.json()
    inbox_url = target_obj.get('inbox')

    if not inbox_url:
        # TODO: test actor/attributedTo and not, with/without inbox
        actor = target_obj.get('actor') or target_obj.get('attributedTo')
        if isinstance(actor, dict):
            inbox_url = actor.get('inbox')
            actor = actor.get('url')
        if not inbox_url and not actor:
            common.error(self, 'Target object has no actor or attributedTo URL')

    if not inbox_url:
        # fetch actor as AS object
        actor = common.get_as2(actor).json()
        inbox_url = actor.get('inbox')

    if not inbox_url:
        # TODO: probably need a way to save errors like this so that we can
        # return them if ostatus fails too.
        # common.error(self, 'Target actor has no inbox')
        return self.send_salmon(source_obj, target_resp=target_resp)

    # convert to AS2
    source_domain = urlparse.urlparse(source_url).netloc
    key = MagicKey.get_or_create(source_domain)
    source_activity = common.postprocess_as2(
        as2.from_as1(source_obj), target=target_obj, key=key)
    if self.resp.status == 'complete':
        source_activity['type'] = 'Update'

    # prepare HTTP Signature (required by Mastodon)
    # https://w3c.github.io/activitypub/#authorization-lds
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07
    # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
    acct = 'acct:%s@%s' % (source_domain, source_domain)
    auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                             algorithm='rsa-sha256')

    # deliver source object to target actor's inbox.
    headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        # required for HTTP Signature
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
        'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
    }
    inbox_url = urlparse.urljoin(target_url, inbox_url)
    resp = common.requests_post(inbox_url, json=source_activity, auth=auth,
                                headers=headers)
    self.response.status_int = resp.status_code
    if resp.status_code == 202:
        self.response.write('202 response! If this is Mastodon 1.x, their '
                            'signature verification probably failed. :(\n')
    self.response.write(resp.text)
import json
from datetime import datetime
from parkendd import credentials
import pandas as pd
import common

apiUrl = 'https://api.parkendd.de/Basel'
print(f'Getting latest data from {apiUrl}...')
response = common.requests_get(url=apiUrl)

print('Parsing json...')
parsed = json.loads(response.text)
pretty_resp = json.dumps(parsed, indent=4, sort_keys=True)
# json_file_name = f'{credentials.path}json/parkendd-{str(datetime.now()).replace(":", "")}.json'
# resp_file = open(json_file_name, 'w+')
# resp_file.write(pretty_resp)
# resp_file.close()

lots_file_name = f'{credentials.path}csv/lots/parkendd-lots.csv'
print('Processing data...')
for lot in parsed['lots']:
    lot['last_downloaded'] = parsed['last_downloaded']
    lot['last_updated'] = parsed['last_updated']

normalized = pd.json_normalize(parsed, record_path='lots')
normalized['title'] = 'Parkhaus ' + normalized['name']
normalized['id2'] = normalized['id'].str.replace('baselparkhaus', '')
normalized['link'] = 'https://www.parkleitsystem-basel.ch/parkhaus/' + normalized['id2']
normalized['description'] = 'Anzahl freie Parkplätze: ' + normalized['free'].astype(str)
normalized['published'] = normalized['last_downloaded']
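# Example of the derived fields above for a single lot (hypothetical lot id):
demo_id = 'baselparkhauselisabethen'
demo_id2 = demo_id.replace('baselparkhaus', '')
demo_link = f'https://www.parkleitsystem-basel.ch/parkhaus/{demo_id2}'
assert demo_link == 'https://www.parkleitsystem-basel.ch/parkhaus/elisabethen'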
def _try_salmon(self, resp):
    """
    Args:
      resp: Response
    """
    # fetch target HTML page, extract Atom rel-alternate link
    target = resp.target()
    if not self.target_resp:
        self.target_resp = common.requests_get(target)

    parsed = util.parse_html(self.target_resp)
    atom_url = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
    if not atom_url or not atom_url.get('href'):
        self.error('Target post %s has no Atom link' % resp.target(), status=400)

    # fetch Atom target post, extract and inject id into source object
    base_url = ''
    base = parsed.find('base')
    if base and base.get('href'):
        base_url = base['href']
    atom_link = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
    atom_url = urllib.parse.urljoin(
        resp.target(), urllib.parse.urljoin(base_url, atom_link['href']))

    feed = common.requests_get(atom_url).text
    parsed = feedparser.parse(feed)
    logging.info('Parsed: %s', json_dumps(parsed, indent=2))
    entry = parsed.entries[0]
    target_id = entry.id
    in_reply_to = self.source_obj.get('inReplyTo')
    source_obj_obj = self.source_obj.get('object')
    if in_reply_to:
        for elem in in_reply_to:
            if elem.get('url') == target:
                elem['id'] = target_id
    elif isinstance(source_obj_obj, dict):
        source_obj_obj['id'] = target_id

    # Mastodon (and maybe others?) require a rel-mentioned link to the
    # original post's author to make it show up as a reply:
    #   app/services/process_interaction_service.rb
    # ...so add them as a tag, which atom renders as a rel-mention link.
    authors = entry.get('authors', None)
    if authors:
        url = entry.authors[0].get('href')
        if url:
            self.source_obj.setdefault('tags', []).append({'url': url})

    # extract and discover salmon endpoint
    logging.info('Discovering Salmon endpoint in %s', atom_url)
    endpoint = django_salmon.discover_salmon_endpoint(feed)

    if not endpoint:
        # try webfinger
        parsed = urllib.parse.urlparse(resp.target())
        # TODO: test missing email
        author = entry.get('author_detail', {})
        email = author.get('email') or '@'.join(
            (author.get('name', ''), parsed.netloc))
        try:
            # TODO: always https?
            profile = common.requests_get(
                '%s://%s/.well-known/webfinger?resource=acct:%s' %
                (parsed.scheme, parsed.netloc, email),
                verify=False)
            endpoint = django_salmon.get_salmon_replies_link(profile.json())
        except requests.HTTPError:
            pass

    if not endpoint:
        self.error('No salmon endpoint found!', status=400)
    logging.info('Discovered Salmon endpoint %s', endpoint)

    # construct reply Atom object
    self.source_url = resp.source()
    activity = self.source_obj
    if self.source_obj.get('verb') not in source.VERBS_WITH_OBJECT:
        activity = {'object': self.source_obj}
    entry = atom.activity_to_atom(activity, xml_base=self.source_url)
    logging.info('Converted %s to Atom:\n%s', self.source_url, entry)

    # sign reply and wrap in magic envelope
    domain = urllib.parse.urlparse(self.source_url).netloc
    key = MagicKey.get_or_create(domain)
    logging.info('Using key for %s: %s', domain, key)
    magic_envelope = magicsigs.magic_envelope(
        entry, common.CONTENT_TYPE_ATOM, key).decode()

    logging.info('Sending Salmon slap to %s', endpoint)
    common.requests_post(
        endpoint, data=common.XML_UTF8 + magic_envelope,
        headers={'Content-Type': common.CONTENT_TYPE_MAGIC_ENVELOPE})
    return True
def template_vars(self, domain=None, url=None):
    logging.debug(f'Headers: {list(request.headers.items())}')

    if domain.split('.')[-1] in NON_TLDS:
        error(f"{domain} doesn't look like a domain", status=404)

    # find representative h-card. try url, then url's home page, then domain
    urls = [f'http://{domain}/']
    if url:
        urls = [url, urllib.parse.urljoin(url, '/')] + urls

    for candidate in urls:
        resp = common.requests_get(candidate)
        parsed = util.parse_html(resp)
        mf2 = util.parse_mf2(parsed, url=resp.url)
        # logging.debug(f'Parsed mf2 for {resp.url}: {json_dumps(mf2, indent=2)}')
        hcard = mf2util.representative_hcard(mf2, resp.url)
        if hcard:
            logging.info(f'Representative h-card: {json_dumps(hcard, indent=2)}')
            break
    else:
        error(f"didn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {resp.url}")

    logging.info(f'Generating WebFinger data for {domain}')
    key = models.MagicKey.get_or_create(domain)
    props = hcard.get('properties', {})
    urls = util.dedupe_urls(props.get('url', []) + [resp.url])
    canonical_url = urls[0]

    acct = f'{domain}@{domain}'
    for url in urls:
        if url.startswith('acct:'):
            urluser, urldomain = util.parse_acct_uri(url)
            if urldomain == domain:
                acct = f'{urluser}@{domain}'
                logging.info(f'Found custom username: acct:{acct}')
                break

    # discover atom feed, if any
    atom = parsed.find('link', rel='alternate', type=common.CONTENT_TYPE_ATOM)
    if atom and atom['href']:
        atom = urllib.parse.urljoin(resp.url, atom['href'])
    else:
        atom = 'https://granary.io/url?' + urllib.parse.urlencode({
            'input': 'html',
            'output': 'atom',
            'url': resp.url,
            'hub': resp.url,
        })

    # discover PuSH, if any. (as in the older version above, a for/else with
    # break gives first-match-wins behavior for the hub Link header.)
    for link in resp.headers.get('Link', '').split(','):
        match = common.LINK_HEADER_RE.match(link)
        if match and match.group(2) == 'hub':
            hub = match.group(1)
            break
    else:
        hub = 'https://bridgy-fed.superfeedr.com/'

    # generate webfinger content
    data = util.trim_nulls({
        'subject': 'acct:' + acct,
        'aliases': urls,
        'magic_keys': [{'value': key.href()}],
        'links': sum(([{
            'rel': 'http://webfinger.net/rel/profile-page',
            'type': 'text/html',
            'href': url,
        }] for url in urls if url.startswith("http")), []) + [{
            'rel': 'http://webfinger.net/rel/avatar',
            'href': get_text(url),
        } for url in props.get('photo', [])] + [{
            'rel': 'canonical_uri',
            'type': 'text/html',
            'href': canonical_url,
        },

        # ActivityPub
        {
            'rel': 'self',
            'type': common.CONTENT_TYPE_AS2,
            # WARNING: in python 2 sometimes request.host_url lost port,
            # http://localhost:8080 would become just http://localhost. no
            # clue how or why. pay attention here if that happens again.
            'href': f'{request.host_url}{domain}',
        }, {
            'rel': 'inbox',
            'type': common.CONTENT_TYPE_AS2,
            'href': f'{request.host_url}{domain}/inbox',
        },

        # OStatus
        {
            'rel': 'http://schemas.google.com/g/2010#updates-from',
            'type': common.CONTENT_TYPE_ATOM,
            'href': atom,
        }, {
            'rel': 'hub',
            'href': hub,
        }, {
            'rel': 'magic-public-key',
            'href': key.href(),
        }, {
            'rel': 'salmon',
            'href': f'{request.host_url}{domain}/salmon',
        }]
    })
    logging.info(f'Returning WebFinger data: {json_dumps(data, indent=2)}')
    return data