def main():
    """Download the current PM2.5 CSV export and push its rows to the ODS realtime API.

    The CSV at the hard-coded URL is read, a timezone-aware ``timestamp``
    column is derived from ``Anfangszeit``, rows whose ``PM25_Sensirion``
    value is blank are dropped, and the remaining rows are posted as JSON
    records to ``credentials.ods_live_push_api_url``.
    """
    source_url = 'https://data-bs.ch/lufthygiene/nmbs_pm25/airmet_bs_museum_pm25_aktuell.csv'
    print(f'Downloading data from {source_url}...')
    urllib3.disable_warnings()
    # skiprows=range(1, 2) drops the single line that follows the header row.
    df = common.pandas_read_csv(source_url, sep=';', encoding='cp1252', skiprows=range(1, 2))

    print('Calculating ISO8601 time string...')
    naive_start = pd.to_datetime(df.Anfangszeit, format='%d.%m.%Y %H:%M:%S')
    # The localized column is not part of the pushed payload below.
    df['timestamp'] = naive_start.dt.tz_localize(
        'Europe/Zurich', ambiguous='infer', nonexistent='shift_forward')

    # To keep the code simple, all current data is re-pushed on every run
    # instead of querying ODS for the latest timestamp and pushing only
    # newer rows.
    if len(df) > 0:
        print('Dropping empty values...')
        # A single blank marks a missing measurement; turn it into NaN so
        # that dropna() can remove the row.
        df['PM25_Sensirion'] = df.PM25_Sensirion.replace(' ', numpy.nan)
        realtime_df = df.dropna(subset=['PM25_Sensirion'])

        # Realtime API bootstrap data:
        # {
        #     "anfangszeit": "23.02.2021 10:30:00",
        #     "pm25": 13.3
        # }
        column_map = {'Anfangszeit': 'anfangszeit', 'PM25_Sensirion': 'pm25'}
        renamed = realtime_df.rename(columns=column_map)
        payload = renamed[['anfangszeit', 'pm25']].to_json(orient='records')

        print(f'Pushing {realtime_df.Anfangszeit.count()} rows to ODS realtime API...')
        # payload is already a JSON string, so it is sent with data=...;
        # json=... would be needed if it were an object.
        r = common.requests_post(url=credentials.ods_live_push_api_url,
                                 data=payload, verify=False)
        r.raise_for_status()
    else:
        print('No rows to push to ODS... ')

    print('Job successful!')
def send(activity, inbox_url, user_domain):
    """Sends an ActivityPub request to an inbox.

    The request body is the JSON-serialized activity. It carries Date, Digest
    and Host headers, which are the headers covered by the HTTP Signature.

    Args:
      activity: dict, AS2 activity
      inbox_url: string
      user_domain: string, domain of the bridgy fed user sending the request

    Returns:
      requests.Response
    """
    # Lazy %-style arguments: the original message was a plain string with
    # {placeholders} but no f prefix, so the placeholders were logged verbatim.
    logging.info('Sending AP request from %s: %s', user_domain,
                 json_dumps(activity, indent=2))

    # prepare HTTP Signature (required by Mastodon)
    # https://w3c.github.io/activitypub/#authorization
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07
    # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
    acct = 'acct:%s@%s' % (user_domain, user_domain)
    key = MagicKey.get_or_create(user_domain)
    auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                             algorithm='rsa-sha256', sign_header='signature',
                             headers=('Date', 'Digest', 'Host'))

    # deliver to inbox
    # The body is serialized once so that the Digest header is computed over
    # exactly the bytes that are sent.
    body = json_dumps(activity).encode()
    headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        # required for HTTP Signature
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
        'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
        # required by Mastodon
        # https://github.com/tootsuite/mastodon/pull/14556#issuecomment-674077648
        'Digest': 'SHA-256=' + b64encode(sha256(body).digest()).decode(),
        'Host': util.domain_from_link(inbox_url),
    }
    return common.requests_post(inbox_url, data=body, auth=auth, headers=headers)
def send(activity, inbox_url, user_domain):
    """Delivers an AS2 activity to an ActivityPub inbox with an HTTP Signature.

    Args:
      activity: dict, AS2 activity; sent as the JSON request body
      inbox_url: string, URL the activity is POSTed to
      user_domain: string, domain of the bridgy fed user sending the request;
        used both to look up the signing key and to build the key id

    Returns:
      requests.Response
    """
    logging.info('Sending AP request from %s: %s', user_domain,
                 json.dumps(activity, indent=2))

    # HTTP Signature setup (Mastodon requires signed requests)
    # https://w3c.github.io/activitypub/#authorization-lds
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07
    # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
    key_id = 'acct:%s@%s' % (user_domain, user_domain)
    signing_key = MagicKey.get_or_create(user_domain)
    signature_auth = HTTPSignatureAuth(secret=signing_key.private_pem(),
                                       key_id=key_id,
                                       algorithm='rsa-sha256')

    # The Date header is required for HTTP Signature:
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
    http_date = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
    request_headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        'Date': http_date,
    }

    # POST the activity to the inbox
    return common.requests_post(inbox_url, json=activity, auth=signature_auth,
                                headers=request_headers)
def _try_salmon(self, resp):
    """Sends the source object to the target post's author as a Salmon slap.

    Fetches the target page (unless self.target_resp is already set), follows
    its Atom rel-alternate link, injects the target entry's id into
    self.source_obj, discovers the Salmon endpoint (from the feed, falling
    back to webfinger), then signs the Atom-converted source object and POSTs
    it to that endpoint.

    Side effects: sets self.target_resp (if unset) and self.source_url, and
    modifies self.source_obj in place.

    Args:
      resp: Response

    Returns:
      True once the slap has been POSTed.
    """
    # fetch target HTML page, extract Atom rel-alternate link
    target = resp.target()
    if not self.target_resp:
        self.target_resp = common.requests_get(target)

    parsed = util.parse_html(self.target_resp)
    atom_link = parsed.find('link', rel='alternate',
                            type=common.CONTENT_TYPE_ATOM)
    if not atom_link or not atom_link.get('href'):
        # NOTE(review): self.error is presumed to abort the request - confirm.
        self.error('Target post %s has no Atom link' % target, status=400)

    # fetch Atom target post, extract and inject id into source object.
    # The link may be relative: resolve it against the page's <base href>,
    # if any, and then against the target URL itself.
    base_url = ''
    base = parsed.find('base')
    if base and base.get('href'):
        base_url = base['href']
    atom_url = urllib.parse.urljoin(
        target, urllib.parse.urljoin(base_url, atom_link['href']))

    feed = common.requests_get(atom_url).text
    parsed = feedparser.parse(feed)
    logging.info('Parsed: %s', json_dumps(parsed, indent=2))
    if not parsed.entries:
        # report an empty feed explicitly instead of failing with IndexError
        self.error('Target feed %s has no entries' % atom_url, status=400)
    entry = parsed.entries[0]
    target_id = entry.id
    in_reply_to = self.source_obj.get('inReplyTo')
    source_obj_obj = self.source_obj.get('object')
    if in_reply_to:
        for elem in in_reply_to:
            if elem.get('url') == target:
                elem['id'] = target_id
    elif isinstance(source_obj_obj, dict):
        source_obj_obj['id'] = target_id

    # Mastodon (and maybe others?) require a rel-mentioned link to the
    # original post's author to make it show up as a reply:
    #   app/services/process_interaction_service.rb
    # ...so add them as a tag, which atom renders as a rel-mention link.
    authors = entry.get('authors', None)
    if authors:
        url = entry.authors[0].get('href')
        if url:
            self.source_obj.setdefault('tags', []).append({'url': url})

    # extract and discover salmon endpoint
    logging.info('Discovering Salmon endpoint in %s', atom_url)
    endpoint = django_salmon.discover_salmon_endpoint(feed)

    if not endpoint:
        # try webfinger
        parsed = urllib.parse.urlparse(target)
        # TODO: test missing email
        author = entry.get('author_detail', {})
        email = author.get('email') or '@'.join(
            (author.get('name', ''), parsed.netloc))
        try:
            # TODO: always https?
            profile = common.requests_get(
                '%s://%s/.well-known/webfinger?resource=acct:%s' %
                (parsed.scheme, parsed.netloc, email), verify=False)
            endpoint = django_salmon.get_salmon_replies_link(profile.json())
        except requests.HTTPError:
            # best effort: a failed lookup falls through to the
            # "no endpoint" error below, but leave a trace in the logs
            logging.warning('Webfinger lookup for %s failed', email,
                            exc_info=True)

    if not endpoint:
        self.error('No salmon endpoint found!', status=400)
    logging.info('Discovered Salmon endpoint %s', endpoint)

    # construct reply Atom object
    self.source_url = resp.source()
    activity = self.source_obj
    if self.source_obj.get('verb') not in source.VERBS_WITH_OBJECT:
        # wrap the bare object so it is converted as an activity's object
        activity = {'object': self.source_obj}
    entry = atom.activity_to_atom(activity, xml_base=self.source_url)
    logging.info('Converted %s to Atom:\n%s', self.source_url, entry)

    # sign reply and wrap in magic envelope
    domain = urllib.parse.urlparse(self.source_url).netloc
    key = MagicKey.get_or_create(domain)
    logging.info('Using key for %s: %s', domain, key)
    magic_envelope = magicsigs.magic_envelope(
        entry, common.CONTENT_TYPE_ATOM, key).decode()

    logging.info('Sending Salmon slap to %s', endpoint)
    common.requests_post(
        endpoint, data=common.XML_UTF8 + magic_envelope,
        headers={'Content-Type': common.CONTENT_TYPE_MAGIC_ENVELOPE})
    return True
def try_activitypub(self):
    """Attempts to deliver the source post to its target via ActivityPub.

    Reads the required 'source' request parameter, fetches that page and
    converts its first h-entry to an ActivityStreams object. The target
    (first in-reply-to, like-of or repost-of) is then fetched as AS2, the
    actor's inbox is located, and the signed AS2 activity is POSTed there.
    The inbox's status code and body are copied into self.response.

    Falls back to self.send_salmon() when the target turns out to be an HTML
    page or when no inbox can be found. Sets self.resp to the Response entity
    for this delivery.

    Returns:
      The result of self.send_salmon() on the fallback paths, otherwise None.
    """
    source = util.get_required_param(self, 'source')

    # fetch source page, convert to ActivityStreams
    source_resp = common.requests_get(source)
    source_url = source_resp.url or source
    source_mf2 = mf2py.parse(source_resp.text, url=source_url)
    # logging.debug('Parsed mf2 for %s: %s', source_resp.url, json.dumps(source_mf2, indent=2))

    entry = mf2util.find_first_entry(source_mf2, ['h-entry'])
    if not entry:
        # Without this check a page with no h-entry fails below with an
        # AttributeError on entry.setdefault().
        # NOTE(review): common.error is presumed to abort the request - confirm.
        common.error(self, 'No microformats2 found on %s' % source_url)
    logging.info('First entry: %s', json.dumps(entry, indent=2))
    # make sure it has url, since we use that for AS2 id, which is required
    # for ActivityPub.
    props = entry.setdefault('properties', {})
    if not props.get('url'):
        props['url'] = [source_url]

    source_obj = microformats2.json_to_object(entry, fetch_mf2=True)
    logging.info('Converted to AS: %s', json.dumps(source_obj, indent=2))

    # fetch target page as AS object. (target is first in-reply-to, like-of,
    # or repost-of, *not* target query param.)
    target = util.get_url(util.get_first(source_obj, 'inReplyTo') or
                          util.get_first(source_obj, 'object'))
    if not target:
        common.error(self, 'No u-in-reply-to, u-like-of, or u-repost-of '
                     'found in %s' % source_url)

    try:
        target_resp = common.get_as2(target)
    except (requests.HTTPError, exc.HTTPBadGateway) as e:
        # A successful HTML response means the target has no AS2
        # representation; hand the fetched page to the Salmon fallback.
        if (e.response.status_code // 100 == 2 and
                common.content_type(e.response).startswith('text/html')):
            self.resp = Response.get_or_create(
                source=source_url, target=e.response.url or target,
                direction='out', source_mf2=json.dumps(source_mf2))
            return self.send_salmon(source_obj, target_resp=e.response)
        raise

    target_url = target_resp.url or target
    self.resp = Response.get_or_create(
        source=source_url, target=target_url, direction='out',
        protocol='activitypub', source_mf2=json.dumps(source_mf2))

    # find actor's inbox
    target_obj = target_resp.json()
    inbox_url = target_obj.get('inbox')

    if not inbox_url:
        # TODO: test actor/attributedTo and not, with/without inbox
        actor = target_obj.get('actor') or target_obj.get('attributedTo')
        if isinstance(actor, dict):
            inbox_url = actor.get('inbox')
            actor = actor.get('url')
        if not inbox_url and not actor:
            common.error(self, 'Target object has no actor or attributedTo URL')

    if not inbox_url:
        # fetch actor as AS object
        actor = common.get_as2(actor).json()
        inbox_url = actor.get('inbox')

    if not inbox_url:
        # TODO: probably need a way to save errors like this so that we can
        # return them if ostatus fails too.
        # common.error(self, 'Target actor has no inbox')
        return self.send_salmon(source_obj, target_resp=target_resp)

    # convert to AS2
    source_domain = urlparse.urlparse(source_url).netloc
    key = MagicKey.get_or_create(source_domain)
    source_activity = common.postprocess_as2(
        as2.from_as1(source_obj), target=target_obj, key=key)

    if self.resp.status == 'complete':
        # this source/target pair was already delivered; send an Update
        source_activity['type'] = 'Update'

    # prepare HTTP Signature (required by Mastodon)
    # https://w3c.github.io/activitypub/#authorization-lds
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07
    # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
    acct = 'acct:%s@%s' % (source_domain, source_domain)
    auth = HTTPSignatureAuth(secret=key.private_pem(), key_id=acct,
                             algorithm='rsa-sha256')

    # deliver source object to target actor's inbox.
    headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        # required for HTTP Signature
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
        'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
    }
    # the inbox may be given relative to the target URL
    inbox_url = urlparse.urljoin(target_url, inbox_url)
    resp = common.requests_post(inbox_url, json=source_activity, auth=auth,
                                headers=headers)
    self.response.status_int = resp.status_code
    if resp.status_code == 202:
        self.response.write('202 response! If this is Mastodon 1.x, their '
                            'signature verification probably failed. :(\n')
    self.response.write(resp.text)
def send_salmon(self, source_obj, target_resp=None):
    """Sends the source object to the target post's author as a Salmon slap.

    Marks self.resp as using the 'ostatus' protocol, follows the target
    page's Atom rel-alternate link, injects the target entry's id into
    source_obj, discovers the Salmon endpoint (from the feed, falling back to
    webfinger), then signs the Atom-converted source object and POSTs it to
    that endpoint.

    Args:
      source_obj: dict, ActivityStreams object for the source post. Modified
        in place (target id and, if the feed names an author URL, a tag).
      target_resp: response for the target page, optional. When omitted the
        page at self.resp.target() is fetched.
    """
    self.resp.protocol = 'ostatus'

    # fetch target HTML page, extract Atom rel-alternate link
    if not target_resp:
        target_resp = common.requests_get(self.resp.target())

    parsed = BeautifulSoup(target_resp.content,
                           from_encoding=target_resp.encoding)
    atom_link = parsed.find('link', rel='alternate',
                            type=common.CONTENT_TYPE_ATOM)
    if not atom_link or not atom_link.get('href'):
        # NOTE(review): common.error is presumed to abort the request - confirm.
        common.error(self, 'Target post %s has no Atom link' % self.resp.target(),
                     status=400)

    # The href may be relative; resolve it against the target URL. Absolute
    # hrefs pass through urljoin unchanged.
    atom_url = urlparse.urljoin(self.resp.target(), atom_link['href'])

    # fetch Atom target post, extract and inject id into source object
    feed = common.requests_get(atom_url).text
    parsed = feedparser.parse(feed)
    logging.info('Parsed: %s', json.dumps(parsed, indent=2,
                                          default=lambda key: '-'))
    if not parsed.entries:
        # report an empty feed explicitly instead of failing with IndexError
        common.error(self, 'Target feed %s has no entries' % atom_url,
                     status=400)
    entry = parsed.entries[0]
    target_id = entry.id
    in_reply_to = source_obj.get('inReplyTo')
    source_obj_obj = source_obj.get('object')
    if in_reply_to:
        in_reply_to[0]['id'] = target_id
    elif isinstance(source_obj_obj, dict):
        source_obj_obj['id'] = target_id

    # Mastodon (and maybe others?) require a rel-mentioned link to the
    # original post's author to make it show up as a reply:
    #   app/services/process_interaction_service.rb
    # ...so add them as a tag, which atom renders as a rel-mention link.
    authors = entry.get('authors', None)
    if authors:
        url = entry.authors[0].get('href')
        if url:
            source_obj.setdefault('tags', []).append({'url': url})

    # extract and discover salmon endpoint
    logging.info('Discovering Salmon endpoint in %s', atom_url)
    endpoint = django_salmon.discover_salmon_endpoint(feed)

    if not endpoint:
        # try webfinger
        target_parts = urlparse.urlparse(self.resp.target())
        # The entry may have no author details at all, so read them
        # defensively instead of via attribute access.
        author = entry.get('author_detail', {})
        email = author.get('email') or '@'.join(
            (author.get('name', ''), target_parts.netloc))
        try:
            # TODO: always https?
            profile = common.requests_get(
                '%s://%s/.well-known/webfinger?resource=acct:%s' %
                (target_parts.scheme, target_parts.netloc, email),
                verify=False)
            endpoint = django_salmon.get_salmon_replies_link(profile.json())
        except requests.HTTPError:
            # best effort: a failed lookup falls through to the
            # "no endpoint" error below, but leave a trace in the logs
            logging.warning('Webfinger lookup for %s failed', email,
                            exc_info=True)

    if not endpoint:
        common.error(self, 'No salmon endpoint found!', status=400)
    logging.info('Discovered Salmon endpoint %s', endpoint)

    # construct reply Atom object
    source_url = self.resp.source()
    activity = (source_obj if source_obj.get('verb') in source.VERBS_WITH_OBJECT
                else {'object': source_obj})
    entry = atom.activity_to_atom(activity, xml_base=source_url)
    logging.info('Converted %s to Atom:\n%s', source_url, entry)

    # sign reply and wrap in magic envelope
    domain = urlparse.urlparse(source_url).netloc
    key = MagicKey.get_or_create(domain)
    logging.info('Using key for %s: %s', domain, key)
    magic_envelope = magicsigs.magic_envelope(
        entry, common.CONTENT_TYPE_ATOM, key)

    logging.info('Sending Salmon slap to %s', endpoint)
    common.requests_post(
        endpoint, data=common.XML_UTF8 + magic_envelope,
        headers={'Content-Type': common.CONTENT_TYPE_MAGIC_ENVELOPE})
# Filtering by the latest value already in ODS is disabled; every row of
# merged_df is pushed on each run.
# print(f'Filtering data after {latest_ods_value} for submission to ODS via realtime API...')
# realtime_df = merged_df[merged_df['timestamp'] > latest_ods_value]
realtime_df = merged_df

if len(realtime_df) > 0:
    # Realtime API bootstrap data:
    # {
    #     "timestamp": "2020-07-28T01:35:00+02:00",
    #     "pegel": "245.16",
    #     "abfluss": "591.2"
    # }

    # Keep only the pushed columns; the text form of the timestamp is sent
    # under the name "timestamp".
    pushed_columns = ['timestamp_text', 'pegel', 'abfluss']
    realtime_df = realtime_df[pushed_columns].rename(
        columns={'timestamp_text': 'timestamp'})
    payload = realtime_df.to_json(orient='records')

    print(f'Pushing {realtime_df.timestamp.count()} rows to ODS realtime API...')
    # payload is already a JSON string, so it is sent with data=...;
    # json=... would be needed if it were an object.
    r = common.requests_post(url=credentials.ods_live_push_api_url,
                             data=payload, verify=False)
    r.raise_for_status()
else:
    print('No rows to push to ODS... ')

print('Job successful!')