def __init__(self, source_url, target_url):
    """Fetch ``source_url`` and look for a mention of ``target_url`` in it.

    After construction:
      * ``self.error`` is a dict with ``status``/``reason`` when the source
        did not come back with an OK status, otherwise ``None``.
      * ``self.mention`` holds the value returned by ``parse_mention`` when
        the target is among the references found, otherwise ``None``.
      * ``self.source_result`` holds whatever ``ronkyuu.findMentions``
        returned.
    """
    self.source_url = source_url
    self.target_url = target_url
    self.error = None
    self.mention = None
    self.__doc__ = None

    # Retrieve the source document.
    response = requests.get(self.source_url)
    if response.status_code == requests.codes.ok:
        # When the server declares a charset use the decoded text,
        # otherwise hand the raw bytes to BeautifulSoup.
        declares_charset = 'charset' in response.headers.get('content-type', '')
        markup = response.text if declares_charset else response.content
        self.__doc__ = bs4.BeautifulSoup(markup)

    # NOTE(review): the original whitespace was lost; this call is assumed
    # to sit at method level (not inside the status check) - confirm.
    self.source_result = ronkyuu.findMentions(
        source_url,
        content=self.__doc__,
        targetURL=self.target_url,
    )

    if self.__doc__ is None:
        # The source could not be retrieved.
        self.error = {'status':400, 'reason': 'Source URL could not be fetched.'}
        return

    if self.target_url in self.source_result['refs']:
        # The source links to the target: parse microformats and build the mention.
        mf = mf2py.Parser(doc=self.__doc__, url=self.source_url).to_dict()
        self.mention = parse_mention(self.__doc__, mf, self.source_url, self.target_url)
    else:
        # The source does not mention the target.
        self.mention = None
def handle_root():
    """Handle an incoming webmention POST.

    Reads ``source`` and ``target`` from the submitted form, validates the
    pair, confirms the source references the target, then commits the
    parsed microformats of the source as YAML.

    Returns a ``Response`` with status 201 on success or 400 for any
    validation failure.  Raises ``Exception`` when the commit does not
    answer with status 201.
    """
    def rejected(message):
        # Every validation failure is reported as a plain 400 response.
        return Response(response=message, status=400)

    form = request.form
    source = form['source']
    target = form['target']

    if source == target:
        return rejected('source same as target')

    endpoint = discoverEndpoint(target)[1]
    if not endpoint:
        return rejected('target does not support webmentions')

    # Look for the target inside the source document.
    found = findMentions(source, target)
    if found['status'] != 200:
        return rejected('error fetching source')
    if not found['refs']:
        return rejected('target not found in source')

    parsed = mf2py.Parser(url=source).to_dict()
    destination = webmention_path(source, target)
    r = commit_file(destination, yaml.dump(parsed))
    if r.status_code == 201:
        return Response(status=201)

    print('failed to post to github: ' + r.text)
    raise Exception('failed to post to github: ' + str(r))
def postEntry(self, entry, ctx):
    """Send a webmention for every reference found in ``entry.url``.

    ``ctx`` is accepted but not used here.
    """
    origin = entry.url
    logger.debug("Finding mentions in: %s" % origin)

    found = ronkyuu.findMentions(origin)
    targets = found.get('refs', [])
    for target in targets:
        message = "Sending webmention: %s -> %s" % (origin, target)
        logger.debug(message)
        ronkyuu.sendWebmention(origin, target)
def runTest(self):
    """Check that the mocked post yields the expected outbound references."""
    with HTTMock(bear_im_mock):
        mentions = findMentions(post_url, ['bear.im'])
        found = mentions['refs']

        # At least one reference must be reported ...
        assert len(found) > 0
        # ... and both known links must be among the reported keys.
        for expected in ('http://indiewebify.me/', tantek_url):
            assert expected in found.keys()
def __init__(self, *args, **kwargs):
    """Run ``ronkyuu.findMentions`` and keep the interesting parts of its result.

    ``past_mentions`` (keyword only, default: empty set) is stored on the
    instance and removed from ``kwargs``; all remaining arguments are
    forwarded unchanged to ``ronkyuu.findMentions``.
    """
    self.past_mentions = kwargs.pop('past_mentions', set())

    # Let ronkyuu fetch/parse the source and collect the references.
    found = ronkyuu.findMentions(*args, **kwargs)

    # (attribute name, key in the findMentions result)
    wanted = (
        ('status', 'status'),
        ('source_url', 'post-url'),
        ('source_content', 'content'),
        ('present_mentions', 'refs'),
    )
    for attribute, key in wanted:
        setattr(self, attribute, found[key])
def find_mentions(article, siteurl, content_headers, excluded):
    """Collect the outbound references of ``article``.

    Args:
        article: object exposing a ``url`` attribute (relative to the site).
        siteurl: base URL of the site the article belongs to.
        content_headers: passed through to ``make_mentionable_input``.
        excluded: passed to ``findMentions`` as ``exclude_domains``.

    Returns:
        Whatever ``findMentions`` returns for the article content.
    """
    # Imported locally so the block does not rely on a file-level import.
    import posixpath

    # URLs always use forward slashes.  ``os.path.join`` would insert a
    # backslash on Windows; ``posixpath.join`` behaves exactly like
    # ``os.path.join`` does on POSIX systems on every platform.
    source_url = posixpath.join(siteurl, article.url)
    mentionable_content = make_mentionable_input(article, content_headers)
    return findMentions(
        source_url,
        None,
        exclude_domains=excluded,
        content=mentionable_content,
        test_urls=False,
    )
def process(sourceURL, targetURL):
    """Publish webmention for sourceURL

    Publishing is handled as a Webmention sent to the /publish URL
    as the targetURL included somewhere within the sourceURL

    All you need is to include the following:
        <a href="https://indieweb.news/publish"></a>
    somewhere in the sourceURL

    Returns the dict describing the stored post when ``targetURL`` is
    referenced by ``sourceURL``, otherwise ``None``.
    """
    app = current_app
    app.logger.info('process [%s][%s]' % (sourceURL, targetURL))

    mentions = ronkyuu.findMentions(sourceURL)
    for href in mentions['refs']:
        app.logger.info('process href [%s]' % href)
        # Skip self references and anything that is not the publish target.
        if href == sourceURL or href != targetURL:
            continue

        # NOTE(review): a naive UTC "now" is localized as America/New_York,
        # i.e. the UTC wall clock is labelled as New York time - confirm
        # that this is intended.
        utcdate = datetime.datetime.utcnow()
        tzLocal = pytz.timezone('America/New_York')
        postDate = tzLocal.localize(utcdate, is_dst=None).strftime('%Y-%m-%dT%H:%M:%S')
        domain = baseDomain(sourceURL, includeScheme=False)
        postID = str(uuid.uuid4())

        data = {
            'source': sourceURL,
            'target': targetURL,
            'created': postDate,
            'updated': postDate,
            'postid': postID,
            'domain': domain,
        }
        event = {
            'type': 'publish',
            'key': postID,
        }

        # NOTE(review): the SQL statements below are assembled with
        # str.format from caller supplied URLs; switch to parameterized
        # queries if the palala wrapper supports them.
        known = app.palala.query('select * from domains where domain = "{domain}"'.format(**data))
        if len(known) > 0:
            statement = 'update domains set updated = "{updated}" where domain = "{domain}"'.format(domain=domain, updated=postDate)
        else:
            statement = 'insert into domains (domain, created, updated) values ("{domain}","{created}","{updated}");'.format(domain=domain,created=postDate,updated=postDate)
        app.palala.run(statement)
        app.palala.run('insert into posts (postid, domain, source, target, created, updated) values ("{postid}","{domain}","{source}","{target}","{created}","{updated}")'.format(**data))

        # Record the post in the recent list (trimmed) and queue the event.
        app.dbRedis.lpush('indienews-recent', postID)
        app.dbRedis.ltrim('indienews-recent', 0, 50)
        app.dbRedis.rpush('indienews-events', json.dumps(event))

        # Only the first matching reference is processed.
        return data

    return None
def mention(sourceURL, targetURL):
    """Process the Webmention of the targetURL from the sourceURL.

    To verify that the sourceURL has indeed referenced our targetURL
    we run findMentions() at it and scan the resulting href list.

    For every reference that equals ``targetURL`` (and is not the source
    itself) ``events.inboundWebmention`` is invoked with the full
    findMentions result.  Nothing is returned.
    """
    app.logger.info('discovering Webmention endpoint for %s' % sourceURL)

    mentions = ronkyuu.findMentions(sourceURL)

    for href in mentions['refs']:
        # ``!=`` replaces the Python-2-only ``<>`` operator, which is a
        # syntax error on Python 3; the comparison itself is unchanged.
        if href != sourceURL and href == targetURL:
            app.logger.info('post at %s was referenced by %s' % (targetURL, sourceURL))
            events.inboundWebmention(sourceURL, targetURL, mentions=mentions)
def mention(sourceURL, targetURL, vouchDomain=None):
    """Verify and process a Webmention sent from sourceURL about targetURL.

    The references reported by findMentions() for sourceURL are scanned;
    each one that equals targetURL (and is not sourceURL itself) is handed
    to processWebmention().

    Returns the value of the last processWebmention() call, or ``False``
    when targetURL was never referenced.
    """
    log = app.logger
    log.info('discovering Webmention endpoint for %s' % sourceURL)

    mentions = ronkyuu.findMentions(sourceURL)
    outcome = False
    log.info('mentions %s' % mentions)

    for href in mentions['refs']:
        # Ignore self references and links to anything but our target.
        if href == sourceURL or href != targetURL:
            continue
        log.info('post at %s was referenced by %s' % (targetURL, sourceURL))
        outcome = processWebmention(sourceURL, targetURL, vouchDomain)

    log.info('mention() returning %s' % outcome)
    return outcome
def process_webmention(commit_url, source, target):
    """Verify that ``source`` references ``target`` and commit the mention.

    The microformats of ``source`` are parsed and committed, together with
    both URLs, as a YAML document to ``commit_url``.

    Raises:
        Exception: when the source could not be fetched, when it does not
            reference the target, or when the commit does not answer 201.
    """
    # Look for the target inside the source document.
    found = findMentions(source, target)
    if found['status'] != 200:
        raise Exception('error fetching source')
    if not found['refs']:
        raise Exception('target not found in source')

    webmention = dict(
        sourceUrl=source,
        targetUrl=target,
        parsedSource=mf2py.Parser(url=source).to_dict(),
    )

    r = commit_file(commit_url, yaml.dump(webmention))
    if r.status_code == 201:
        return

    raise Exception('failed to post to github: ' + str(r.status_code) + ', ' + r.text)
def process_webmention(commit_url, source, target):
    """Verify that ``source`` references ``target`` and commit the mention.

    The result of ``mf2parse(source)`` is committed, together with both
    URLs, as a JSON document to ``commit_url``.

    Raises:
        Exception: when the source could not be fetched, when it does not
            reference the target, or when the commit does not answer 201.
    """
    # Look for the target inside the source document.
    found = findMentions(source, target)
    if found['status'] != 200:
        raise Exception('error fetching source')
    if not found['refs']:
        raise Exception('target not found in source')

    webmention = dict(
        sourceUrl=source,
        targetUrl=target,
        parsedSource=mf2parse(source),
    )

    r = commit_file(commit_url, json.dumps(webmention))
    if r.status_code == 201:
        return

    raise Exception(f'failed to post {commit_url} to github: '
                    f'{r.status_code}, {r.text}')
def mention(sourceURL, targetURL, db, log, siteConfigFilename, vouchDomain=None, vouchRequired=False):
    """Verify and record a Webmention sent from sourceURL about targetURL.

    The references reported by findMentions() for sourceURL are scanned.
    For each one that equals targetURL (and is not sourceURL itself) the
    vouch rules are applied and, when accepted, the mention is stored in
    ``db`` and a ``webmention`` event is pushed to ``kaku-events``.

    Args:
        sourceURL: URL of the page that sent the mention.
        targetURL: URL the mention claims to reference.
        db: store providing ``set`` and ``rpush``.
        log: logger providing ``info``.
        siteConfigFilename: JSON config loaded when the file exists.
        vouchDomain: vouch domain supplied by the sender, if any.
        vouchRequired: when true a mention is only accepted if
            ``processVouch`` approves ``vouchDomain``.

    Returns:
        Tuple ``(result, vouched)``.
    """
    cfg = Config()
    if os.path.exists(siteConfigFilename):
        cfg.fromJson(siteConfigFilename)

    log.info('discovering Webmention endpoint for %s' % sourceURL)
    mentions = ronkyuu.findMentions(sourceURL)
    result = False
    vouched = False
    log.info('mentions %s' % mentions)

    for href in mentions['refs']:
        # Ignore self references and links to anything but our target.
        if href == sourceURL or href != targetURL:
            continue

        log.info('post at %s was referenced by %s' % (targetURL, sourceURL))

        if not vouchRequired:
            vouched, result = False, True
        elif vouchDomain is None:
            vouched = result = False
        else:
            vouched = result = processVouch(cfg.paths.content, sourceURL, targetURL, vouchDomain)

        if not result:
            continue

        # NOTE(review): a naive UTC "now" is localized as America/New_York
        # - confirm that this is intended.
        utcdate = datetime.datetime.utcnow()
        tzLocal = pytz.timezone('America/New_York')
        timestamp = tzLocal.localize(utcdate, is_dst=None)

        mf2Data = Parser(doc=mentions['content']).to_dict()
        hcard = extractHCard(mf2Data)

        data = {
            'sourceURL': sourceURL,
            'targetURL': targetURL,
            'vouchDomain': vouchDomain,
            'vouched': vouched,
            'postDate': timestamp.strftime('%Y-%m-%dT%H:%M:%S'),
            'hcard': hcard,
            'mf2data': mf2Data,
            'siteConfig': cfg,
        }
        key = 'webmention::%s::%s' % (timestamp.strftime('%Y%m%d%H%M%S'), targetURL)
        event = {
            'type': 'webmention',
            'key': key,
        }

        # Persist the mention and queue an event pointing at it.
        db.set(key, json.dumps(data))
        db.rpush('kaku-events', json.dumps(event))

    log.info('mention() returning %s' % result)
    return result, vouched
parser.add_argument('sourceURL') parser.add_argument('--vouch', default=None) parser.add_argument('--eventConfigFile', default=None) args = parser.parse_args() cfg = ronkyuu.discoverConfig(args.eventConfigFile) domains = [] # cfg.get('domains', []) sourceURL = args.sourceURL vouchDomain = args.vouch print('Scanning %s for mentions' % sourceURL) if vouchDomain is not None: print('vouch domain present and will be sent') mentions = ronkyuu.findMentions(sourceURL, domains) print(mentions['refs']) for href in mentions['refs']: if sourceURL != href: print(href) wmStatus, wmUrl = ronkyuu.discoverEndpoint(href, test_urls=False) if wmUrl is not None and wmStatus == 200: print('\tfound webmention endpoint %s for %s' % (wmUrl, href)) status_code = ronkyuu.sendWebmention(sourceURL, href, wmUrl, vouchDomain=vouchDomain) if status_code == requests.codes.ok:
def runTest(self):
    """Check that references are still found when 'bear.im' is excluded."""
    with HTTMock(mock_response):
        mentions = findMentions(post_url, exclude_domains=['bear.im'])
        found = mentions['refs']

        # At least one reference must be reported, including the known link.
        assert len(found) > 0
        assert 'http://indiewebify.me/' in found
def checkOutboundWebmentions(sourceURL, html, targetFile, update=False):
    """Send webmentions for new, updated or removed links of a post.

    Args:
        sourceURL: URL of the post being scanned.
        html: content of the post, handed to ``ronkyuu.findMentions``.
        targetFile: location of the outbound-webmention cache, read with
            ``loadOutboundWebmentions`` and written back with
            ``saveOutboundWebmentions``.
        update: when true, references already recorded in ``db`` are sent
            again.

    Any ``Exception`` raised while processing is logged and not
    propagated; nothing is returned.
    """
    logger.info('checking for outbound webmentions [%s]' % sourceURL)
    try:
        cached = loadOutboundWebmentions(targetFile)
        found = ronkyuu.findMentions(sourceURL, content=html)
        mentions = {}

        # Classify every reference in the post as new, forced or already seen.
        for href in found['refs']:
            if sourceURL == href:
                continue
            logger.info(href)
            key = 'webmention::%s::%s' % (sourceURL, href)
            keySeen = db.exists(key)
            if keySeen:
                if update:
                    keySeen = False
                    s = 'update forced'
                else:
                    s = 'already processed'
            else:
                s = 'new mention'
            logger.info('\t%s [%s]' % (s, key))
            mentions[key] = {
                'key': key,
                'href': href,
                'keySeen': keySeen,
                'removed': False,
            }

        # Cached entries that are no longer in the post are marked removed.
        for key in cached:
            if key in mentions:
                continue
            entry = cached[key]
            mentions[key] = entry
            entry['removed'] = True
            if 'keySeen' not in entry:
                entry['keySeen'] = False

        removed = []
        for key, mention in mentions.items():
            logger.info('seen: %(keySeen)s removed: %(removed)s [%(key)s]' % mention)

            # Only new/updated or removed references need a webmention.
            if not mention['removed'] and mention['keySeen']:
                continue
            if mention['removed']:
                removed.append(key)

            href = mention['href']
            wmStatus, wmUrl, debug = ronkyuu.discoverEndpoint(href, test_urls=False, debug=True)
            logger.info('webmention endpoint discovery: %s [%s]' % (wmStatus, wmUrl))
            if len(debug) > 0:
                logger.info('\n\tdebug: '.join(debug))

            if wmUrl is None or wmStatus != 200:
                continue

            logger.info('\tfound webmention endpoint %s for %s' % (wmUrl, href))
            resp, debug = ronkyuu.sendWebmention(sourceURL, href, wmUrl, debug=True)
            if len(debug) > 0:
                logger.info('\n\tdebug: '.join(debug))

            if resp.status_code != requests.codes.ok:
                logger.info('\twebmention send returned a status code of %s' % resp.status_code)
                continue

            if key not in cached:
                cached[key] = {
                    'key': key,
                    'href': href,
                    'wmUrl': wmUrl,
                    'status': resp.status_code,
                }
            # A redirected POST is reported but not recorded in db.
            if len(resp.history) == 0:
                db.set(key, resp.status_code)
                logger.info('\twebmention sent successfully')
            else:
                logger.info('\twebmention POST was redirected')

        # Forget removed references in both the cache and db.
        for key in removed:
            del cached[key]
            db.delete(key)

        saveOutboundWebmentions(targetFile, cached)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; ordinary errors are still
        # logged without aborting the caller.
        logger.exception('exception during checkOutboundWebmentions')
# Command line: the post to scan, an optional vouch domain and an optional
# event configuration file.
parser.add_argument('sourceURL')
parser.add_argument('--vouch', default=None)
parser.add_argument('--eventConfigFile', default=None)
args = parser.parse_args()

cfg = ronkyuu.discoverConfig(args.eventConfigFile)
# The domain list from the config is currently not used.
domains = []  # cfg.get('domains', [])
sourceURL = args.sourceURL
vouchDomain = args.vouch

print('Scanning %s for mentions' % sourceURL)
if vouchDomain is not None:
    print('vouch domain present and will be sent')

mentions = ronkyuu.findMentions(sourceURL, domains)
print(mentions['refs'])

# Send a webmention to every referenced page that advertises an endpoint.
for href in mentions['refs']:
    if sourceURL == href:
        continue
    print(href)

    wmStatus, wmUrl = ronkyuu.discoverEndpoint(href, test_urls=False)
    if wmUrl is None or wmStatus != 200:
        continue
    print('\tfound webmention endpoint %s for %s' % (wmUrl, href))

    status_code = ronkyuu.sendWebmention(sourceURL, href, wmUrl, vouchDomain=vouchDomain)
    if status_code != requests.codes.ok:
        print('\twebmention send returned a status code of %s' % status_code)
    else:
        print('\twebmention sent successfully')
def checkOutboundWebmentions(sourceURL, html, targetFile, update=False):
    """Send webmentions for new, updated or removed links of a post.

    Args:
        sourceURL: URL of the post being scanned.
        html: content of the post, handed to ``ronkyuu.findMentions``.
        targetFile: location of the outbound-webmention cache, read with
            ``loadOutboundWebmentions`` and written back with
            ``saveOutboundWebmentions``.
        update: when true, references already recorded in ``db`` are sent
            again.

    Any ``Exception`` raised while processing is logged and not
    propagated; nothing is returned.
    """
    logger.info('checking for outbound webmentions [%s]' % sourceURL)
    try:
        cached = loadOutboundWebmentions(targetFile)
        found = ronkyuu.findMentions(sourceURL, content=html)
        mentions = {}

        # Walk the references found in the post and decide for each one
        # whether it is new, a forced update or already processed.
        for href in found['refs']:
            if sourceURL == href:
                continue
            logger.info(href)
            key = 'webmention::%s::%s' % (sourceURL, href)
            keySeen = db.exists(key)
            if not keySeen:
                s = 'new mention'
            elif update:
                keySeen = False
                s = 'update forced'
            else:
                s = 'already processed'
            logger.info('\t%s [%s]' % (s, key))
            mentions[key] = {
                'key': key,
                'href': href,
                'keySeen': keySeen,
                'removed': False,
            }

        # Anything in the cache that the post no longer references has
        # been removed from the post.
        for key in cached:
            if key not in mentions:
                mentions[key] = cached[key]
                mentions[key]['removed'] = True
                if 'keySeen' not in mentions[key]:
                    mentions[key]['keySeen'] = False

        removed = []
        for key in mentions:
            mention = mentions[key]
            logger.info('seen: %(keySeen)s removed: %(removed)s [%(key)s]' % mention)

            # Send webmentions only for new/updated or removed references.
            needs_send = mention['removed'] or not mention['keySeen']
            if not needs_send:
                continue
            if mention['removed']:
                removed.append(key)

            href = mention['href']
            wmStatus, wmUrl, debug = ronkyuu.discoverEndpoint(
                href, test_urls=False, debug=True)
            logger.info('webmention endpoint discovery: %s [%s]' % (wmStatus, wmUrl))
            if len(debug) > 0:
                logger.info('\n\tdebug: '.join(debug))

            if wmUrl is None or wmStatus != 200:
                continue

            logger.info('\tfound webmention endpoint %s for %s' % (wmUrl, href))
            resp, debug = ronkyuu.sendWebmention(sourceURL, href, wmUrl, debug=True)
            if len(debug) > 0:
                logger.info('\n\tdebug: '.join(debug))

            if resp.status_code == requests.codes.ok:
                if key not in cached:
                    cached[key] = {
                        'key': key,
                        'href': href,
                        'wmUrl': wmUrl,
                        'status': resp.status_code,
                    }
                # A redirected POST is reported but not recorded in db.
                if len(resp.history) == 0:
                    db.set(key, resp.status_code)
                    logger.info('\twebmention sent successfully')
                else:
                    logger.info('\twebmention POST was redirected')
            else:
                logger.info(
                    '\twebmention send returned a status code of %s' % resp.status_code)

        # Drop removed references from both the cache and db.
        for key in removed:
            del cached[key]
            db.delete(key)

        saveOutboundWebmentions(targetFile, cached)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; ordinary errors are still
        # logged without aborting the caller.
        logger.exception('exception during checkOutboundWebmentions')
def mention(sourceURL, targetURL, vouchDomain=None):
    """Process the incoming Webmention from the sourceURL.

    findMentions() is run against sourceURL and its result decides what
    happens:

      * status 410: a ``mention``/``deleted`` event is emitted for the
        source/target pair.
      * otherwise: every reference equal to targetURL (and different from
        sourceURL) is checked against the vouch rules - governed by the
        ``VOUCH_REQUIRED`` config value - and, when accepted, a
        ``mention``/``create`` event carrying the parsed microformats and
        h-card is emitted.

    A ``ValueError`` raised during processing is logged and turns the
    result into ``False``.

    Returns:
        Tuple ``(result, vouched)``.
    """
    log = current_app.logger
    log.info('handling Webmention from %s' % sourceURL)

    result = False
    vouched = False
    try:
        mentions = ronkyuu.findMentions(sourceURL)
        log.info('mentions %s' % mentions)

        if mentions['status'] == 410:
            # The source is gone: report the mention as deleted.
            data = {
                'targetURL': targetURL,
                'sourceURL': sourceURL,
            }
            log.info('mention removal event from [%s] of [%s]' % (targetURL, sourceURL))
            kakuEvent('mention', 'deleted', data)
        else:
            for href in mentions['refs']:
                # Ignore self references and links to anything but our target.
                if href == sourceURL or href != targetURL:
                    continue

                log.info('post at %s was referenced by %s' % (targetURL, sourceURL))

                if not current_app.config['VOUCH_REQUIRED']:
                    vouched, result = False, True
                elif vouchDomain is None:
                    vouched = result = False
                else:
                    vouched = result = processVouch(sourceURL, targetURL, vouchDomain)

                if not result:
                    continue

                # NOTE(review): a naive UTC "now" is localized as
                # America/New_York - confirm that this is intended.
                utcdate = datetime.datetime.utcnow()
                tzLocal = pytz.timezone('America/New_York')
                timestamp = tzLocal.localize(utcdate, is_dst=None)

                mf2Data = Parser(doc=mentions['content']).to_dict()
                hcard = extractHCard(mf2Data)
                data = {
                    'sourceURL': sourceURL,
                    'targetURL': targetURL,
                    'vouchDomain': vouchDomain,
                    'vouched': vouched,
                    'postDate': timestamp.strftime('%Y-%m-%dT%H:%M:%S'),
                    'hcard': hcard,
                    'mf2data': mf2Data,
                }
                log.info('mention created for [%s] from [%s]' % (targetURL, sourceURL))
                log.info(json.dumps(data, indent=2))
                kakuEvent('mention', 'create', data)

        log.info('mention() returning %s' % result)
    except ValueError:
        log.exception('Exception raised during webmention processing')
        result = False

    return result, vouched