def emit_group_rss(self, group=None, groupname=None):
    """Build an RSS feed of the cached annotations for a Hypothesis group.

    Fetches each cached annotation id via the Hypothesis API, renders a
    feed entry per annotation (including reply context and quoted text),
    and writes the feed to ``<group>.xml`` in the working directory.

    Args:
        group: group id used to name the output file (``<group>.xml``).
        groupname: human-readable group name used in feed/entry titles.

    Side effects: network calls to the Hypothesis API; writes one XML file.
    """
    md = markdown.Markdown()
    from feedgen.feed import FeedGenerator
    fg = FeedGenerator()
    fg.id('https://h.jonudell.info')
    fg.title('Hypothesis group %s' % groupname)
    # NOTE(review): email value appears redacted by a secret scrubber — confirm.
    fg.author({'name': 'Jon Udell', 'email': '*****@*****.**'})
    fg.description("Hypothesis notifications for group %s" % groupname)
    fg.link(href='https://h.jonudell.info/group_rss')
    fg.language('en')
    h = Hypothesis(token=self.token, limit=20)
    ids = self.data()
    annos = []
    for anno_id in ids:  # renamed from `id`, which shadowed the builtin
        try:
            anno = h.get_annotation(anno_id)
            # was a bare `assert`, which is stripped under -O; raise instead
            if 'id' not in anno:
                raise KeyError('id')
        except Exception:  # narrow from bare except: don't trap SystemExit/KeyboardInterrupt
            print('cannot get %s, deleted?' % anno_id)
        else:
            annos.append(anno)
    annos.sort(key=itemgetter('updated'), reverse=True)
    annos = [HypothesisAnnotation(a) for a in annos]
    for anno in annos:
        ref_user = None
        in_reply_to = None
        root_id = anno.id
        if len(anno.references) > 0:
            # last reference is the direct parent, first is the thread root
            ref_id = anno.references[-1]
            root_id = anno.references[0]
            try:
                ref = h.get_annotation(ref_id)
                ref_user = HypothesisAnnotation(ref).user
                in_reply_to = '<p>in reply to %s </p>' % ref_user
            except Exception:  # narrow from bare except
                print("cannot get user for ref_id %s, deleted?" % ref_id)
        fe = fg.add_entry()
        fe.id(anno.id)
        fe.title('%s annotated %s in the group %s at %s ' % (anno.user, anno.doc_title, groupname, anno.updated))
        fe.author({"email": None, "name": anno.user, "uri": None})
        dl = "https://hyp.is/%s" % anno.id
        fe.link({"href": "%s" % dl})
        content = ''
        if ref_user is not None:
            content += in_reply_to
        if anno.exact is not None:
            content += '<p>in reference to: </p> <p> <blockquote><em>%s</em></blockquote></p>' % anno.exact
        content += '<p> %s <a href="https://hyp.is/%s">said</a>: </p> ' % (
            anno.user, root_id)
        content += '%s ' % md.convert(anno.text)
        if len(anno.tags):
            content += '<p>tags: %s' % ', '.join(anno.tags)
        fe.content(content, type='CDATA')
        dt = dateutil.parser.parse(anno.updated)
        # timestamps from the API are treated as UTC; attach tzinfo for RSS
        dt_tz = dt.replace(tzinfo=pytz.UTC)
        fe.pubdate(dt_tz)
    rssfeed = fg.rss_str(pretty=True)  # NOTE(review): result unused — kept for parity, consider removing
    fg.rss_file('%s.xml' % group)  # Write the RSS feed to a file
def notify_facet(self, facet=None, value=None, groupname=None):
    """Search Hypothesis for one facet/value pair and notify on new annotations.

    Runs a paged search (via ``search_all``), diffs the results against the
    persisted cache (a set, or a dict of sets keyed by ``value``, depending
    on ``self.type``), fires ``self.notify`` for each unseen annotation, and
    saves the updated cache.

    Args:
        facet: search parameter name (e.g. a field the API filters on).
        value: value for that parameter; also the cache key in dict mode.
        groupname: passed through to ``self.notify``.

    Returns:
        The accumulated list of notified annotation ids (``self.notified_ids``).
    """
    params = {facet: value, 'max_results': 200}
    h = Hypothesis(token=self.token)
    rows = list(h.search_all(params))
    rows.sort(key=itemgetter('updated'))  # oldest first so notifications are in order
    cache = self.data()
    for row in rows:
        new = False
        anno = HypothesisAnnotation(row)
        if self.type == 'set':
            if anno.id not in cache:
                cache.add(anno.id)
                new = True
        elif self.type == 'dict':  # branches are mutually exclusive; elif is safe
            if value not in cache:  # idiom fix: was `if not value in cache`
                cache[value] = set()
            if anno.id not in cache[value]:
                cache[value].add(anno.id)
                new = True
        if new and anno.id not in self.notified_ids:
            self.notify(anno, groupname=groupname)
            self.notified_ids.append(anno.id)
    self.save(cache)
    return self.notified_ids
def notify_facet(self, facet=None, value=None, groupname=None):
    """Query the Hypothesis search API directly and notify on new annotations.

    Unlike the paged variant, this builds a single query URL (including
    ``_separate_replies`` so replies come back in their own list), optionally
    authenticating with ``self.token``. New annotations are detected against
    the persisted cache and passed to ``self.notify``.

    Args:
        facet: search parameter name.
        value: value for that parameter; also the cache key in dict mode.
        groupname: passed through to ``self.notify``.

    Returns:
        The accumulated list of notified annotation ids (``self.notified_ids``).
    """
    params = {'_separate_replies': 'true'}
    params[facet] = value
    params['limit'] = 200
    h_url = Hypothesis().query_url.format(query=urlencode(params))
    if self.token is not None:
        h = Hypothesis(token=self.token)
        r = h.token_authenticated_query(h_url)
    else:
        r = requests.get(h_url).json()
    # concatenate instead of `rows += r['replies']`, which mutated r['rows'] in place
    rows = r['rows'] + r['replies']
    cache = self.data()
    rows.sort(key=itemgetter('updated'))  # oldest first so notifications are in order
    for row in rows:
        new = False
        anno = HypothesisAnnotation(row)
        if self.type == 'set':
            if anno.id not in cache:
                cache.add(anno.id)
                new = True
        elif self.type == 'dict':  # branches are mutually exclusive; elif is safe
            if value not in cache:  # idiom fix: was `if not value in cache`
                cache[value] = set()
            if anno.id not in cache[value]:
                cache[value].add(anno.id)
                new = True
        if new and anno.id not in self.notified_ids:
            self.notify(anno, groupname=groupname)
            self.notified_ids.append(anno.id)
    self.save(cache)
    return self.notified_ids
def transfer():
    """Re-post public annotations from the source accounts to the target account.

    Relies on module-level state: ``urls`` (pages to scan), ``source_usernames``
    / ``target_username`` (accounts to migrate between), and ``h`` (API client).
    Each matching annotation is rewritten in place — username substituted in the
    user field and permission ACLs, server-assigned fields dropped — then posted.
    """
    for page_url in urls:
        for src_name in source_usernames:
            # Snapshot the search results before posting: newly posted copies
            # would otherwise keep extending a live result iterator.
            hits = list(h.search_all({'uri': page_url}))
            for row in hits:
                if HypothesisAnnotation(row).user not in source_usernames:
                    continue
                row['user'] = row['user'].replace(src_name, target_username)
                acl = row['permissions']
                for action in ('admin', 'update', 'delete'):
                    acl[action][0] = acl[action][0].replace(src_name, target_username)
                row['permissions'] = acl
                # Let the server assign fresh values for these on re-post.
                for server_field in ('created', 'updated', 'id'):
                    del row[server_field]
                h.post_annotation(row)
def export_impl():
    """Flatten a Hypothesis group's annotations into RRID curation rows.

    Pulls every annotation in the configured group (module-level ``username``,
    ``api_token``, ``group``), groups them by annotated URL, resolves each
    URL's PMID (from tags or, failing that, annotation text), then emits one
    row per (RRID, extra-tag) pair.

    Returns:
        (output_rows, DATE): ``output_rows`` is a list of
        ``[PMID, RRID, extra_tag, annotated_url, exact_text, proper_citation]``
        rows; ``DATE`` is today's date formatted ``YYYY-MM-DD``.

    Raises:
        ValueError: on conflicting PMID tags for one URL, or more than one
            RRID tag on a single annotation.
    """
    h = HypothesisUtils(username=username, token=api_token, group=group,
                        max_results=100000)
    params = {'group': h.group}
    rows = h.search_all(params)
    annos = [HypothesisAnnotation(row) for row in rows]

    # Group annotations by the URL they annotate.
    annotated_urls = defaultdict(list)
    for anno in annos:
        annotated_urls[anno.uri].append(anno)

    # Compile once; deliberately loose patterns carried over from curation rules.
    rrid_pat = re.compile(r'RRID:.+[0-9]+.+')
    missing_rrid_pat = re.compile(r'RRID:.+[0-9]+')

    output_rows = []
    for annotated_url, url_annos in annotated_urls.items():
        # Build the reply structure and collect PMID candidates for this URL.
        replies = defaultdict(list)
        PMID = []
        for anno in url_annos:
            if anno.references:
                for reference in anno.references:  # shouldn't there only be one???
                    replies[reference].append(anno)
            # '_' filter excludes malformed tags like PMID:SCR_...
            PMID.extend(tag for tag in anno.tags
                        if tag.startswith('PMID:') and '_' not in tag)
            # Some curators put the pmid in the text instead of a tag :(
            if anno.text.startswith('PMID:'):  # DANGER ZONE
                if '_' in anno.text:
                    print('PMIDS DONT HAVE UNDERSCORES PROBABLY CURATION BUG', anno.text)
                else:
                    # strip: untagged text sometimes carries a trailing '\n'
                    PMID.append(anno.text.strip())
        if PMID:
            if len(PMID) > 1:
                print(PMID, annotated_url)
                # Accept duplicates of a single pmid; was PMID[0] == PMID[1],
                # which silently ignored any differing third entry.
                if len(set(PMID)) == 1:
                    PMID = PMID[0]
                    print('WARNING: more than one pmid tag')
                else:
                    # was BaseException — too broad to catch sanely
                    raise ValueError('more than one pmid tag')
            else:
                PMID = PMID[0]
        else:
            # No PMID anywhere for this URL; fall back to the URL as the key.
            PMID = annotated_url

        RRIDs = defaultdict(list)  # RRID -> extra tags (incl. from replies)
        EXACTs = {}                # RRID -> highlighted (exact) text
        CITEs = {}                 # RRID -> proper citation scraped from text
        for anno in url_annos:
            RRID = None
            additional = []
            for tag in anno.tags:
                if rrid_pat.match(tag):
                    if RRID is not None:
                        raise ValueError('MORE THAN ONE RRID PER ENTRY!')
                    RRID = tag  # :/ works for now, but the pattern is fragile
                else:
                    additional.append(tag)  # eg Unresolved
                    if tag == 'RRIDCUR:Missing':  # fix for bad curation process
                        maybe_rrid = anno.text.strip()
                        if missing_rrid_pat.match(maybe_rrid):
                            RRID = maybe_rrid  # RRIDCUR:Missing was already added above
            if RRID is not None:
                EXACTs[RRID] = anno.exact.strip() if anno.exact else ''
                RRIDs[RRID].extend(additional)
                if RRID not in CITEs and anno.text:
                    if 'Proper Citation:' in anno.text:
                        CITEs[RRID] = anno.text.split('Proper Citation:')[1].strip().split('<', 1)[0]
                if anno.id in replies:
                    for r_anno in replies[anno.id]:
                        RRIDs[RRID].extend(r_anno.tags)  # not worrying about the text here
            elif not anno.references and PMID not in anno.tags:
                # Independent annotation with no RRID; record under a synthetic key
                # so it still appears in the export.
                synthetic = 'NONE:' + anno.id
                RRIDs[synthetic].append('')
                EXACTs[synthetic] = anno.exact

        for rrid, more in RRIDs.items():
            # Live resolver lookup disabled (too slow); use harvested citations.
            try:
                proper_citation = CITEs[rrid]
            except KeyError:  # FIXME hack to avoid some cases of LWW for citations
                proper_citation = ''
            if not more:
                output_rows.append([PMID, rrid, '', annotated_url,
                                    EXACTs[rrid], proper_citation])
            else:
                for val in set(more):  # cull dupes
                    output_rows.append([PMID, rrid, val, annotated_url,
                                        EXACTs[rrid], proper_citation])

    DATE = date.today().strftime('%Y-%m-%d')
    return output_rows, DATE