예제 #1
0
    def emit_group_rss(self, group=None, groupname=None):
        """Write an RSS feed of the cached annotations to ``<group>.xml``.

        The annotation ids come from ``self.data()``; each one is fetched
        through the Hypothesis API using ``self.token``.  Ids that cannot be
        fetched (or whose payload has no ``id`` key) are reported on stdout
        and skipped.  Annotations are processed newest first, by ``updated``.

        Args:
            group: Used only to build the output file name, ``'%s.xml' % group``.
            groupname: Group name interpolated into the feed title, the feed
                description and every entry title.

        Returns:
            None.  The feed is written to disk as a side effect.
        """
        from feedgen.feed import FeedGenerator

        md = markdown.Markdown()
        fg = FeedGenerator()
        fg.id('https://h.jonudell.info')
        fg.title('Hypothesis group %s' % groupname)
        fg.author({'name': 'Jon Udell', 'email': '*****@*****.**'})
        fg.description("Hypothesis notifications for group %s" % groupname)
        fg.link(href='https://h.jonudell.info/group_rss')
        fg.language('en')
        h = Hypothesis(token=self.token, limit=20)

        annos = []
        for anno_id in self.data():
            # Best effort: an id that cannot be fetched, or that comes back
            # without an 'id' key, is reported and skipped.
            try:
                anno = h.get_annotation(anno_id)
                usable = 'id' in anno.keys()
            except Exception:
                usable = False
            if usable:
                annos.append(anno)
            else:
                print('cannot get %s, deleted?' % anno_id)
        # Sort once, after everything is collected (newest first).
        annos.sort(key=itemgetter('updated'), reverse=True)

        for anno in [HypothesisAnnotation(a) for a in annos]:
            ref_user = None
            in_reply_to = None
            root_id = anno.id
            if anno.references:
                # For a reply, link to the thread root and name the user of
                # the annotation being replied to (the last reference).
                root_id = anno.references[0]
                ref_id = anno.references[-1]
                try:
                    ref = h.get_annotation(ref_id)
                    ref_user = HypothesisAnnotation(ref).user
                    in_reply_to = '<p>in reply to %s </p>' % ref_user
                except Exception:
                    print("cannot get user for ref_id %s, deleted?" % ref_id)

            fe = fg.add_entry()
            fe.id(anno.id)
            fe.title('%s annotated %s in the group %s at %s ' %
                     (anno.user, anno.doc_title, groupname, anno.updated))
            fe.author({"email": None, "name": anno.user, "uri": None})
            fe.link({"href": "https://hyp.is/%s" % anno.id})

            parts = []
            if ref_user is not None:
                parts.append(in_reply_to)
            if anno.exact is not None:
                parts.append(
                    '<p>in reference to: </p> <p> <blockquote><em>%s</em></blockquote></p>' % anno.exact)
            parts.append('<p> %s <a href="https://hyp.is/%s">said</a>: </p> ' % (
                anno.user, root_id))
            parts.append('%s ' % md.convert(anno.text))
            if anno.tags:
                parts.append('<p>tags: %s' % ', '.join(anno.tags))
            fe.content(''.join(parts), type='CDATA')

            # replace() stamps the parsed time as UTC without converting it;
            # any offset present in anno.updated is overwritten.
            dt = dateutil.parser.parse(anno.updated)
            fe.pubdate(dt.replace(tzinfo=pytz.UTC))

        fg.rss_file('%s.xml' % group)  # Write the RSS feed to a file
예제 #2
0
 def notify_facet(self, facet=None, value=None, groupname=None):
     """Notify about annotations matching ``facet=value`` that are not cached yet.

     Searches Hypothesis (up to 200 results requested), walks the hits from
     oldest to newest by ``updated``, records unseen annotation ids in the
     cache returned by ``self.data()`` and calls ``self.notify`` once for
     each id not already in ``self.notified_ids``.  The cache is saved
     afterwards.

     When ``self.type`` is ``'set'`` the cache itself holds the ids; when it
     is ``'dict'`` the ids live in a set stored under ``value``.

     Returns:
         ``self.notified_ids`` after the run.
     """
     query = {facet: value, 'max_results': 200}
     client = Hypothesis(token=self.token)
     hits = sorted(client.search_all(query), key=itemgetter('updated'))
     seen = self.data()
     for hit in hits:
         annotation = HypothesisAnnotation(hit)
         anno_id = annotation.id

         # Pick the id collection that applies to this cache layout.
         if self.type == 'set':
             bucket = seen
         elif self.type == 'dict':
             bucket = seen.setdefault(value, set())
         else:
             bucket = None

         is_new = bucket is not None and anno_id not in bucket
         if is_new:
             bucket.add(anno_id)

         if is_new and anno_id not in self.notified_ids:
             self.notify(annotation, groupname=groupname)
             self.notified_ids.append(anno_id)
     self.save(seen)
     return self.notified_ids
예제 #3
0
 def notify_facet(self, facet=None, value=None, groupname=None):
     """Notify about annotations and replies matching ``facet=value`` that are new.

     Builds a search URL with ``_separate_replies=true`` and ``limit=200``,
     runs it with token authentication when ``self.token`` is set and as a
     plain ``requests.get`` otherwise, then merges the ``rows`` and
     ``replies`` of the response.  Hits are walked oldest first by
     ``updated``; ids missing from the cache returned by ``self.data()`` are
     recorded there, and ``self.notify`` is called once for each id not
     already in ``self.notified_ids``.  The cache is saved afterwards.

     When ``self.type`` is ``'set'`` the cache itself holds the ids; when it
     is ``'dict'`` the ids live in a set stored under ``value``.

     Returns:
         ``self.notified_ids`` after the run.
     """
     query = {'_separate_replies': 'true', facet: value, 'limit': 200}
     search_url = Hypothesis().query_url.format(query=urlencode(query))
     if self.token is None:
         payload = requests.get(search_url).json()
     else:
         payload = Hypothesis(token=self.token).token_authenticated_query(search_url)

     # Replies come back in their own list; treat them like top-level rows.
     hits = payload['rows']
     hits.extend(payload['replies'])
     seen = self.data()
     hits.sort(key=itemgetter('updated'))

     for hit in hits:
         annotation = HypothesisAnnotation(hit)
         anno_id = annotation.id

         # Pick the id collection that applies to this cache layout.
         if self.type == 'set':
             bucket = seen
         elif self.type == 'dict':
             bucket = seen.setdefault(value, set())
         else:
             bucket = None

         first_sighting = bucket is not None and anno_id not in bucket
         if first_sighting:
             bucket.add(anno_id)

         if first_sighting and anno_id not in self.notified_ids:
             self.notify(annotation, groupname=groupname)
             self.notified_ids.append(anno_id)

     self.save(seen)
     return self.notified_ids
예제 #4
0
def transfer():
    """Copy (public) annotations on a set of URLs from source users to another account.

    Reads the module-level ``urls``, ``source_usernames``, ``target_username``
    and the API client ``h``.  For every URL and every source user, each
    annotation written by that user is re-posted with the user and the
    ``admin``/``update``/``delete`` permission principals rewritten to the
    target user.

    Returns:
        None.  Annotations are created through ``h.post_annotation``.
    """
    for url in urls:
        for source_username in source_usernames:
            # Capture the result set up front, else it'll keep growing as
            # items are posted.
            rows = list(h.search_all({'uri': url}))
            for row in rows:
                anno = HypothesisAnnotation(row)
                # Handle only the user of this iteration.  Testing membership
                # in the whole source_usernames list would re-post every
                # source user's annotation once per iteration, and without
                # any username rewrite on the non-matching iterations.
                if anno.user != source_username:
                    continue
                row['user'] = row['user'].replace(source_username,
                                                  target_username)
                permissions = row['permissions']
                for field in ('admin', 'update', 'delete'):
                    permissions[field][0] = permissions[field][0].replace(
                        source_username, target_username)
                # Drop the fields copied from the original so the post is
                # treated as a new annotation; tolerate rows lacking one.
                for copied_field in ('created', 'updated', 'id'):
                    row.pop(copied_field, None)
                h.post_annotation(row)
예제 #5
0
def export_impl():
    """Flatten one group's Hypothesis annotations into export rows.

    Uses the module-level ``username``, ``api_token`` and ``group`` to search
    all annotations of the group, buckets them by ``anno.uri`` and, for each
    URL:

    1. collects ``PMID:`` identifiers from tags (and from annotation text
       that starts with ``PMID:``), falling back to the URL itself when none
       is found;
    2. finds the RRID of each annotation (an ``RRID:`` tag, or the text of an
       annotation tagged ``RRIDCUR:Missing``) together with its other tags,
       the tags of its replies, its quoted text and any ``Proper Citation:``
       found in its text;
    3. emits one row per (RRID, extra tag) pair, or a single row with an
       empty tag column when the RRID has no extra tags.

    Returns:
        A tuple ``(output_rows, DATE)``.  Each row is the list
        ``[PMID, rrid, tag, annotated_url, exact, proper_citation]`` and
        ``DATE`` is today's date formatted as ``YYYY-MM-DD``.

    Raises:
        BaseException: if a URL has several PMIDs and the first two differ,
            or if a tag matches the RRID pattern after an RRID was already
            set for the same annotation.
    """
    h = HypothesisUtils(username=username, token=api_token, group=group, max_results=100000)
    params = {'group' : h.group }
    rows = h.search_all(params)
    annos = [HypothesisAnnotation(row) for row in rows]
    # Bucket the annotations by the URL they were made on.
    annotated_urls = defaultdict(list)
    for anno in annos:
        annotated_urls[anno.uri].append(anno)

    output_rows = []
    for annotated_url in annotated_urls.keys():
        #print(annotated_url)
        annos = annotated_urls[annotated_url]  # rebinds `annos` to this URL's annotations
        replies = defaultdict(list)  # referenced annotation id -> annotations referencing it
        PMID = []  # starts as a list of candidates, rebound to a single string below
        for anno in annos:  # gotta build the reply structure and get pmid
            #print('id:', anno.id)
            #print('user:', anno.user)
            #print('exact:', anno.exact)
            #print('text:', anno.text)
            #print('tags:', anno.tags)
            #print('type:', anno.type)
            #print('references:', anno.references)
            if anno.references:
                # The reply is registered under every id in its reference chain.
                for reference in anno.references:  # shouldn't there only be one???
                    replies[reference].append(anno)
            PMID.extend([tag for tag in anno.tags if tag.startswith('PMID:') and '_' not in tag])  # bad tags with PMID:SCR_
            #curators didn't put the pmid in as tags :(
            if anno.text.startswith('PMID:'):  # DANGER ZONE
                if '_' in anno.text:
                    print('PMIDS DONT HAVE UNDERSCORES PROBABLY CURATION BUG', anno.text)
                else:
                    PMID.append(anno.text.strip())  # because, yep, when you don't tag sometimes you get \n :/

        # Collapse the candidate list into the single identifier used for this URL.
        if PMID:
            if len(PMID) > 1:
                print(PMID, annotated_url)
                # NOTE(review): only the first two candidates are compared; a
                # differing third (or later) candidate goes unnoticed.
                if PMID[0] == PMID[1]:
                    PMID = PMID[0]
                    print('WARNING: more than one pmid tag')
                else:
                    raise BaseException('more than one pmid tag')
            else:
                PMID = PMID[0]
                #print(PMID)
        else:
            # NOTE(review): all_tags is only read by the commented-out debug
            # print below; it has no effect on the output.
            all_tags = []
            for a in annos:
                all_tags.extend(a.tags)
            #print('NO PMID FOR', annotated_url)
            #print(set([a.user for a in annos]))
            #print(all_tags)
            PMID = annotated_url  # no PMID found: fall back to the URL itself

        RRIDs = defaultdict(list)  # RRID (or 'NONE:<id>') -> extra tags
        EXACTs = {}  # RRID (or 'NONE:<id>') -> quoted text of the annotation
        CITEs = {}  # RRID -> first 'Proper Citation:' text seen for it
        #USERs = {}
        for anno in annos:
            RRID = None
            additional = []
            for tag in anno.tags:
                # An RRID tag is 'RRID:' followed by at least one character,
                # one or more digits, and at least one further character.
                if re.match('RRID:.+[0-9]+.+', tag):
                #if re.match('RRID:.+', tag):
                    if RRID is not None:
                        raise BaseException('MORE THAN ONE RRID PER ENTRY!')
                    RRID = tag  # :/ this works for now
                else:
                    additional.append(tag)  # eg Unresolved

                if tag == 'RRIDCUR:Missing':  # fix for bad curation process
                    # The curator put the RRID in the annotation text instead of a tag.
                    maybe_rrid = anno.text.strip()
                    if re.match('RRID:.+[0-9]+', maybe_rrid):
                        RRID = maybe_rrid  # RRIDCUR:Missing was already added above

            if RRID is not None:
                # A later annotation with the same RRID overwrites EXACTs[RRID].
                EXACTs[RRID] = anno.exact.strip() if anno.exact else ''
                RRIDs[RRID].extend(additional)
                #USERs[RRID] = anno.user
                if RRID not in CITEs:
                    if anno.text:
                        if 'Proper Citation:' in anno.text:
                            # Keep the text after the marker, up to the first '<'.
                            CITEs[RRID] = anno.text.split('Proper Citation:')[1].strip().split('<',1)[0]

                if anno.id in replies:
                    for r_anno in replies[anno.id]:
                        RRIDs[RRID].extend(r_anno.tags)  # not worrying about the text here
            elif not anno.references and PMID not in anno.tags:
                # Top-level annotation with no RRID that is not the PMID tag
                # carrier: recorded under a synthetic 'NONE:<id>' key.
                # NOTE(review): the original comment said this annotation
                # "will not be included", yet it does produce an output row
                # below - confirm the intent.
                new = 'NONE:' + anno.id
                RRIDs[new].append('')
                # NOTE(review): unlike the RRID branch there is no None guard
                # here, so this presumably can store None - confirm.
                EXACTs[new] = anno.exact
                #USERs[RRID] = anno.user

        for rrid, more in RRIDs.items():
            #FIXME TOOOOOO SLOW
            #r = requests.get('https://scicrunch.org/resolver/{RRID}.xml'.format(RRID=rrid))
            #if r.status_code < 300:
                #proper_citation = get_proper_citation(r.content)
            #else:
                #proper_citation = ''

            try:
                proper_citation = CITEs[rrid]
            except KeyError:  # FIXME this is a hack to avoid some cases of LWW for citations
                proper_citation = ''

            if not more:
                row = [PMID, rrid, '', annotated_url, EXACTs[rrid], proper_citation]
                output_rows.append(row)
            else:
                for val in set(more):  # cull dupes
                    row = [PMID, rrid, val, annotated_url, EXACTs[rrid], proper_citation]
                    output_rows.append(row)

    DATE = date.today().strftime('%Y-%m-%d')
    return output_rows, DATE