# NOTE: the import paths below are assumptions inferred from the names
# used in this file; adjust them to match the actual package layout.
import time
from datetime import datetime

import apptree
import bulk
import connect
import core
import doi
import gplus
import incoming
import sessioninfo
## needed only by the commented-out pubmed block inside the test:
## import pickle
## import pubmed


def destroy_db_and_test():
    '''tests progressively building an spnet db starting from a blank
    slate, adding papers, people, posts, topics, etc., and verifying the
    expected results.  NB: this is a destructive test, i.e. it FLUSHES
    whatever is in the spnet database and fills it with its own test
    data.'''
    dbconn = connect.init_connection()
    dbconn._conn.drop_database('spnet')  # start test from a blank slate
    rootColl = apptree.get_collections()
    lorem = '''Lorem ipsum dolor sit amet, consectetur adipisicing elit,
sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim
ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip
ex ea commodo consequat. Duis aute irure dolor in reprehenderit in
voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur
sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt
mollit anim id est laborum.'''

    # people, email addresses, and cached-reload timers
    jojo = core.Person(docData=dict(name='jojo', age=37))
    assert jojo is not None
    assert jojo.force_reload(delay=1) is False  # set timer
    assert jojo.force_reload() is False  # timer still waiting
    time.sleep(2)
    assert jojo.force_reload()  # timer done
    a1 = core.EmailAddress(docData=dict(address='*****@*****.**',
                                        current=True), parent=jojo)
    fred = core.Person(docData=dict(name='fred', age=56))
    a2 = core.EmailAddress(docData=dict(address='*****@*****.**',
                                        authenticated=False), parent=fred)
    a3 = core.EmailAddress(docData=dict(address='*****@*****.**',
                                        note='personal account'), parent=fred)

    # arXiv papers and a Google+ person record
    paper1 = core.ArxivPaperData('1302.4871', insertNew='findOrInsert').parent
    paper1.update(dict(authors=[jojo._id]))
    paper2 = core.ArxivPaperData('1205.6541', insertNew='findOrInsert').parent
    paper2.update(dict(authors=[fred._id, jojo._id]))
    assert paper1.arxiv.id == '1302.4871'
    assert paper2.arxiv.id == '1205.6541'
    jojoGplus = core.GplusPersonData(docData=dict(
        id=1234, displayName='Joseph Nye',
        image={'url': 'http://www.nobelprize.org/nobel_prizes/physics/laureates/1921/einstein.jpg'}),
        parent=jojo)
    jojoGplus.update(dict(etag='oldversion'))

    # topics (SIGs)
    sig1 = core.SIG.find_or_insert('cosmology')
    sig2 = core.SIG.find_or_insert('lambdaCDMmodel')
    topicWords = incoming.get_topicIDs(['cosmology', 'astrophysics'], 1,
                                       datetime.utcnow(), 'test')
    assert topicWords == ['cosmology', 'astrophysics']
    astroSIG = core.SIG('astrophysics')
    assert astroSIG.name == '#astrophysics'
    assert astroSIG.origin == dict(source='test', id=1)

    # paper interests
    int1 = core.PaperInterest(docData=dict(author=jojo._id,
                                           topics=[sig1._id]), parent=paper1)
    assert core.Paper(paper1._id).interests == [int1]
    assert core.Paper(paper1._id).get_interests() == {sig1._id: [jojo]}
    assert core.Person(jojo._id).interests == [int1]
    assert core.Person(jojo._id).topics == [sig1._id]
    assert core.SIG(sig1._id).interests == [int1]
    assert core.SIG(sig1._id).get_interests() == {paper1: [jojo]}
    intAgain = core.PaperInterest((paper1._id, jojo._id))
    assert intAgain == int1
    try:
        intAgain.remove_topic(sig2._id)
    except KeyError:
        pass
    else:
        raise AssertionError('failed to catch bad remove_topic()')
    assert intAgain.remove_topic(sig1._id) is None
    assert core.Paper(paper1._id).interests == []

    # test creation via POST
    paperLikes = rootColl['papers'].likes
    sessioninfo.get_session.sessionDict = dict(person=fred)
    int2 = paperLikes._POST(fred._id, sig2._id, '1',
                            parents=dict(paper=paper2))
    assert int2.parent == paper2
    assert int2.author == fred
    assert int2.topics == [sig2]
    assert core.Paper(paper2._id).interests == [int2]
    assert core.Person(fred._id).interests == [int2]
    assert core.Person(fred._id).topics == [sig2._id]
    assert core.SIG(sig2._id).interests == [int2]
    try:
        paperLikes._POST(fred._id, 'this is not allowed', '1',
                         parents=dict(paper=paper2))
    except KeyError:
        pass
    else:
        raise AssertionError('failed to trap bad topic string')

    # test removal via POST
    assert paperLikes._POST(fred._id, sig2._id, '0', parents=dict(
        paper=core.Paper(paper2._id))) == int2
    assert core.Paper(paper2._id).interests == []
    int3 = paperLikes._POST(fred._id, '#silicene', '1',
                            parents=dict(paper=paper2))
    assert core.SIG('silicene').interests == [int3]
    assert set(core.Person(fred._id).topics) == set([sig2._id, 'silicene'])
    gplus2 = core.GplusPersonData(docData=dict(id=1234,
                                               displayName='Joseph Nye'),
                                  insertNew='findOrInsert')
    assert gplus2 == jojoGplus
    gplus3 = core.GplusPersonData(docData=dict(id=5678,
                                               displayName='Fred Eiserling'),
                                  insertNew='findOrInsert')
    assert gplus3.parent.name == 'Fred Eiserling'

    # posts, recommendations, replies, issues, and votes
    rec1 = core.Post(docData=dict(
        author=fred._id, citationType='recommend', id='1',
        title='Why You Need to Read This Important Extension of the CDM Model',
        text=lorem), parent=paper1)
    rec2 = core.Post(docData=dict(author=jojo._id, text='must read!',
                                  citationType='mustread', id='2',
                                  sigs=[sig1._id, sig2._id]),
                     parent=paper2._id)
    assert set(core.Person(jojo._id).topics) == set([sig1._id, sig2._id])
    post1 = core.Post(docData=dict(author=fred._id, text='interesting paper!',
                                   id=98765, sigs=[sig1._id]), parent=paper1)
    assert set(core.Person(fred._id).topics) == \
        set([sig1._id, sig2._id, 'silicene'])
    reply1 = core.Reply(docData=dict(author=jojo._id,
                                     text='I disagree with Fred.',
                                     id=7890, replyTo=98765), parent=paper1)
    issue1 = core.Issue(docData=dict(
        paper=paper1, title='The claims are garbage', category='validity',
        author=jojo._id,
        description='there is a major flaw in the first step of your proof'))
    vote1 = core.IssueVote(docData=dict(person=jojo, rating='crucial',
                                        status='open'), parent=issue1)
    assert core.Person(jojo._id).email == [a1]
    assert core.Person(jojo._id).replies == [reply1]
    jgp = core.GplusPersonData(1234)
    assert jgp.parent == jojo
    assert jgp.etag == 'oldversion'
    assert len(rec1.parent.authors) == 1
    assert rec1.parent.authors[0] == jojo
    assert len(rec2.parent.authors) == 2
    assert jojo in rec2.parent.authors
    assert fred in rec2.parent.authors
    assert len(rec2.parent.recommendations) == 1
    assert len(jojo.recommendations) == 1
    assert jojo.recommendations[0] == rec2
    assert len(jojo.papers) == 2
    assert len(fred.papers) == 1
    assert len(paper2.authors[0].email) == 2
    assert issue1.author == jojo
    p = core.Paper(paper1._id)
    assert len(p.issues) == 1
    posts1 = p.get_all_posts()
    assert len(posts1) == 1
    assert posts1 == [post1]
    assert posts1[0].text == 'interesting paper!'
    assert list(posts1[0].get_replies()) == [reply1]
    assert core.Post(98765).author == fred
    assert core.Reply(7890).replyTo == post1
    assert core.Reply(7890).parent == paper1
    assert filter(lambda x: not x.is_rec(),
                  core.Person(fred._id).posts) == [post1]
    assert filter(lambda x: not x.is_rec(),
                  core.SIG(sig1._id).posts) == [post1]
    assert core.Post(98765).sigs == [sig1]
    replyAgain = core.Reply(docData=dict(author=fred._id,
                                         text='interesting paper!',
                                         id=7890, replyTo=98765),
                            parent=paper1, insertNew='findOrInsert')
    assert replyAgain == reply1
    assert core.Paper(paper1._id).replies == [reply1]
    reply2 = core.Reply(docData=dict(
        author=jojo._id, text='This paper really made me think.',
        id=7891, replyTo=98765), parent=paper1, insertNew='findOrInsert')
    assert core.Paper(paper1._id).replies == [reply1, reply2]
    assert core.Paper(str(paper1._id)) == paper1, 'auto ID conversion failed'
    assert p.issues[0] == issue1
    assert len(p.issues[0].votes) == 1
    assert len(rec2.sigs) == 2
    assert rec2.sigs[0] == sig1
    assert sig1.recommendations == [rec2]
    rec1.array_append('sigs', sig2)
    assert len(sig2.recommendations) == 2
    assert core.Post(rec1.id).sigs == [sig2]
    rec2.update(dict(text='totally fascinating!', score=27))
    rec3 = core.Post(rec2.id)
    assert rec3.score == 27

    # parent lookups and error trapping
    a4 = core.EmailAddress('*****@*****.**')
    assert a4._parent_link == fred._id
    assert a4.parent == fred
    try:
        p = core.Person('abcdefg')
    except KeyError:
        pass
    else:
        raise AssertionError('failed to trap bad personID')
    try:
        a = core.EmailAddress('*****@*****.**')
    except KeyError:
        pass
    else:
        raise AssertionError('failed to trap bad email')
    try:
        jojo = core.Person(docData=dict(name2='jojo', age=37))
    except ValueError:
        pass
    else:
        raise AssertionError('failed to trap Person w/o name')

    # array_append / array_del on a top-level and an embedded document
    fred.array_append('numbers', 17)
    assert core.Person(fred._id).numbers == [17]
    fred.array_append('numbers', 6)
    assert core.Person(fred._id).numbers == [17, 6]
    fred.array_del('numbers', 17)
    assert core.Person(fred._id).numbers == [6]
    a4.array_append('numbers', 17)
    assert core.EmailAddress(a4.address).numbers == [17]
    a4.array_append('numbers', 6)
    assert core.EmailAddress(a4.address).numbers == [17, 6]
    a4.array_del('numbers', 17)
    assert core.EmailAddress(a4.address).numbers == [6]

    rec3 = core.Post(docData=dict(
        author=fred._id, citationType='recommend',
        text='I think this is a major breakthrough.',
        sigs=[sig2._id], id=3456), parent=paper2._id)
    assert core.SIG(sig1._id).recommendations == [rec2]
    assert len(core.SIG(sig2._id).recommendations) == 3

    # Google+ post retrieval must be idempotent
    it = gplus.publicAccess.get_person_posts('107295654786633294692')
    testPosts = list(gplus.publicAccess.find_or_insert_posts(it))
    assert len(testPosts) > 0
    nposts = len(core.Paper(paper1._id).posts)
    nreplies = len(core.Paper(paper1._id).replies)
    it = gplus.publicAccess.get_person_posts('107295654786633294692')
    testPosts2 = list(gplus.publicAccess.find_or_insert_posts(it))
    assert testPosts == testPosts2
    assert nposts == len(core.Paper(paper1._id).posts)
    assert nreplies == len(core.Paper(paper1._id).replies)

    # Google+ subscriptions
    gpd = core.GplusPersonData('112634568601116338347',
                               insertNew='findOrInsert')
    assert gpd.displayName == 'Meenakshi Roy'
    gpd.update_subscriptions(dict(etag='foo', totalItems=1),
                             [dict(id='114744049040264263224')])
    gps = gpd.subscriptions
    assert gps.gplusPerson == gpd
    mrID = gpd.parent._id
    subscriptions = core.Person(mrID).subscriptions
    assert len(subscriptions) == 0
    gpd2 = core.GplusPersonData('114744049040264263224',
                                insertNew='findOrInsert')
    time.sleep(2)
    subscriptions = core.Person(mrID).subscriptions
    assert len(subscriptions) == 1
    assert subscriptions[0].author == gpd2.parent
    cjlposts = gpd2.update_posts(999)  # retrieve some recs
    assert len(cjlposts) > 0  # got some
    assert len(core.Person(mrID).received) > 0  # and they were delivered
    assert len(core.Person(mrID).get_deliveries()) > 0  # UI can retrieve them
    recReply = core.Reply(docData=dict(
        author=jojo._id, id=78901, replyTo=3456,
        text='Fred, thanks for your comments! Your insights are really helpful.'),
        parent=paper2._id)
    # make sure timestamps are present on all recs and replies
    l = [r.published for r in core.Post.find_obj()]
    l = [r.published for r in core.Reply.find_obj()]
    assert recReply.replyTo == rec3
    assert list(recReply.replyTo.get_replies()) == [recReply]

    # pubmed eutils network server constantly failing now??
    ## pubmedDict = pubmed.get_pubmed_dict('23482246')
    ## with open('../pubmed/test1.pickle') as ifile:
    ##     correctDict = pickle.load(ifile)
    ## assert pubmedDict == correctDict
    ## paper3 = core.PubmedPaperData('23482246', insertNew='findOrInsert').parent
    ## paper3.update(dict(authors=[fred._id]))
    ## ppd = core.PubmedPaperData('23139441', insertNew='findOrInsert')
    ## assert ppd.doi.upper() == '10.1016/J.MSEC.2012.05.020'
    ## assert paper3.pubmed.id == '23482246'
    ## assert paper3.title[:40] == correctDict['title'][:40]

    # shortDOI <-> DOI mapping
    s = 'aabbe'
    t = doi.map_to_doi(s)
    assert t == '10.1002/(SICI)1097-0258(19980815/30)17:15/16<1661::AID-SIM968>3.0.CO;2-2'
    assert s == doi.map_to_shortdoi(t)
    paper4 = core.DoiPaperData(DOI=t, insertNew='findOrInsert').parent
    paper4.update(dict(authors=[fred._id]))
    assert paper4.doi.id == s
    assert paper4.doi.doi == t
    assert paper4.doi.DOI == t.upper()
    paper5 = core.DoiPaperData(s, insertNew='findOrInsert').parent
    assert paper4 == paper5
    assert rootColl['shortDOI']._GET(s) == paper4

    # citation extraction from post text
    txt = 'some text ' + paper4.doi.get_hashtag()
    refs, topics, primary = incoming.get_citations_types_and_topics(
        txt, spnetworkOnly=False)
    assert incoming.get_paper(primary, refs[primary][1]) == paper4
    spnetPaper = core.DoiPaperData(DOI='10.3389/fncom.2012.00001',
                                   insertNew='findOrInsert').parent
    assert spnetPaper.title.lower() == \
        'open peer review by a selected-papers network'
    txt = 'a long comment ' + spnetPaper.doi.get_doctag() + ', some more text'
    refs, topics, primary = incoming.get_citations_types_and_topics(
        txt, spnetworkOnly=False)
    assert incoming.get_paper(primary, refs[primary][1]) == spnetPaper

    # bulk delivery of recommendations
    topics, subs = bulk.get_people_subs()
    bulk.deliver_recs(topics, subs)
    assert len(core.Person(jojo._id).received) == 4
    assert len(core.Person(fred._id).received) == 2
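
# Hypothetical runner (an assumption, not part of the original suite):
# destroy_db_and_test() drops the 'spnet' database via
# connect.init_connection(), so only invoke it against a disposable
# MongoDB instance.
if __name__ == '__main__':
    destroy_db_and_test()
    print 'destroy_db_and_test passed'
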
# Earlier revision of the tail of destroy_db_and_test(), retained for the
# older incoming.get_hashtag_dict() code path (superseded above by
# get_citations_types_and_topics()):
assert paper4.doi.doi == t
assert paper4.doi.DOI == t.upper()
paper5 = core.DoiPaperData(s, insertNew='findOrInsert').parent
assert paper4 == paper5
assert rootColl['shortDOI']._GET(s) == paper4
txt = 'some text ' + paper4.doi.get_hashtag()
assert incoming.get_hashtag_dict(txt)['paper'] == [paper4]
spnetPaper = core.DoiPaperData(DOI='10.3389/fncom.2012.00001',
                               insertNew='findOrInsert').parent
assert spnetPaper.title.lower() == \
    'open peer review by a selected-papers network'
txt = 'a long comment ' + spnetPaper.doi.get_doctag() + ', some more text'
assert incoming.get_hashtag_dict(txt)['paper'] == [spnetPaper]
## t = 'this is text #spnetwork #recommend #arxiv_1302_4871 #pubmed_22291635 #cosmology'
## d = incoming.get_hashtag_dict(t)
## assert d == {'header': ['spnetwork'], 'topic': ['cosmology'],
##              'paper': [paper1, spnetPaper], 'rec': ['recommend']}
## t = 'this is text #spnetwork #recommend arXiv:1302.4871 PMID: 22291635 #cosmology'
## d = incoming.get_hashtag_dict(t)
## assert d == {'header': ['spnetwork'], 'topic': ['cosmology'],
##              'paper': [paper1, spnetPaper], 'rec': ['recommend']}
t = 'this is text #spnetwork #recommend doi: 10.3389/fncom.2012.00001 i like doi: this #cosmology'
d = incoming.get_hashtag_dict(t)
assert d == {'header': ['spnetwork'], 'topic': ['cosmology'],
             'paper': [spnetPaper], 'rec': ['recommend']}
topics, subs = bulk.get_people_subs()
bulk.deliver_recs(topics, subs)
assert len(core.Person(jojo._id).received) == 2
assert len(core.Person(fred._id).received) == 1
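
# For reference, the tag formats exercised above (a summary inferred from
# the test strings in this file, not an exhaustive spec):
#   #spnetwork                          header tag marking a post for indexing
#   #recommend / #mustread              rec tags (mapped to citationType)
#   #cosmology                          topic tag -> SIG
#   #arxiv_1302_4871                    hashtag form of an arXiv paper ID
#   arXiv:1302.4871                     inline doctag form
#   #pubmed_22291635 / PMID: 22291635   pubmed hashtag / doctag forms
#   doi: 10.3389/fncom.2012.00001       DOI doctag form
#   doi.get_hashtag() / get_doctag()    shortDOI/DOI forms generated
#                                       programmatically in the test above
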
# Earlier revision of find_or_insert_posts(): #recommend/#mustread posts
# are stored as core.Recommendation rather than core.Post.  Assumed module
# context (this function and its later revisions call get_hashtag_dict()
# and get_topicIDs() unqualified, so they presumably live in the same
# module, with core and bulk imported at module level):
## import core, bulk
## from datetime import datetime
def find_or_insert_posts(posts, get_post_comments, find_or_insert_person,
                         get_content, get_user, get_replycount, get_id,
                         get_timestamp, is_reshare, source,
                         process_post=None, process_reply=None,
                         recentEvents=None, maxDays=None):
    'generate each post that has a paper hashtag, adding to DB if needed'
    now = datetime.utcnow()
    saveEvents = []
    for d in posts:
        post = None
        timeStamp = get_timestamp(d)
        if maxDays is not None and (now - timeStamp).days > maxDays:
            break
        if is_reshare(d):  # just a duplicate (reshared) post, so skip
            continue
        content = get_content(d)
        isRec = content.find('#recommend') >= 0 or \
            content.find('#mustread') >= 0
        if not isRec:
            try:
                post = core.Post(d['id'])
                if getattr(post, 'etag', None) == d.get('etag', ''):
                    yield post
                    continue  # matches DB record, so nothing to do
            except KeyError:
                pass
        hashtagDict = get_hashtag_dict(content)  # extract tags and IDs
        if post is None:  # extract data for saving post to DB
            try:
                paper = hashtagDict['paper'][0]  # link to first paper
            except KeyError:
                continue  # no link to a paper, so nothing to save
            userID = get_user(d)
            author = find_or_insert_person(userID)
            d['author'] = author._id
            if isRec:  # see if rec already in DB
                try:
                    post = core.Recommendation((paper._id, author._id))
                    if getattr(post, 'etag', None) == d.get('etag', ''):
                        yield post
                        continue  # matches DB record, so nothing to do
                except KeyError:  # need to save new record to DB
                    klass = core.Recommendation
            else:
                klass = core.Post
        d['text'] = content
        if process_post:
            process_post(d)
        d['sigs'] = get_topicIDs(hashtagDict, get_id(d), timeStamp, source)
        if post is None:  # save to DB
            post = klass(docData=d, parent=paper)
            if isRec:
                try:
                    topicsDict
                except NameError:
                    topicsDict, subsDict = bulk.get_people_subs()
                bulk.deliver_rec(paper._id, d, topicsDict, subsDict)
            if recentEvents is not None:  # add to monitor deque
                saveEvents.append(post)
        else:  # update DB with new data and etag
            post.update(d)
        yield post
        if get_replycount(d) > 0:
            for c in get_post_comments(d['id']):
                if process_reply:
                    process_reply(c)
                try:
                    r = core.Reply(c['id'])
                    if getattr(r, 'etag', None) != c.get('etag', ''):
                        # update DB record with latest data
                        r.update(dict(etag=c.get('etag', ''),
                                      text=get_content(c),
                                      updated=c.get('updated', '')))
                    continue  # already stored in DB, no need to save
                except KeyError:
                    pass
                userID = get_user(c)
                author = find_or_insert_person(userID)
                c['author'] = author._id
                c['text'] = get_content(c)
                c['replyTo'] = d['id']
                r = core.Reply(docData=c, parent=post._parent_link)
                if recentEvents is not None:  # add to monitor deque
                    saveEvents.append(r)
    if saveEvents and recentEvents is not None:
        saveEvents.sort(lambda x, y: cmp(x.published, y.published))
        for r in saveEvents:
            recentEvents.appendleft(r)  # add to monitor deque
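
# A wiring sketch for the adapter-callable interface above (illustration
# only: the post-dict schema and the find_or_insert_person body are
# assumptions, and actually running this would need a live spnet DB plus
# whatever lookups get_hashtag_dict() triggers for the arXiv tag):
def _demo_find_or_insert_posts():
    posts = [dict(id='post1', etag='etag1', userID='someGplusID',
                  content='#spnetwork #recommend arXiv:1302.4871 #cosmology',
                  published=datetime.utcnow())]
    return list(find_or_insert_posts(
        posts,
        get_post_comments=lambda postID: [],  # this demo has no comments
        find_or_insert_person=lambda userID: core.GplusPersonData(
            userID, insertNew='findOrInsert').parent,
        get_content=lambda d: d['content'],
        get_user=lambda d: d['userID'],
        get_replycount=lambda d: 0,
        get_id=lambda d: d['id'],
        get_timestamp=lambda d: d['published'],
        is_reshare=lambda d: False,
        source='demo'))
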
# Later revision: adds #spnetwork filtering, explicit citationType
# handling, and secondary-citation storage; all posts become core.Post
# with a citationType field instead of a separate core.Recommendation.
def find_or_insert_posts(posts, get_post_comments, find_or_insert_person,
                         get_content, get_user, get_replycount, get_id,
                         get_timestamp, is_reshare, source,
                         process_post=None, process_reply=None,
                         recentEvents=None, maxDays=None,
                         citationType='discuss', citationType2='discuss',
                         get_title=lambda x: x['title'], spnetworkOnly=True):
    'generate each post that has a paper hashtag, adding to DB if needed'
    now = datetime.utcnow()
    saveEvents = []
    for d in posts:
        post = None
        timeStamp = get_timestamp(d)
        if maxDays is not None and (now - timeStamp).days > maxDays:
            break
        if is_reshare(d):  # just a duplicate (reshared) post, so skip
            continue
        content = get_content(d)
        if spnetworkOnly and content.find('#spnetwork') < 0:
            continue  # ignore posts lacking our spnetwork hashtag
        isRec = content.find('#recommend') >= 0 or \
            content.find('#mustread') >= 0
        try:
            post = core.Post(get_id(d))
            if getattr(post, 'etag', None) == d.get('etag', ''):
                yield post
                continue  # matches DB record, so nothing to do
        except KeyError:
            pass
        hashtagDict = get_hashtag_dict(content)  # extract tags and IDs
        if post is None:  # extract data for saving post to DB
            try:
                papers = hashtagDict['paper']
                paper = papers[0]  # link to first paper
            except KeyError:
                continue  # no link to a paper, so nothing to save
            userID = get_user(d)
            author = find_or_insert_person(userID)
            d['author'] = author._id
        d['text'] = content
        if process_post:
            process_post(d)
        d['sigs'] = get_topicIDs(hashtagDict, get_id(d), timeStamp, source)
        if isRec:  # record rec type
            try:
                d['citationType'] = hashtagDict['rec'][0]
            except KeyError:  # handle bad rec hashtag
                d['citationType'] = 'recommend'
        else:  # use default citation type
            d['citationType'] = citationType
        if post is None:  # save to DB
            post = core.Post(docData=d, parent=paper)
            if len(papers) > 1:  # save 2ary citations
                post.add_citations(papers[1:], citationType2)
            try:
                topicsDict
            except NameError:
                topicsDict, subsDict = bulk.get_people_subs()
            bulk.deliver_rec(paper._id, d, topicsDict, subsDict)
            if recentEvents is not None:  # add to monitor deque
                saveEvents.append(post)
        else:  # update DB with new data and etag
            post.update(d)
        yield post
        if get_replycount(d) > 0:
            for c in get_post_comments(get_id(d)):
                if process_reply:
                    process_reply(c)
                try:
                    r = core.Reply(get_id(c))
                    if getattr(r, 'etag', None) != c.get('etag', ''):
                        # update DB record with latest data
                        r.update(dict(etag=c.get('etag', ''),
                                      text=get_content(c),
                                      updated=c.get('updated', '')))
                    continue  # already stored in DB, no need to save
                except KeyError:
                    pass
                userID = get_user(c)
                author = find_or_insert_person(userID)
                c['author'] = author._id
                c['text'] = get_content(c)
                c['replyTo'] = get_id(d)
                if isRec:  # record the type of post being replied to
                    c['sourcetype'] = 'rec'
                else:
                    c['sourcetype'] = 'post'
                r = core.Reply(docData=c, parent=post._parent_link)
                if recentEvents is not None:  # add to monitor deque
                    saveEvents.append(r)
    if saveEvents and recentEvents is not None:
        saveEvents.sort(lambda x, y: cmp(x.published, y.published))
        for r in saveEvents:
            recentEvents.appendleft(r)  # add to monitor deque
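
# The citationType resolution above reduces to a small pure function; this
# standalone restatement (an illustration, not part of the module) shows
# the intended precedence: an explicit rec tag wins, a rec post with an
# unparsed 'rec' entry falls back to 'recommend', and everything else gets
# the caller-supplied default.
def _resolve_citation_type(hashtagDict, isRec, default='discuss'):
    if isRec:
        try:
            return hashtagDict['rec'][0]  # e.g. 'recommend' or 'mustread'
        except KeyError:  # rec hashtag present but unparsed
            return 'recommend'
    return default

assert _resolve_citation_type({'rec': ['mustread']}, True) == 'mustread'
assert _resolve_citation_type({}, True) == 'recommend'
assert _resolve_citation_type({}, False) == 'discuss'
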
# Current revision: extracts citations via get_citations_types_and_topics(),
# deletes or rebinds posts whose primary paper changed, and reconciles
# secondary citations on update.
def find_or_insert_posts(posts, get_post_comments, find_or_insert_person,
                         get_content, get_user, get_replycount, get_id,
                         get_timestamp, is_reshare, source,
                         process_post=None, process_reply=None,
                         recentEvents=None, maxDays=None,
                         citationType='discuss', citationType2='discuss',
                         get_title=lambda x: x['title'], spnetworkOnly=True):
    'generate each post that has a paper hashtag, adding to DB if needed'
    now = datetime.utcnow()
    saveEvents = []
    for d in posts:
        post = None
        timeStamp = get_timestamp(d)
        if maxDays is not None and (now - timeStamp).days > maxDays:
            break
        if is_reshare(d):  # just a duplicate (reshared) post, so skip
            continue
        content = get_content(d)
        try:
            post = core.Post(get_id(d))
            if getattr(post, 'etag', None) == d.get('etag', ''):
                yield post
                continue  # matches DB record, so nothing to do
        except KeyError:
            pass
        if spnetworkOnly and content.find('#spnetwork') < 0:
            if post:
                post.delete()  # remove old Post: no longer tagged!
            continue  # ignore posts lacking our spnetwork hashtag
        # extract tags and IDs:
        citations, topics, primary = get_citations_types_and_topics(content)
        try:
            primary_paper_ID = citations[primary]
            paper = get_paper(primary, primary_paper_ID[1])
        except KeyError:
            continue  # no link to a paper, so nothing to save
        if post and post.parent != paper:  # changed primary binding!
            post.delete()  # delete old binding
            post = None  # must resave to new binding
        d['text'] = content
        if process_post:
            process_post(d)
        d['sigs'] = get_topicIDs(topics, get_id(d), timeStamp, source)
        d['citationType'] = citations[primary][0]
        oldCitations = {}
        if post is None:  # save to DB
            userID = get_user(d)
            author = find_or_insert_person(userID)
            d['author'] = author._id
            post = core.Post(docData=d, parent=paper)
            try:
                topicsDict
            except NameError:
                topicsDict, subsDict = bulk.get_people_subs()
            bulk.deliver_rec(paper._id, d, topicsDict, subsDict)
            if recentEvents is not None:  # add to monitor deque
                saveEvents.append(post)
        else:  # update DB with new data and etag
            post.update(d)
            for c in getattr(post, 'citations', ()):  # index old citations
                oldCitations[c.parent] = c
        for ref, meta in citations.iteritems():  # add / update new citations
            if ref != primary:
                paper2 = get_paper(ref, meta[1])
                try:  # if already present, just update citationType if changed
                    c = oldCitations[paper2]
                    if c.citationType != meta[0]:
                        c.update(dict(citationType=meta[0]))
                    del oldCitations[paper2]  # don't treat as old citation
                except KeyError:
                    post.add_citations([paper2], meta[0])
        for c in oldCitations.values():
            c.delete()  # delete citations no longer present in updated post
        yield post
        if get_replycount(d) > 0:
            for c in get_post_comments(get_id(d)):
                if process_reply:
                    process_reply(c)
                try:
                    r = core.Reply(get_id(c))
                    if getattr(r, 'etag', None) != c.get('etag', ''):
                        # update DB record with latest data
                        r.update(dict(etag=c.get('etag', ''),
                                      text=get_content(c),
                                      updated=c.get('updated', '')))
                    continue  # already stored in DB, no need to save
                except KeyError:
                    pass
                userID = get_user(c)
                author = find_or_insert_person(userID)
                c['author'] = author._id
                c['text'] = get_content(c)
                c['replyTo'] = get_id(d)
                r = core.Reply(docData=c, parent=post._parent_link)
                if recentEvents is not None:  # add to monitor deque
                    saveEvents.append(r)
    if saveEvents and recentEvents is not None:
        saveEvents.sort(lambda x, y: cmp(x.published, y.published))
        for r in saveEvents:
            recentEvents.appendleft(r)  # add to monitor deque
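
# The citation-reconciliation loop above is essentially a dict diff; this
# standalone sketch (illustration only, using plain dicts keyed by paper
# instead of core.Post citation objects) shows the same add / update /
# delete decision logic in isolation.
def _reconcile_citations(oldCitations, newCitations):
    '''oldCitations: {paper: citationType} currently stored;
    newCitations: {paper: citationType} extracted from the updated text.
    Returns (toAdd, toUpdate, toDelete) lists of papers.'''
    toAdd, toUpdate, toDelete = [], [], []
    for paper, ctype in newCitations.items():
        if paper not in oldCitations:
            toAdd.append(paper)  # citation newly introduced by the edit
        elif oldCitations[paper] != ctype:
            toUpdate.append(paper)  # kept, but its citationType changed
    for paper in oldCitations:
        if paper not in newCitations:
            toDelete.append(paper)  # citation dropped from the edited text
    return toAdd, toUpdate, toDelete

assert _reconcile_citations({'A': 'discuss'},
                            {'A': 'recommend', 'B': 'discuss'}) == \
    (['B'], ['A'], [])
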