def test_task_process_myads(self):
    msg = {'frequency': 'daily'}
    # can't process without a user ID
    with patch.object(tasks.logger, 'error', return_value=None) as logger:
        tasks.task_process_myads(msg)
        logger.assert_called_with(u"No user ID received for {0}".format(msg))

    msg = {'userid': 123}
    # can't process without a frequency
    with patch.object(tasks.logger, 'error', return_value=None) as logger:
        tasks.task_process_myads(msg)
        logger.assert_called_with(u"No frequency received for {0}".format(msg))

    # process a user (the user should get created during the task)
    msg = {'userid': 123, 'frequency': 'daily'}
    httpretty.register_uri(
        httpretty.GET, self.app.conf['API_VAULT_MYADS_SETUP'] % msg['userid'],
        content_type='application/json',
        status=200,
        body=json.dumps([{'id': 1, 'name': 'Query 1', 'qid': '1234567890abcdefghijklmnopqrstu1',
                          'active': True, 'stateful': True, 'frequency': 'daily', 'type': 'query'},
                         {'id': 2, 'name': 'Query 2', 'qid': '1234567890abcdefghijklmnopqrstu2',
                          'active': True, 'stateful': False, 'frequency': 'weekly', 'type': 'query'},
                         {'id': 3, 'name': 'Query 3', 'qid': '1234567890abcdefghijklmnopqrstu3',
                          'active': True, 'stateful': False, 'frequency': 'weekly', 'type': 'template',
                          'template': 'authors', 'data': {'data': 'author:Kurtz'},
                          'query': [{'q': 'author:Kurtz entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                     'sort': 'score desc, bibcode desc'}]},
                         {'id': 4, 'name': 'Query 4', 'qid': None,
                          'active': True, 'stateful': True, 'frequency': 'daily', 'type': 'template',
                          'template': 'arxiv', 'data': 'star', 'classes': ['astro-ph'],
                          'query': [{'q': 'bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) '
                                          'entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                     'sort': 'score desc, bibcode desc'},
                                    {'q': 'bibstem:arxiv (arxiv_class:(astro-ph.*) NOT (star)) '
                                          'entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                     'sort': 'bibcode desc'}]}
                         ])
    )
    httpretty.register_uri(
        httpretty.GET,
        self.app.conf['API_VAULT_EXECUTE_QUERY'] % ('1234567890abcdefghijklmnopqrstu1',
                                                    'bibcode,title,author_norm', 10, 'bibcode+desc'),
        content_type='application/json',
        status=200,
        body=json.dumps({'response': {'docs': [{'bibcode': '2019arXiv190800829P',
                                                'title': ['Gravitational wave signatures from an extended ' +
                                                          'inert doublet dark matter model'],
                                                'author_norm': ['Paul, A', 'Banerjee, B', 'Majumdar, D'],
                                                'identifier': ['2019arXiv190800829P', 'arXiv:1908.00829'],
                                                'year': '2019', 'bibstem': ['arXiv']},
                                               {'bibcode': '2019arXiv190800678L',
                                                'title': ['Prospects for Gravitational Wave Measurement ' +
                                                          'of ZTFJ1539+5027'],
                                                'author_norm': ['Littenberg, T', 'Cornish, N'],
                                                'identifier': ['2019arXiv190800678L', 'arXiv:1908.00678'],
                                                'year': '2019', 'bibstem': ['arXiv']}],
                                      'numFound': 2, 'start': 0},
                         'responseHeader': {'QTime': 5,
                                            'params': {'fl': 'bibcode,title,author_norm,identifier,year,bibstem',
                                                       'q': 'title:"gravity waves" ' +
                                                            'entdate:[2019-08-03 TO 2019-08-04] bibstem:"arxiv"',
                                                       'rows': '2', 'start': '0', 'wt': 'json',
                                                       'x-amzn-trace-id': 'Root=1-5d3b6518-3b417bec5eee25783a4147f4'},
                                            'status': 0}})
    )
    httpretty.register_uri(
        httpretty.GET,
        self.app.conf['API_SOLR_QUERY_ENDPOINT'] +
        '?q={query}&sort={sort}&fl={fields}&rows={rows}'.format(
            query=quote_plus('bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]'),
            sort=quote_plus('score desc, bibcode desc'),
            fields='bibcode,title,author_norm,identifier,year,bibstem',
            rows=5),
        content_type='application/json',
        status=401
    )

    with patch.object(self.app, 'get_recent_results') as get_recent_results, \
            patch.object(utils, 'get_user_email') as get_user_email, \
            patch.object(utils, 'payload_to_plain') as payload_to_plain, \
            patch.object(utils, 'payload_to_html') as payload_to_html, \
            patch.object(utils, 'send_email') as send_email, \
            patch.object(tasks.task_process_myads, 'apply_async') as rerun_task:

        get_recent_results.return_value = ['2019arXiv190800829P', '2019arXiv190800678L']
        get_user_email.return_value = '*****@*****.**'
        payload_to_plain.return_value = 'plain payload'
        payload_to_html.return_value = '<em>html payload</em>'
        send_email.return_value = 'this should be a MIMEMultipart object'

        tasks.task_process_myads(msg)

        self.assertTrue(rerun_task.called)

        httpretty.register_uri(
            httpretty.GET,
            self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={0}&sort={1}&fl={2}&rows={3}'.format(
                'bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                'score+desc,+bibcode+desc', 'bibcode,title,author_norm', 5),
            content_type='application/json',
            status=200,
            body=json.dumps({"responseHeader": {"status": 0, "QTime": 23,
                                                "params": {"q": 'bibstem:arxiv (arxiv_class:(astro-ph.*) (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                                           "x-amzn-trace-id": "Root=1-5d769c6c-f96bfa49d348f03d8ecb7464",
                                                           "fl": "bibcode,title,author_norm", "start": "0",
                                                           "sort": "score desc, bibcode desc", "rows": "5", "wt": "json"}},
                             "response": {"numFound": 2712, "start": 0,
                                          "docs": [{"bibcode": "1971JVST....8..324K",
                                                    "title": ["High-Capacity Lead Tin Barrel Dome..."],
                                                    "author_norm": ["Kurtz, J"],
                                                    "identifier": ["1971JVST....8..324K"], "year": "1971", "bibstem": ["JVST"]},
                                                   {"bibcode": "1972ApJ...178..701K",
                                                    "title": ["Search for Coronal Line Emission from the Cygnus Loop"],
                                                    "author_norm": ["Kurtz, D", "Vanden Bout, P", "Angel, J"],
                                                    "identifier": ["1972ApJ...178..701K"], "year": "1972", "bibstem": ["ApJ"]},
                                                   {"bibcode": "1973ApOpt..12..891K",
                                                    "title": ["Author's Reply to Comments on: Experimental..."],
                                                    "author_norm": ["Kurtz, R"],
                                                    "identifier": ["1973ApOpt..12..891K"], "year": "1973", "bibstem": ["ApOpt"]},
                                                   {"bibcode": "1973SSASJ..37..725W",
                                                    "title": ["Priming Effect of 15N-Labeled Fertilizers..."],
                                                    "author_norm": ["Westerman, R", "Kurtz, L"],
                                                    "identifier": ["1973SSASJ..37..725W"], "year": "1973", "bibstem": ["SSASJ"]},
                                                   {"bibcode": "1965JSpRo...2..818K",
                                                    "title": ["Orbital tracking and decay analysis of the saturn..."],
                                                    "author_norm": ["Kurtz, H", "McNair, A", "Naumcheff, M"],
                                                    "identifier": ["1965JSpRo...2..818K"], "year": "1965", "bibstem": ["JSpRo"]}]}})
        )
        httpretty.register_uri(
            httpretty.GET,
            self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={0}&sort={1}&fl={2}&rows={3}'.format(
                'bibstem:arxiv (arxiv_class:(astro-ph.*) NOT (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                'score+desc,+bibcode+desc', 'bibcode,title,author_norm', 5),
            content_type='application/json',
            status=200,
            body=json.dumps({"responseHeader": {"status": 0, "QTime": 23,
                                                "params": {"q": 'bibstem:arxiv (arxiv_class:(astro-ph.*) NOT (star)) entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                                           "x-amzn-trace-id": "Root=1-5d769c6c-f96bfa49d348f03d8ecb7464",
                                                           "fl": "bibcode,title,author_norm", "start": "0",
                                                           "sort": "score desc, bibcode desc", "rows": "5", "wt": "json"}},
                             "response": {"numFound": 2712, "start": 0,
                                          "docs": [{"bibcode": "1971JVST....8..324K",
                                                    "title": ["High-Capacity Lead Tin Barrel Dome..."],
                                                    "author_norm": ["Kurtz, J"],
                                                    "identifier": ["1971JVST....8..324K"], "year": "1971", "bibstem": ["JVST"]},
                                                   {"bibcode": "1972ApJ...178..701K",
                                                    "title": ["Search for Coronal Line Emission from the Cygnus Loop"],
                                                    "author_norm": ["Kurtz, D", "Vanden Bout, P", "Angel, J"],
                                                    "identifier": ["1972ApJ...178..701K"], "year": "1972", "bibstem": ["ApJ"]},
                                                   {"bibcode": "1973ApOpt..12..891K",
                                                    "title": ["Author's Reply to Comments on: Experimental..."],
                                                    "author_norm": ["Kurtz, R"],
                                                    "identifier": ["1973ApOpt..12..891K"], "year": "1973", "bibstem": ["ApOpt"]},
                                                   {"bibcode": "1973SSASJ..37..725W",
                                                    "title": ["Priming Effect of 15N-Labeled Fertilizers..."],
                                                    "author_norm": ["Westerman, R", "Kurtz, L"],
                                                    "identifier": ["1973SSASJ..37..725W"], "year": "1973", "bibstem": ["SSASJ"]},
                                                   {"bibcode": "1965JSpRo...2..818K",
                                                    "title": ["Orbital tracking and decay analysis of the saturn..."],
                                                    "author_norm": ["Kurtz, H", "McNair, A", "Naumcheff, M"],
                                                    "identifier": ["1965JSpRo...2..818K"], "year": "1965", "bibstem": ["JSpRo"]}]}})
        )

        tasks.task_process_myads(msg)

        with self.app.session_scope() as session:
            user = session.query(AuthorInfo).filter_by(id=123).first()
            self.assertEqual(adsputils.get_date().date(), user.last_sent.date())

    msg = {'userid': 123, 'frequency': 'daily', 'force': False}
    httpretty.register_uri(
        httpretty.GET,
        self.app.conf['API_VAULT_EXECUTE_QUERY'] % ('1234567890abcdefghijklmnopqrstu2',
                                                    'bibcode,title,author_norm', 10, 'bibcode+desc'),
        content_type='application/json',
        status=200,
        body=json.dumps({u'response': {u'docs': [{u'bibcode': u'2019arXiv190800829P',
                                                  u'title': [u'Gravitational wave signatures from an ' +
                                                             u'extended inert doublet dark matter model'],
                                                  u'author_norm': [u'Paul, A', u'Banerjee, B', u'Majumdar, D'],
                                                  u'identifier': [u'2019arXiv190800829P', u'arXiv:1908.00829'],
                                                  u'year': u'2019', u'bibstem': [u'arXiv']},
                                                 {u'bibcode': u'2019arXiv190800678L',
                                                  u'title': [u'Prospects for Gravitational Wave Measurement ' +
                                                             u'of ZTFJ1539+5027'],
                                                  u'author_norm': [u'Littenberg, T', u'Cornish, N'],
                                                  u'identifier': [u'2019arXiv190800678L', u'arXiv:1908.00678'],
                                                  u'year': u'2019', u'bibstem': [u'arXiv']}],
                                       u'numFound': 2, u'start': 0},
                         u'responseHeader': {u'QTime': 5,
                                             u'params': {u'fl': u'bibcode,title,author_norm', u'fq': u'{!bitset}',
                                                         u'q': u'*:*', u'rows': u'2', u'start': u'0', u'wt': u'json',
                                                         u'x-amzn-trace-id': u'Root=1-5d3b6518-3b417bec5eee25783a4147f4'},
                                             u'status': 0}})
    )
    httpretty.register_uri(
        httpretty.GET,
        self.app.conf['API_SOLR_QUERY_ENDPOINT'] + '?q={0}&sort={1}&fl={2}&rows={3}'.format(
            'author:Kurtz entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
            'score+desc,+bibcode+desc', 'bibcode,title,author_norm', 5),
        content_type='application/json',
        status=200,
        body=json.dumps({"responseHeader": {"status": 0, "QTime": 23,
                                            "params": {"q": 'author:Kurtz entdate:["2020-01-01Z00:00" TO "2020-01-01Z23:59"] pubdate:[2019-00 TO *]',
                                                       "x-amzn-trace-id": "Root=1-5d769c6c-f96bfa49d348f03d8ecb7464",
                                                       "fl": "bibcode,title,author_norm", "start": "0",
                                                       "sort": "score desc, bibcode desc", "rows": "5", "wt": "json"}},
                         "response": {"numFound": 2712, "start": 0,
                                      "docs": [{"bibcode": "1971JVST....8..324K",
                                                "title": ["High-Capacity Lead Tin Barrel Dome..."],
                                                "author_norm": ["Kurtz, J"],
                                                "identifier": ["1971JVST....8..324K"], "year": "1971", "bibstem": ["JVST"]},
                                               {"bibcode": "1972ApJ...178..701K",
                                                "title": ["Search for Coronal Line Emission from the Cygnus Loop"],
                                                "author_norm": ["Kurtz, D", "Vanden Bout, P", "Angel, J"],
                                                "identifier": ["1972ApJ...178..701K"], "year": "1972", "bibstem": ["ApJ"]},
                                               {"bibcode": "1973ApOpt..12..891K",
                                                "title": ["Author's Reply to Comments on: Experimental..."],
                                                "author_norm": ["Kurtz, R"],
                                                "identifier": ["1973ApOpt..12..891K"], "year": "1973", "bibstem": ["ApOpt"]},
                                               {"bibcode": "1973SSASJ..37..725W",
                                                "title": ["Priming Effect of 15N-Labeled Fertilizers..."],
                                                "author_norm": ["Westerman, R", "Kurtz, L"],
                                                "identifier": ["1973SSASJ..37..725W"], "year": "1973", "bibstem": ["SSASJ"]},
                                               {"bibcode": "1965JSpRo...2..818K",
                                                "title": ["Orbital tracking and decay analysis of the saturn..."],
                                                "author_norm": ["Kurtz, H", "McNair, A", "Naumcheff, M"],
                                                "identifier": ["1965JSpRo...2..818K"], "year": "1965", "bibstem": ["JSpRo"]}]}})
    )

    with patch.object(self.app, 'get_recent_results') as get_recent_results, \
            patch.object(utils, 'get_user_email') as get_user_email, \
            patch.object(utils, 'payload_to_plain') as payload_to_plain, \
            patch.object(utils, 'payload_to_html') as payload_to_html, \
            patch.object(utils, 'send_email') as send_email:

        get_recent_results.return_value = ['2019arXiv190800829P', '2019arXiv190800678L']
        get_user_email.return_value = '*****@*****.**'
        payload_to_plain.return_value = 'plain payload'
        payload_to_html.return_value = '<em>html payload</em>'
        send_email.return_value = 'this should be a MIMEMultipart object'

        # already ran today, tried to run again without force=True
        with patch.object(tasks.logger, 'warning', return_value=None) as logger:
            tasks.task_process_myads(msg)
            logger.assert_called_with(u"Email for user {0} already sent today".format(msg['userid']))

        msg = {'userid': 123, 'frequency': 'weekly'}

        # reset user
        with self.app.session_scope() as session:
            user = session.query(AuthorInfo).filter_by(id=123).first()
            user.last_sent = None
            session.add(user)
            session.commit()

        with self.app.session_scope() as session:
            user = session.query(AuthorInfo).filter_by(id=123).first()
            self.assertIsNone(user.last_sent)

        tasks.task_process_myads(msg)

        with self.app.session_scope() as session:
            user = session.query(AuthorInfo).filter_by(id=123).first()
            self.assertEqual(adsputils.get_date().date(), user.last_sent.date())
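
# A minimal sketch (not part of the original test suite) of how the repeated Solr mock
# registrations in test_task_process_myads above could be factored into a shared helper.
# The method name _register_solr_mock and its default arguments are illustrative
# assumptions; the endpoint key and URL template mirror the registrations in the test.
def _register_solr_mock(self, query, body, sort='score+desc,+bibcode+desc',
                        fields='bibcode,title,author_norm', rows=5, status=200):
    # register a canned Solr response (body is a dict) for a single query string
    httpretty.register_uri(
        httpretty.GET,
        self.app.conf['API_SOLR_QUERY_ENDPOINT'] +
        '?q={0}&sort={1}&fl={2}&rows={3}'.format(query, sort, fields, rows),
        content_type='application/json',
        status=status,
        body=json.dumps(body)
    )
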
def process_myads(since=None, user_ids=None, user_emails=None, test_send_to=None, admin_email=None,
                  force=False, frequency='daily', test_bibcode=None, **kwargs):
    """
    Processes myADS mailings

    :param since: check for new myADS users since this date
    :param user_ids: users to process claims for, else all users - list (given as adsws IDs)
    :param user_emails: users to process claims for, else all users - list (given as email addresses)
    :param test_send_to: for testing; process a given user ID but send the output to this email address
    :param admin_email: if provided, email is sent to this address at the beginning and end of processing
        (does not trigger when processing individual users)
    :param force: if True, will force processing of emails even if already sent for a given user that day
    :param frequency: basestring; 'daily' or 'weekly'
    :param test_bibcode: bibcode to query to test whether the Solr searcher has been updated
    :return: no return
    """

    if user_ids:
        for u in user_ids:
            tasks.task_process_myads({'userid': u, 'frequency': frequency, 'force': True,
                                      'test_send_to': test_send_to, 'test_bibcode': test_bibcode})
        logger.info('Done (just the supplied user IDs)')
        return

    if user_emails:
        for u in user_emails:
            r = app.client.get(config.get('API_ADSWS_USER_EMAIL') % u,
                               headers={'Accept': 'application/json',
                                        'Authorization': 'Bearer {0}'.format(config.get('API_TOKEN'))}
                               )
            if r.status_code == 200:
                user_id = r.json()['id']
            else:
                logger.warning('Error getting user ID with email {0} from the API. '
                               'Processing aborted for this user'.format(u))
                continue
            tasks.task_process_myads({'userid': user_id, 'frequency': frequency, 'force': True,
                                      'test_send_to': test_send_to, 'test_bibcode': test_bibcode})
        logger.info('Done (just the supplied user emails)')
        return

    logging.captureWarnings(True)

    if admin_email:
        msg = utils.send_email(email_addr=admin_email,
                               payload_plain='Processing started for {}'.format(get_date()),
                               payload_html='Processing started for {}'.format(get_date()),
                               subject='myADS {0} processing has started'.format(frequency))

    # if the since keyword isn't provided, since is set to the timestamp of the last processing run
    if not since or (isinstance(since, basestring) and since.strip() == ""):
        with app.session_scope() as session:
            if frequency == 'daily':
                kv = session.query(KeyValue).filter_by(key='last.process.daily').first()
            else:
                kv = session.query(KeyValue).filter_by(key='last.process.weekly').first()
            if kv is not None:
                since = kv.value
            else:
                since = '1971-01-01T12:00:00Z'

    users_since_date = get_date(since)
    logger.info('Processing {0} myADS queries since: {1}'.format(frequency, users_since_date.isoformat()))

    last_process_date = get_date()
    all_users = app.get_users(users_since_date.isoformat())

    for user in all_users:
        try:
            tasks.task_process_myads.delay({'userid': user, 'frequency': frequency, 'force': force,
                                            'test_bibcode': test_bibcode})
        except:  # potential backpressure (we are too fast)
            time.sleep(2)
            print 'Conn problem, retrying...', user
            tasks.task_process_myads.delay({'userid': user, 'frequency': frequency, 'force': force,
                                            'test_bibcode': test_bibcode})

    # update the last processed timestamp
    with app.session_scope() as session:
        if frequency == 'daily':
            kv = session.query(KeyValue).filter_by(key='last.process.daily').first()
        else:
            kv = session.query(KeyValue).filter_by(key='last.process.weekly').first()
        if kv is None:
            if frequency == 'daily':
                kv = KeyValue(key='last.process.daily', value=last_process_date.isoformat())
            else:
                kv = KeyValue(key='last.process.weekly', value=last_process_date.isoformat())
            session.add(kv)
        else:
            kv.value = last_process_date.isoformat()
        session.commit()

    print 'Done submitting {0} myADS processing tasks for {1} users.'.format(frequency, len(all_users))
    logger.info('Done submitting {0} myADS processing tasks for {1} users.'.format(frequency, len(all_users)))
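
# A minimal sketch, assuming process_myads() is driven from a command-line entry point.
# The argparse wiring and flag names below are illustrative assumptions, not the
# pipeline's actual CLI; the keyword arguments passed through match the signature
# documented in process_myads() above.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Process myADS mailings')
    parser.add_argument('-f', '--frequency', default='daily', choices=['daily', 'weekly'],
                        help='process daily or weekly myADS emails')
    parser.add_argument('-u', '--user_ids', nargs='+', type=int, default=None,
                        help='only process the given adsws user IDs')
    parser.add_argument('--force', action='store_true',
                        help='resend emails even if already sent today')
    parser.add_argument('--since', default=None,
                        help='process users added since this date (ISO format)')
    args = parser.parse_args()

    process_myads(since=args.since, user_ids=args.user_ids,
                  frequency=args.frequency, force=args.force)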