def _send_rfps_to_subscribers(self, sub, first_name, email, results):
    """Search RFPs matching this subscription's keyword and e-mail them to the
    subscriber. Split out of the cron handler so it can be exercised in tests.
    """
    try:
        # Query RFPs based on this subscription's keyword
        rfp_list = RFP.search(phrase=sub.keyword,
                              date=sub.last_updated,
                              limit=10)
        if rfp_list:
            template_values = {
                'rfps': rfp_list,
                'name': first_name,
                'search_text': sub.keyword,
                'is_admin': False,
                'search_uri': 'http://rfpow301.appspot.com/rfp/search/',
                'permalink_uri': 'http://rfpow301.appspot.com/rfp/'
            }
            subject = 'New RFPs for "%s" : RFPow!' % sub.keyword
            self.send(subject, email, template_values)

            # Update the last-updated time so we know not to send duplicates
            # on the next cron run
            sub.last_updated = datetime.datetime.now().date()
            sub.put()

            msg = "Found %d RFPs for %s with keyword '%s' for email: %s" % \
                  (len(rfp_list), sub.username, sub.keyword, email)
            logging.info(msg)
            results.append(msg)
        else:
            msg = 'No RFPs found for username: %s and keyword: %s' % \
                  (sub.username, sub.keyword)
            logging.info(msg)
            results.append(msg)
    except Exception as e:
        msg = 'Problem with sending a sub, probably a None object: %s' % e
        logging.info(msg)
        results.append(msg)
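
# A hypothetical sketch of how a cron handler might drive the helper above,
# assuming a Subscription model with username/keyword/email fields; the model
# name, query style, and handler shape are illustrative, not taken from the
# actual module.
#
#     def get(self):
#         results = []
#         for sub in Subscription.all():
#             user = User.by_username(sub.username)
#             self._send_rfps_to_subscribers(sub, user.first_name,
#                                            user.email, results)
#         self.response.out.write('\n'.join(results))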
def parse(parser, ignore_duplicates=False, start_id=None, stop_on_dupe=False,
          limit=None):
    """Parse a bunch of RFPs from Merx and stash the results in the DB.

    parser            -- instance of a Parser class with which to parse
    ignore_duplicates -- save an RFP even if it is already in the DB
    start_id          -- begin parsing at original_id == start_id
    stop_on_dupe      -- halt the parse job if we hit a duplicate
    limit             -- parse at most `limit` new RFPs
    """
    parsed_total = 0
    parsed_new = 0
    page = 0

    # Skip RFPs until the given start_id is found. Handy for resuming a
    # parse job.
    skip = start_id is not None

    while parser.has_next():
        page += 1
        rfps = parser.next(parse_each=False)
        parsed_total += len(rfps)

        for r in rfps:
            title = r['title'].encode('utf-8')

            # Skip if given an ID to resume parsing from
            if skip:
                if start_id != r['original_id']:
                    logging.info('Skipping while waiting for %d: %s' %
                                 (start_id, title))
                    continue
                else:
                    skip = False
                    logging.info('Resuming parsing from ID: %s' %
                                 r['original_id'])

            # Check if we've parsed this RFP before
            if not ignore_duplicates:
                db_match = RFP.by_original_id(r['origin'], r['original_id'])

                # Either skip this RFP if it is already in the DB, or stop
                # parsing altogether
                if db_match.count() != 0:
                    if stop_on_dupe:
                        logging.info('Stopping early on RFP: %s' % title)
                        return (parsed_total, parsed_new)
                    else:
                        logging.info('Skipping existing RFP: %s.' % title)
                        continue
                else:
                    # Stop early if there's a limit on the number of new
                    # RFPs parsed
                    if limit is not None and limit <= parsed_new:
                        logging.info('Stopping early due to limit: %s' % title)
                        return (parsed_total, parsed_new)

            rfp = RFP.from_dict(parser.parse_details(r))
            rfp.put()
            rfp.index()
            parsed_new += 1
            logging.info(u'Saving new RFP: %s' % rfp)

        logging.info('Parsed page %d of Merx results' % page)

    return (parsed_total, parsed_new)
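
# A minimal usage sketch for parse(), assuming a Merx parser class in this
# project implements the interface used above (has_next(), next(),
# parse_details()); the class name and argument values here are illustrative,
# not taken from the actual module.
#
#     parser = MerxParser()
#     parsed_total, parsed_new = parse(parser, stop_on_dupe=True, limit=25)
#     logging.info('Parsed %d RFPs, %d of them new' %
#                  (parsed_total, parsed_new))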