Beispiel #1
0
    def _send_rfps_to_subscribers(self, sub, first_name, email, results):
        """
            Helper function to assist in testing.
        """

        try:
            # Query RFPs based on this subscription's keyword
            rfp_list = RFP.search(phrase=sub.keyword,
                                  date=sub.last_updated,
                                  limit=10)

            if rfp_list and len(rfp_list) > 0:
                template_values = {
                    "rfps": rfp_list,
                    "name": first_name,
                    'search_text': sub.keyword,
                    'is_admin': False,
                    'search_uri': 'http://rfpow301.appspot.com/rfp/search/',
                    'permalink_uri': 'http://rfpow301.appspot.com/rfp/'
                }

                subject = "New RFPs for \"%s\" : RFPow!" % sub.keyword
                self.send(subject, email, template_values)

                # Update the last update time so we know to not send dups on next cron
                sub.last_updated = datetime.datetime.now().date()
                sub.put()

                msg = "Found %d RFPs for %s with keyword '%s' for email: %s" %\
                      (len(rfp_list), sub.username, sub.keyword, email)

                logging.info(msg)
                results.append(msg)
            else:
                msg = 'No RFPs found for username: %s and keyword: %s' % (
                    sub.username, sub.keyword)

                logging.info(msg)
                results.append(msg)
        except:
            msg = 'Problem with sending a sub, probably None object'
            logging.info(msg)
            results.append(msg)
Beispiel #2
0
    def _send_rfps_to_subscribers(self, sub, first_name, email, results):
        """
            Helper function to assist in testing.
        """

        try:
            # Query RFPs based on this subscription's keyword
            rfp_list = RFP.search(phrase=sub.keyword, date=sub.last_updated, limit=10)

            if rfp_list and len(rfp_list) > 0:
                template_values = {
                    "rfps": rfp_list,
                    "name": first_name,
                    'search_text': sub.keyword,
                    'is_admin': False,
                    'search_uri': 'http://rfpow301.appspot.com/rfp/search/',
                    'permalink_uri': 'http://rfpow301.appspot.com/rfp/'
                }

                subject = "New RFPs for \"%s\" : RFPow!" % sub.keyword
                self.send(subject, email, template_values)

                # Update the last update time so we know to not send dups on next cron
                sub.last_updated = datetime.datetime.now().date()
                sub.put()

                msg = "Found %d RFPs for %s with keyword '%s' for email: %s" %\
                      (len(rfp_list), sub.username, sub.keyword, email)

                logging.info(msg)
                results.append(msg)
            else:
                msg = 'No RFPs found for username: %s and keyword: %s' % (sub.username, sub.keyword)

                logging.info(msg)
                results.append(msg)
        except:
            msg = 'Problem with sending a sub, probably None object'
            logging.info(msg)
            results.append(msg)
Beispiel #3
0
    def parse(parser, ignore_duplicates=False, start_id=None,
            stop_on_dupe=False, limit=None):
        """Parse a bunch of RFPs from Merx and stash results in the DB

        parser -- instance of a Parser class with which to parse; has_next(),
                  next(parse_each=False) and parse_details(rfp) are used
        ignore_duplicates -- save RFP even if already in the DB (the
                             duplicate lookup is skipped entirely)
        start_id -- begin parsing at original_id == start_id
        stop_on_dupe -- halt parse job if we hit a duplicate
        limit -- parse at most `limit` jobs

        Returns a (parsed_total, parsed_new) tuple: the number of RFPs listed
        on the pages fetched so far, and the number actually saved.
        """

        parsed_total = 0
        parsed_new = 0
        page = 0

        # skip RFPs until found given start_id. Handy for resuming a parse job
        skip = start_id is not None

        while parser.has_next():
            page += 1
            rfps = parser.next(parse_each=False)
            parsed_total += len(rfps)

            for r in rfps:
                title = r['title'].encode('utf-8')

                # skip if given an ID to resume parsing from
                if skip:
                    if start_id != r['original_id']:
                        # %s rather than %d: the resume ID may be a string
                        logging.info('Skipping while waiting for %s: %s',
                                     start_id, title)
                        continue
                    skip = False
                    logging.info('Resuming parsing from ID: %s',
                                 r['original_id'])

                # check if we've parsed this RFP before
                if not ignore_duplicates:
                    db_match = RFP.by_original_id(r['origin'], r['original_id'])

                    # either skip if this RFP is already parsed, or stop parsing
                    if db_match.count() != 0:
                        if stop_on_dupe:
                            logging.info('Stopping early on RFP: %s', title)
                            return (parsed_total, parsed_new)
                        logging.info('Skipping existing RFP: %s.', title)
                        continue

                # stop early if there's a limit on number of RFPs parsed
                if limit is not None and limit <= parsed_new:
                    logging.info('Stopping early due to limit: %s', title)
                    return (parsed_total, parsed_new)

                # Saving happens outside the duplicate check so that
                # ignore_duplicates=True really stores the RFP instead of
                # bypassing the save along with the lookup.
                rfp = RFP.from_dict(parser.parse_details(r))
                rfp.put()
                rfp.index()
                parsed_new += 1

                logging.info(u'Saving new RFP: %s', rfp)
            logging.info('Parsed page %d of Merx results', page)

        return (parsed_total, parsed_new)