Ejemplo n.º 1
0
 def process_item(self, item, spider):
     """Drop an item whose title or description matches a rejection pattern.

     ``item.job_title`` is checked first and ``item.job_desc`` second, both
     through ``RejectionPattern.should_be_rejected``. The first match raises
     ``DropItem``; an item matching neither is returned unchanged.
     ``spider`` is accepted but not used here.
     """
     # (attribute to inspect, DropItem message), in the order they are checked.
     checks = (
         ('job_title', 'Job matches rejection pattern. Removing...'),
         ('job_desc', 'Job Description matches rejection pattern. Removing...'),
     )
     for attr_name, drop_message in checks:
         # The attribute is read only when its turn comes, so job_desc is
         # never touched if the title already caused a drop.
         if RejectionPattern.should_be_rejected(getattr(item, attr_name)):
             raise DropItem(drop_message)
     return item
Ejemplo n.º 2
0
 def test_should_be_rejected(self):
     """Exercise RejectionPattern.should_be_rejected with two saved patterns.

     Each pattern is saved and then probed with inputs that must or must not
     be rejected. Nothing in this test removes the first pattern before the
     second group of inputs is checked.
     """
     reject_msg = "input_text should be rejected"
     keep_msg = "input_text should not be rejected"

     # (pattern to save, ((input text, expected-to-be-rejected), ...))
     scenarios = (
         ("[1-9]+", (("9887", True), ("abcd", False))),
         # Negative lookbehind: matches 中介 unless it is preceded by 非.
         (u"(?<!非)中介", ((u"中介", True), (u"是中介", True), (u"非中介", False))),
     )
     for pattern, probes in scenarios:
         RejectionPattern(pattern).save()
         for text, expect_rejected in probes:
             outcome = RejectionPattern.should_be_rejected(text)
             if expect_rejected:
                 self.assertTrue(outcome, reject_msg)
             else:
                 self.assertFalse(outcome, keep_msg)
Ejemplo n.º 3
0
 def process_item(self, item, spider):
     """Return ``item`` unless its title or description is rejected.

     The title (``item.job_title``) is tested before the description
     (``item.job_desc``); the description is only tested when the title
     passes. A match on either raises ``DropItem`` with the corresponding
     message. ``spider`` is not used.
     """
     drop_reason = None
     if RejectionPattern.should_be_rejected(item.job_title):
         drop_reason = 'Job matches rejection pattern. Removing...'
     elif RejectionPattern.should_be_rejected(item.job_desc):
         drop_reason = 'Job Description matches rejection pattern. Removing...'

     if drop_reason is not None:
         raise DropItem(drop_reason)
     return item
Ejemplo n.º 4
0
 def test_should_be_rejected(self):
     """Verify should_be_rejected for a digit pattern and a lookbehind pattern.

     A pattern is saved, then sample inputs are checked against
     ``RejectionPattern.should_be_rejected``; this is done once per pattern.
     """
     reject_msg = 'input_text should be rejected'
     keep_msg = 'input_text should not be rejected'
     is_rejected = RejectionPattern.should_be_rejected

     # Pattern 1: one or more digits in the range 1-9.
     digit_pattern = RejectionPattern('[1-9]+')
     digit_pattern.save()
     self.assertTrue(is_rejected('9887'), reject_msg)
     self.assertFalse(is_rejected('abcd'), keep_msg)

     # Pattern 2: 中介 not immediately preceded by 非 (negative lookbehind).
     lookbehind_pattern = RejectionPattern(u'(?<!非)中介')
     lookbehind_pattern.save()
     for text in (u'中介', u'是中介'):
         self.assertTrue(is_rejected(text), reject_msg)
     self.assertFalse(is_rejected(u'非中介'), keep_msg)
Ejemplo n.º 5
0
    def should_load_details(self, job_item):
        """Decide whether the details of ``job_item`` should be loaded.

        The checks below run in order; the first one that applies logs the
        reason at INFO level and makes the method return ``False``:

        1. ``JobItem.is_exists(job_item)``
        2. ``JobItem.is_older_required(job_item)``
        3. ``BlockedContact.is_contact_blocked(job_item.contact)``
        4. ``RejectionPattern.should_be_rejected(job_item.job_title)``

        Returns ``True`` when none of them applies.

        Log arguments are handed to the logger instead of being formatted
        with ``%`` up front, so the message is only built when the record is
        actually emitted. The rendered text is unchanged.
        """
        if JobItem.is_exists(job_item):
            logger.info(
                '[%s] skipping loading details as job already exists. job_title: %s',
                self.name, job_item.job_title)
            return False

        if JobItem.is_older_required(job_item):
            # %s already stringifies the threshold, so no explicit str() is
            # needed. The config attribute name is spelled as it is
            # referenced by this code.
            logger.info(
                '[%s] skipping loading details as job is older than %s days. job_title: %s',
                self.name, config.HOUSEKEEPING_RECORD_ORDLER_THAN,
                job_item.job_title)
            return False

        if BlockedContact.is_contact_blocked(job_item.contact):
            logger.info(
                '[%s] skipping loading details as job contact is blocked. contact: %s',
                self.name, job_item.contact)
            return False

        if RejectionPattern.should_be_rejected(job_item.job_title):
            logger.info(
                '[%s] skipping loading details as job matches rejection pattern. job_title: %s',
                self.name, job_item.job_title)
            return False

        return True
Ejemplo n.º 6
0
    def should_load_details(self, job_item):
        """Return ``True`` if the details of ``job_item`` should be loaded.

        Four checks run in this order, stopping at the first that applies:
        the job already exists (``JobItem.is_exists``), the job is too old
        (``JobItem.is_older_required``), its contact is blocked
        (``BlockedContact.is_contact_blocked``), or its title matches a
        rejection pattern (``RejectionPattern.should_be_rejected``). When
        one applies, the reason is logged at INFO level and ``False`` is
        returned.

        The log template and its arguments are passed to the logger
        separately (lazy formatting) rather than pre-formatted with ``%``;
        the rendered message text is unchanged.
        """
        # (template, args) of the first check that applies, or None.
        skip = None
        if JobItem.is_exists(job_item):
            skip = (
                '[%s] skipping loading details as job already exists. job_title: %s',
                (self.name, job_item.job_title),
            )
        elif JobItem.is_older_required(job_item):
            # %s stringifies the threshold itself, so the explicit str() is
            # not needed.
            skip = (
                '[%s] skipping loading details as job is older than %s days. job_title: %s',
                (self.name, config.HOUSEKEEPING_RECORD_ORDLER_THAN,
                 job_item.job_title),
            )
        elif BlockedContact.is_contact_blocked(job_item.contact):
            skip = (
                '[%s] skipping loading details as job contact is blocked. contact: %s',
                (self.name, job_item.contact),
            )
        elif RejectionPattern.should_be_rejected(job_item.job_title):
            skip = (
                '[%s] skipping loading details as job matches rejection pattern. job_title: %s',
                (self.name, job_item.job_title),
            )

        if skip is None:
            return True

        template, args = skip
        logger.info(template, *args)
        return False