def process_item(self, item, spider):
    """Drop ``item`` when its title or description is flagged for rejection.

    ``RejectionPattern.should_be_rejected`` is consulted for the title
    first; the description is only examined when the title passed.

    :param item: scraped job; ``job_title`` and ``job_desc`` are read.
    :param spider: not used by this method.
    :return: ``item`` itself when neither field is flagged.
    :raises DropItem: when either field is flagged.
    """
    title_flagged = RejectionPattern.should_be_rejected(item.job_title)
    if title_flagged:
        raise DropItem('Job matches rejection pattern. Removing...')

    description_flagged = RejectionPattern.should_be_rejected(item.job_desc)
    if description_flagged:
        raise DropItem('Job Description matches rejection pattern. Removing...')

    return item
def test_find_with_pagination(self): for i in range(0, 20): RejectionPattern('Reject_pattern_%d' % i, '').save() records = RejectionPattern.find_with_pagination(page_request={'page_no': 2, 'size': 10}) print 'items', records self.assertEqual(10, len(records))
def test_find_all(self): self.rejection_pattern.save() another_rejection_pattern = RejectionPattern("[a-z]+", u"人员") another_rejection_pattern.save() records = RejectionPattern.findall() print "rejection_pattern", records self.assertEqual(2, len(records))
def test_should_be_rejected(self):
    """Check should_be_rejected() against a digit pattern and a lookbehind pattern."""
    reject_msg = "input_text should be rejected"
    keep_msg = "input_text should not be rejected"

    # Digit pattern: a numeric string is flagged, a purely alphabetic one is not.
    RejectionPattern("[1-9]+").save()
    flagged = RejectionPattern.should_be_rejected("9887")
    self.assertTrue(flagged, reject_msg)
    flagged = RejectionPattern.should_be_rejected("abcd")
    self.assertFalse(flagged, keep_msg)

    # Negative lookbehind: the word is flagged unless directly preceded by 非.
    RejectionPattern(u"(?<!非)中介").save()
    flagged = RejectionPattern.should_be_rejected(u"中介")
    self.assertTrue(flagged, reject_msg)
    flagged = RejectionPattern.should_be_rejected(u"是中介")
    self.assertTrue(flagged, reject_msg)
    flagged = RejectionPattern.should_be_rejected(u"非中介")
    self.assertFalse(flagged, keep_msg)
def test_find_all(self): self.rejection_pattern.save() another_rejection_pattern = RejectionPattern('[a-z]+', u'人员') another_rejection_pattern.save() records = RejectionPattern.findall() print 'rejection_pattern', records self.assertEqual(2, len(records))
def test_extract_records_as_bytes(self): RejectionPattern("Pattern1", "testing1").save() RejectionPattern("Pattern2").save() RejectionPattern("Pattern3", "测试").save() print "Content as txt: ", RejectionPattern.extract_records_as_bytes("txt") print "Content as excel: ", RejectionPattern.extract_records_as_bytes("xlsx") print "Content as csv: ", RejectionPattern.extract_records_as_bytes("csv")
def process_item(self, item, spider):
    """Return ``item`` unless its title or description is flagged for rejection.

    Fields are checked in order (title, then description) with
    ``RejectionPattern.should_be_rejected``; the first flagged field
    aborts processing.

    :param item: scraped job exposing ``job_title`` and ``job_desc``.
    :param spider: not used by this method.
    :return: the same ``item`` when no field is flagged.
    :raises DropItem: with a field-specific message when a field is flagged.
    """
    # (attribute to inspect, message raised when it is flagged)
    checks = (
        ('job_title', 'Job matches rejection pattern. Removing...'),
        ('job_desc', 'Job Description matches rejection pattern. Removing...'),
    )
    for field_name, drop_message in checks:
        if RejectionPattern.should_be_rejected(getattr(item, field_name)):
            raise DropItem(drop_message)
    return item
def test_save(self): RejectionPattern("Something").save() RejectionPattern(u"人员", None).save() print RejectionPattern.findall() conn = self.connect_db() try: c = conn.cursor() c.execute("SELECT COUNT(*) FROM " + RejectionPattern.table_name) self.assertEqual(c.fetchone()[0], 2, "Count of rejection_pattern should be 2") except: pass finally: conn.close()
def test_save(self): RejectionPattern('Something').save() RejectionPattern(u'人员', None).save() print RejectionPattern.findall() conn = self.connect_db() try: c = conn.cursor() c.execute('SELECT COUNT(*) FROM ' + RejectionPattern.table_name) self.assertEqual(c.fetchone()[0], 2, 'Count of rejection_pattern should be 2') except: pass finally: conn.close()
def should_load_details(self, job_item):
    """Decide whether the detail page of ``job_item`` should be loaded.

    Checks run in this order and stop at the first hit: job already
    exists, job is older than required, contact is blocked, title is
    flagged by ``RejectionPattern.should_be_rejected``. A hit is logged
    at info level.

    :param job_item: job whose ``job_title`` and ``contact`` are read.
    :return: ``False`` when any check hits, ``True`` otherwise.
    """
    skip_message = None

    if JobItem.is_exists(job_item):
        skip_message = (
            '[%s] skipping loading details as job already exists. job_title: %s'
            % (self.name, job_item.job_title))
    elif JobItem.is_older_required(job_item):
        skip_message = (
            '[%s] skipping loading details as job is older than %s days. job_title: %s'
            % (self.name, str(config.HOUSEKEEPING_RECORD_ORDLER_THAN), job_item.job_title))
    elif BlockedContact.is_contact_blocked(job_item.contact):
        skip_message = (
            '[%s] skipping loading details as job contact is blocked. contact: %s'
            % (self.name, job_item.contact))
    elif RejectionPattern.should_be_rejected(job_item.job_title):
        skip_message = (
            '[%s] skipping loading details as job matches rejection pattern. job_title: %s'
            % (self.name, job_item.job_title))

    if skip_message is not None:
        logger.info(skip_message)
        return False
    return True
def test_find_with_pagination(self): for i in range(0, 20): RejectionPattern("Reject_pattern_%d" % i, "").save() records = RejectionPattern.find_with_pagination(page_request={"page_no": 2, "size": 10}) print "items", records self.assertEqual(10, len(records))
def test_should_be_rejected(self):
    """Check should_be_rejected() against a digit pattern and a lookbehind pattern."""
    reject_msg = 'input_text should be rejected'
    keep_msg = 'input_text should not be rejected'
    is_rejected = RejectionPattern.should_be_rejected

    # Digit pattern.
    RejectionPattern('[1-9]+').save()
    self.assertTrue(is_rejected('9887'), reject_msg)
    self.assertFalse(is_rejected('abcd'), keep_msg)

    # Negative lookbehind: flagged unless directly preceded by 非.
    RejectionPattern(u'(?<!非)中介').save()
    for text in (u'中介', u'是中介'):
        self.assertTrue(is_rejected(text), reject_msg)
    self.assertFalse(is_rejected(u'非中介'), keep_msg)
def test_extract_records_as_bytes(self): RejectionPattern('Pattern1', 'testing1').save() RejectionPattern('Pattern2').save() RejectionPattern('Pattern3', '测试').save() print 'Content as txt: ', RejectionPattern.extract_records_as_bytes('txt') print 'Content as excel: ', RejectionPattern.extract_records_as_bytes('xlsx') print 'Content as csv: ', RejectionPattern.extract_records_as_bytes('csv')
def should_load_details(self, job_item):
    """Return ``True`` only when no skip condition applies to ``job_item``.

    Skip conditions, evaluated in order with the first hit winning:
    the job already exists, the job is older than required, its contact
    is blocked, or its title is flagged by
    ``RejectionPattern.should_be_rejected``. Each hit is logged at info
    level before ``False`` is returned.

    :param job_item: job whose ``job_title`` and ``contact`` are read.
    :return: ``False`` on the first skip condition that hits, else ``True``.
    """
    def _skip(message):
        # Log why the details are skipped and produce the method's result.
        logger.info(message)
        return False

    if JobItem.is_exists(job_item):
        return _skip('[%s] skipping loading details as job already exists. job_title: %s'
                     % (self.name, job_item.job_title))

    if JobItem.is_older_required(job_item):
        return _skip('[%s] skipping loading details as job is older than %s days. job_title: %s'
                     % (self.name, str(config.HOUSEKEEPING_RECORD_ORDLER_THAN), job_item.job_title))

    if BlockedContact.is_contact_blocked(job_item.contact):
        return _skip('[%s] skipping loading details as job contact is blocked. contact: %s'
                     % (self.name, job_item.contact))

    if RejectionPattern.should_be_rejected(job_item.job_title):
        return _skip('[%s] skipping loading details as job matches rejection pattern. job_title: %s'
                     % (self.name, job_item.job_title))

    return True
def test_remove_records_matches_rejection_pattern(self):
    """Jobs whose titles match a saved rejection pattern must all be removed.

    Twenty jobs are saved, a pattern covering all their titles is saved,
    and the removal action is run. The job table is then expected to be
    empty. A failed assertion (or database error) propagates so the test
    can actually fail; the connection is closed in either case.
    """
    for i in range(0, 20):
        job_item = JobItem()
        job_item.job_title = u'人员_%d' % i
        job_item.save()

    # mark the title as blocked
    RejectionPattern(u'人员_\d+', 'For Testing').save()

    # run the remove action
    JobItem.remove_records_matches_rejection_pattern()

    conn = self.connect_db()
    try:
        c = conn.cursor()
        c.execute('SELECT COUNT(*) FROM ' + JobItem.table_name)
        # No except clause here: swallowing AssertionError would make
        # this check vacuous.
        self.assertEqual(c.fetchone()[0], 0, 'Count of job items should be 0')
    finally:
        conn.close()
def setUp(self):
    """Call ``clean_db()`` and prepare an unsaved ``RejectionPattern`` on ``self``."""
    self.clean_db()
    fixture_args = ('[1-9]+', u'人员')
    self.rejection_pattern = RejectionPattern(*fixture_args)
def test_find(self):
    """find() must return a record with the same reject_pattern as the saved one."""
    saved = self.rejection_pattern
    saved.save()

    found = RejectionPattern.find(saved)

    self.assertEqual(
        saved.reject_pattern,
        found.reject_pattern,
        'Item found should be the same as saved',
    )
def test_find(self):
    """Looking up the saved pattern must yield a matching reject_pattern."""
    self.rejection_pattern.save()
    lookup_result = RejectionPattern.find(self.rejection_pattern)

    expected = self.rejection_pattern.reject_pattern
    actual = lookup_result.reject_pattern
    self.assertEqual(expected, actual, "Item found should be the same as saved")
def setUp(self):
    """Run ``clean_db()`` and keep an unsaved ``RejectionPattern`` for the tests."""
    self.clean_db()
    unsaved_pattern = RejectionPattern("[1-9]+", u"人员")
    self.rejection_pattern = unsaved_pattern
class RejectionPatternTest(BaseTestCase): def setUp(self): self.clean_db() self.rejection_pattern = RejectionPattern("[1-9]+", u"人员") def tearDown(self): pass def test_save(self): RejectionPattern("Something").save() RejectionPattern(u"人员", None).save() print RejectionPattern.findall() conn = self.connect_db() try: c = conn.cursor() c.execute("SELECT COUNT(*) FROM " + RejectionPattern.table_name) self.assertEqual(c.fetchone()[0], 2, "Count of rejection_pattern should be 2") except: pass finally: conn.close() def test_find_all(self): self.rejection_pattern.save() another_rejection_pattern = RejectionPattern("[a-z]+", u"人员") another_rejection_pattern.save() records = RejectionPattern.findall() print "rejection_pattern", records self.assertEqual(2, len(records)) def test_find(self): self.rejection_pattern.save() result = RejectionPattern.find(self.rejection_pattern) self.assertEqual( self.rejection_pattern.reject_pattern, result.reject_pattern, "Item found should be the same as saved" ) def test_remove(self): self.rejection_pattern.save() self.rejection_pattern.remove() conn = self.connect_db() try: c = conn.cursor() c.execute("SELECT COUNT(*) FROM " + RejectionPattern.table_name) self.assertEqual(c.fetchone()[0], 0, "Count of rejection_pattern should be 0 after removing") except: pass finally: conn.close() def test_should_be_rejected(self): RejectionPattern("[1-9]+").save() self.assertTrue(RejectionPattern.should_be_rejected("9887"), "input_text should be rejected") self.assertFalse(RejectionPattern.should_be_rejected("abcd"), "input_text should not be rejected") RejectionPattern(u"(?<!非)中介").save() self.assertTrue(RejectionPattern.should_be_rejected(u"中介"), "input_text should be rejected") self.assertTrue(RejectionPattern.should_be_rejected(u"是中介"), "input_text should be rejected") self.assertFalse(RejectionPattern.should_be_rejected(u"非中介"), "input_text should not be rejected") def test_extract_records_as_bytes(self): RejectionPattern("Pattern1", "testing1").save() 
RejectionPattern("Pattern2").save() RejectionPattern("Pattern3", "测试").save() print "Content as txt: ", RejectionPattern.extract_records_as_bytes("txt") print "Content as excel: ", RejectionPattern.extract_records_as_bytes("xlsx") print "Content as csv: ", RejectionPattern.extract_records_as_bytes("csv") def test_get_instance_classname(self): self.assertEqual("RejectionPatternTest", self.__class__.__name__) @classmethod def test_get_classname(cls): print cls.__name__ def test_find_with_pagination(self): for i in range(0, 20): RejectionPattern("Reject_pattern_%d" % i, "").save() records = RejectionPattern.find_with_pagination(page_request={"page_no": 2, "size": 10}) print "items", records self.assertEqual(10, len(records))
class RejectionPatternTest(BaseTestCase): def setUp(self): self.clean_db() self.rejection_pattern = RejectionPattern('[1-9]+', u'人员') def tearDown(self): pass def test_save(self): RejectionPattern('Something').save() RejectionPattern(u'人员', None).save() print RejectionPattern.findall() conn = self.connect_db() try: c = conn.cursor() c.execute('SELECT COUNT(*) FROM ' + RejectionPattern.table_name) self.assertEqual(c.fetchone()[0], 2, 'Count of rejection_pattern should be 2') except: pass finally: conn.close() def test_find_all(self): self.rejection_pattern.save() another_rejection_pattern = RejectionPattern('[a-z]+', u'人员') another_rejection_pattern.save() records = RejectionPattern.findall() print 'rejection_pattern', records self.assertEqual(2, len(records)) def test_find(self): self.rejection_pattern.save() result = RejectionPattern.find(self.rejection_pattern) self.assertEqual(self.rejection_pattern.reject_pattern, result.reject_pattern, 'Item found should be the same as saved') def test_remove(self): self.rejection_pattern.save() self.rejection_pattern.remove() conn = self.connect_db() try: c = conn.cursor() c.execute('SELECT COUNT(*) FROM ' + RejectionPattern.table_name) self.assertEqual(c.fetchone()[0], 0, 'Count of rejection_pattern should be 0 after removing') except: pass finally: conn.close() def test_should_be_rejected(self): RejectionPattern('[1-9]+').save() self.assertTrue(RejectionPattern.should_be_rejected('9887'), 'input_text should be rejected') self.assertFalse(RejectionPattern.should_be_rejected('abcd'), 'input_text should not be rejected') RejectionPattern(u'(?<!非)中介').save() self.assertTrue(RejectionPattern.should_be_rejected(u'中介'), 'input_text should be rejected') self.assertTrue(RejectionPattern.should_be_rejected(u'是中介'), 'input_text should be rejected') self.assertFalse(RejectionPattern.should_be_rejected(u'非中介'), 'input_text should not be rejected') def test_extract_records_as_bytes(self): RejectionPattern('Pattern1', 'testing1').save() 
RejectionPattern('Pattern2').save() RejectionPattern('Pattern3', '测试').save() print 'Content as txt: ', RejectionPattern.extract_records_as_bytes('txt') print 'Content as excel: ', RejectionPattern.extract_records_as_bytes('xlsx') print 'Content as csv: ', RejectionPattern.extract_records_as_bytes('csv') def test_get_instance_classname(self): self.assertEqual('RejectionPatternTest', self.__class__.__name__) @classmethod def test_get_classname(cls): print cls.__name__ def test_find_with_pagination(self): for i in range(0, 20): RejectionPattern('Reject_pattern_%d' % i, '').save() records = RejectionPattern.find_with_pagination(page_request={'page_no': 2, 'size': 10}) print 'items', records self.assertEqual(10, len(records))