Beispiel #1
0
 def process_item(self, item, spider):
     """Pass ``item`` through unless one of its text fields is rejected.

     The job title is checked first, then the job description; the first
     field for which ``RejectionPattern.should_be_rejected`` returns a true
     value causes the item to be dropped.

     :param item: object exposing ``job_title`` and ``job_desc`` attributes.
     :param spider: not used by this method.
     :returns: ``item`` itself when neither field is rejected.
     :raises DropItem: when the title or the description is rejected.
     """
     rejection_checks = (
         ('job_title', 'Job matches rejection pattern. Removing...'),
         ('job_desc', 'Job Description matches rejection pattern. Removing...'),
     )
     for attr_name, drop_message in rejection_checks:
         # getattr keeps the lookup lazy: the description is only read
         # once the title has passed its check.
         if RejectionPattern.should_be_rejected(getattr(item, attr_name)):
             raise DropItem(drop_message)
     return item
Beispiel #2
0
    def test_find_with_pagination(self):
        for i in range(0, 20):
            RejectionPattern('Reject_pattern_%d' % i, '').save()

        records = RejectionPattern.find_with_pagination(page_request={'page_no': 2, 'size': 10})

        print 'items', records
        self.assertEqual(10, len(records))
Beispiel #3
0
    def test_find_all(self):
        self.rejection_pattern.save()
        another_rejection_pattern = RejectionPattern("[a-z]+", u"人员")
        another_rejection_pattern.save()

        records = RejectionPattern.findall()
        print "rejection_pattern", records
        self.assertEqual(2, len(records))
Beispiel #4
0
 def test_should_be_rejected(self):
     """Check ``should_be_rejected`` against a digit pattern and a look-behind pattern."""
     rejected = RejectionPattern.should_be_rejected

     RejectionPattern("[1-9]+").save()
     self.assertTrue(rejected("9887"), "input_text should be rejected")
     self.assertFalse(rejected("abcd"), "input_text should not be rejected")

     # The second pattern carries a negative look-behind on the character
     # in front of the keyword; the assertions expect the keyword to be
     # rejected unless that character precedes it.
     RejectionPattern(u"(?<!非)中介").save()
     for text in (u"中介", u"是中介"):
         self.assertTrue(rejected(text), "input_text should be rejected")
     self.assertFalse(rejected(u"非中介"), "input_text should not be rejected")
Beispiel #5
0
    def test_find_all(self):
        self.rejection_pattern.save()
        another_rejection_pattern = RejectionPattern('[a-z]+', u'人员')
        another_rejection_pattern.save()

        records = RejectionPattern.findall()
        print 'rejection_pattern', records
        self.assertEqual(2, len(records))
Beispiel #6
0
    def test_extract_records_as_bytes(self):
        RejectionPattern("Pattern1", "testing1").save()
        RejectionPattern("Pattern2").save()
        RejectionPattern("Pattern3", "测试").save()

        print "Content as txt: ", RejectionPattern.extract_records_as_bytes("txt")
        print "Content as excel: ", RejectionPattern.extract_records_as_bytes("xlsx")
        print "Content as csv: ", RejectionPattern.extract_records_as_bytes("csv")
Beispiel #7
0
 def process_item(self, item, spider):
     """Return ``item`` unless its title or description is rejected.

     :param item: object exposing ``job_title`` and ``job_desc`` attributes.
     :param spider: not used by this method.
     :returns: the same ``item`` when no rejection applies.
     :raises DropItem: when ``RejectionPattern.should_be_rejected`` reports a
         match for the title (checked first) or for the description.
     """
     drop_reason = None
     if RejectionPattern.should_be_rejected(item.job_title):
         drop_reason = 'Job matches rejection pattern. Removing...'
     elif RejectionPattern.should_be_rejected(item.job_desc):
         drop_reason = 'Job Description matches rejection pattern. Removing...'

     if drop_reason is not None:
         raise DropItem(drop_reason)
     return item
Beispiel #8
0
    def test_save(self):
        RejectionPattern("Something").save()
        RejectionPattern(u"人员", None).save()

        print RejectionPattern.findall()

        conn = self.connect_db()
        try:
            c = conn.cursor()
            c.execute("SELECT COUNT(*) FROM " + RejectionPattern.table_name)
            self.assertEqual(c.fetchone()[0], 2, "Count of rejection_pattern should be 2")
        except:
            pass
        finally:
            conn.close()
Beispiel #9
0
    def test_save(self):
        RejectionPattern('Something').save()
        RejectionPattern(u'人员', None).save()

        print RejectionPattern.findall()

        conn = self.connect_db()
        try:
            c = conn.cursor()
            c.execute('SELECT COUNT(*) FROM ' + RejectionPattern.table_name)
            self.assertEqual(c.fetchone()[0], 2, 'Count of rejection_pattern should be 2')
        except:
            pass
        finally:
            conn.close()
Beispiel #10
0
    def should_load_details(self, job_item):
        """Decide whether the details of ``job_item`` should be loaded.

        The checks run in this order and the first one that applies wins:
        the job already exists, the job is older than required, the job
        contact is blocked, the job title matches a rejection pattern.  The
        reason for skipping is reported through ``logger.info``.

        :param job_item: object exposing ``job_title`` and ``contact``.
        :returns: ``False`` when any check applies, ``True`` otherwise.
        """
        skip_message = None
        if JobItem.is_exists(job_item):
            skip_message = (
                '[%s] skipping loading details as job already exists. job_title: %s'
                % (self.name, job_item.job_title))
        elif JobItem.is_older_required(job_item):
            skip_message = (
                '[%s] skipping loading details as job is older than %s days. job_title: %s'
                % (self.name, str(config.HOUSEKEEPING_RECORD_ORDLER_THAN),
                   job_item.job_title))
        elif BlockedContact.is_contact_blocked(job_item.contact):
            skip_message = (
                '[%s] skipping loading details as job contact is blocked. contact: %s'
                % (self.name, job_item.contact))
        elif RejectionPattern.should_be_rejected(job_item.job_title):
            skip_message = (
                '[%s] skipping loading details as job matches rejection pattern. job_title: %s'
                % (self.name, job_item.job_title))

        if skip_message is None:
            return True

        logger.info(skip_message)
        return False
Beispiel #11
0
    def test_find_with_pagination(self):
        for i in range(0, 20):
            RejectionPattern("Reject_pattern_%d" % i, "").save()

        records = RejectionPattern.find_with_pagination(page_request={"page_no": 2, "size": 10})

        print "items", records
        self.assertEqual(10, len(records))
Beispiel #12
0
 def test_should_be_rejected(self):
     """Save two patterns in turn and check sample texts after each save.

     Each scenario is ``(pattern to save, ((text, expected rejection), ...))``;
     the samples of a scenario are checked right after its pattern is saved.
     """
     scenarios = (
         ('[1-9]+', (
             ('9887', True),
             ('abcd', False),
         )),
         (u'(?<!非)中介', (
             (u'中介', True),
             (u'是中介', True),
             (u'非中介', False),
         )),
     )
     for pattern, samples in scenarios:
         RejectionPattern(pattern).save()
         for text, expect_rejection in samples:
             outcome = RejectionPattern.should_be_rejected(text)
             if expect_rejection:
                 self.assertTrue(outcome, 'input_text should be rejected')
             else:
                 self.assertFalse(outcome, 'input_text should not be rejected')
Beispiel #13
0
    def test_extract_records_as_bytes(self):
        RejectionPattern('Pattern1', 'testing1').save()
        RejectionPattern('Pattern2').save()
        RejectionPattern('Pattern3', '测试').save()

        print 'Content as txt: ', RejectionPattern.extract_records_as_bytes('txt')
        print 'Content as excel: ', RejectionPattern.extract_records_as_bytes('xlsx')
        print 'Content as csv: ', RejectionPattern.extract_records_as_bytes('csv')
Beispiel #14
0
    def should_load_details(self, job_item):
        """Return ``True`` when none of the skip conditions applies to ``job_item``.

        Skip conditions, evaluated in order: the job already exists, the job
        is older than required, its contact is blocked, or its title matches
        a rejection pattern.  The first condition that applies is reported
        through ``logger.info`` and ``False`` is returned.

        :param job_item: object exposing ``job_title`` and ``contact``.
        :returns: ``True`` if details should be loaded, otherwise ``False``.
        """
        if JobItem.is_exists(job_item):
            message = (
                '[%s] skipping loading details as job already exists. job_title: %s'
                % (self.name, job_item.job_title)
            )
            logger.info(message)
            return False

        if JobItem.is_older_required(job_item):
            message = (
                '[%s] skipping loading details as job is older than %s days. job_title: %s'
                % (
                    self.name,
                    str(config.HOUSEKEEPING_RECORD_ORDLER_THAN),
                    job_item.job_title,
                )
            )
            logger.info(message)
            return False

        if BlockedContact.is_contact_blocked(job_item.contact):
            message = (
                '[%s] skipping loading details as job contact is blocked. contact: %s'
                % (self.name, job_item.contact)
            )
            logger.info(message)
            return False

        if RejectionPattern.should_be_rejected(job_item.job_title):
            message = (
                '[%s] skipping loading details as job matches rejection pattern. job_title: %s'
                % (self.name, job_item.job_title)
            )
            logger.info(message)
            return False

        return True
Beispiel #15
0
    def test_remove_records_matches_rejection_pattern(self):
        """Save 20 job items, reject their titles, and expect the table to be empty.

        After ``JobItem.remove_records_matches_rejection_pattern`` runs, the
        row count of the ``JobItem`` table is read directly through
        ``connect_db`` and must be zero.
        """
        for i in range(0, 20):
            job_item = JobItem()
            job_item.job_title = u'人员_%d' % i
            job_item.save()

        # Mark the titles as blocked.  The backslash is escaped explicitly so
        # the literal does not rely on ``\d`` being an unrecognised escape;
        # the resulting string is unchanged.
        RejectionPattern(u'人员_\\d+', 'For Testing').save()

        # Run the remove action.
        JobItem.remove_records_matches_rejection_pattern()

        conn = self.connect_db()
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT COUNT(*) FROM ' + JobItem.table_name)
            remaining = cursor.fetchone()[0]
        finally:
            # Release the connection whether or not the query succeeded.
            conn.close()

        # Assert outside any broad exception handler so that leftover rows
        # (or a failing query) actually fail the test.
        self.assertEqual(remaining, 0, 'Count of job items should be 0')
Beispiel #16
0
 def setUp(self):
     """Run ``clean_db`` and build the pattern fixture used by the tests.

     The fixture is only constructed here; tests that need it stored call
     ``save`` on it themselves.
     """
     self.clean_db()
     fixture = RejectionPattern('[1-9]+', u'人员')
     self.rejection_pattern = fixture
Beispiel #17
0
 def test_find(self):
     """Save the fixture and expect ``find`` to return the same ``reject_pattern``."""
     saved = self.rejection_pattern
     saved.save()

     found = RejectionPattern.find(saved)

     self.assertEqual(
         saved.reject_pattern,
         found.reject_pattern,
         'Item found should be the same as saved',
     )
Beispiel #18
0
 def test_find(self):
     """Look the saved fixture up again and compare the ``reject_pattern`` values."""
     fixture = self.rejection_pattern
     fixture.save()
     looked_up = RejectionPattern.find(fixture)
     failure_message = "Item found should be the same as saved"
     self.assertEqual(fixture.reject_pattern, looked_up.reject_pattern, failure_message)
Beispiel #19
0
 def setUp(self):
     """Call ``clean_db`` first, then create the pattern fixture for the tests.

     The fixture is not saved here; individual tests call ``save`` when
     they need it stored.
     """
     self.clean_db()
     pattern_fixture = RejectionPattern("[1-9]+", u"人员")
     self.rejection_pattern = pattern_fixture
Beispiel #20
0
class RejectionPatternTest(BaseTestCase):
    def setUp(self):
        self.clean_db()
        self.rejection_pattern = RejectionPattern("[1-9]+", u"人员")

    def tearDown(self):
        pass

    def test_save(self):
        RejectionPattern("Something").save()
        RejectionPattern(u"人员", None).save()

        print RejectionPattern.findall()

        conn = self.connect_db()
        try:
            c = conn.cursor()
            c.execute("SELECT COUNT(*) FROM " + RejectionPattern.table_name)
            self.assertEqual(c.fetchone()[0], 2, "Count of rejection_pattern should be 2")
        except:
            pass
        finally:
            conn.close()

    def test_find_all(self):
        self.rejection_pattern.save()
        another_rejection_pattern = RejectionPattern("[a-z]+", u"人员")
        another_rejection_pattern.save()

        records = RejectionPattern.findall()
        print "rejection_pattern", records
        self.assertEqual(2, len(records))

    def test_find(self):
        self.rejection_pattern.save()
        result = RejectionPattern.find(self.rejection_pattern)
        self.assertEqual(
            self.rejection_pattern.reject_pattern, result.reject_pattern, "Item found should be the same as saved"
        )

    def test_remove(self):
        self.rejection_pattern.save()
        self.rejection_pattern.remove()
        conn = self.connect_db()
        try:
            c = conn.cursor()
            c.execute("SELECT COUNT(*) FROM " + RejectionPattern.table_name)
            self.assertEqual(c.fetchone()[0], 0, "Count of rejection_pattern should be 0 after removing")
        except:
            pass
        finally:
            conn.close()

    def test_should_be_rejected(self):
        RejectionPattern("[1-9]+").save()
        self.assertTrue(RejectionPattern.should_be_rejected("9887"), "input_text should be rejected")
        self.assertFalse(RejectionPattern.should_be_rejected("abcd"), "input_text should not be rejected")
        RejectionPattern(u"(?<!非)中介").save()
        self.assertTrue(RejectionPattern.should_be_rejected(u"中介"), "input_text should be rejected")
        self.assertTrue(RejectionPattern.should_be_rejected(u"是中介"), "input_text should be rejected")
        self.assertFalse(RejectionPattern.should_be_rejected(u"非中介"), "input_text should not be rejected")

    def test_extract_records_as_bytes(self):
        RejectionPattern("Pattern1", "testing1").save()
        RejectionPattern("Pattern2").save()
        RejectionPattern("Pattern3", "测试").save()

        print "Content as txt: ", RejectionPattern.extract_records_as_bytes("txt")
        print "Content as excel: ", RejectionPattern.extract_records_as_bytes("xlsx")
        print "Content as csv: ", RejectionPattern.extract_records_as_bytes("csv")

    def test_get_instance_classname(self):
        self.assertEqual("RejectionPatternTest", self.__class__.__name__)

    @classmethod
    def test_get_classname(cls):
        print cls.__name__

    def test_find_with_pagination(self):
        for i in range(0, 20):
            RejectionPattern("Reject_pattern_%d" % i, "").save()

        records = RejectionPattern.find_with_pagination(page_request={"page_no": 2, "size": 10})

        print "items", records
        self.assertEqual(10, len(records))
Beispiel #21
0
class RejectionPatternTest(BaseTestCase):
    def setUp(self):
        self.clean_db()
        self.rejection_pattern = RejectionPattern('[1-9]+', u'人员')

    def tearDown(self):
        pass

    def test_save(self):
        RejectionPattern('Something').save()
        RejectionPattern(u'人员', None).save()

        print RejectionPattern.findall()

        conn = self.connect_db()
        try:
            c = conn.cursor()
            c.execute('SELECT COUNT(*) FROM ' + RejectionPattern.table_name)
            self.assertEqual(c.fetchone()[0], 2, 'Count of rejection_pattern should be 2')
        except:
            pass
        finally:
            conn.close()

    def test_find_all(self):
        self.rejection_pattern.save()
        another_rejection_pattern = RejectionPattern('[a-z]+', u'人员')
        another_rejection_pattern.save()

        records = RejectionPattern.findall()
        print 'rejection_pattern', records
        self.assertEqual(2, len(records))

    def test_find(self):
        self.rejection_pattern.save()
        result = RejectionPattern.find(self.rejection_pattern)
        self.assertEqual(self.rejection_pattern.reject_pattern, result.reject_pattern, 'Item found should be the same as saved')

    def test_remove(self):
        self.rejection_pattern.save()
        self.rejection_pattern.remove()
        conn = self.connect_db()
        try:
            c = conn.cursor()
            c.execute('SELECT COUNT(*) FROM ' + RejectionPattern.table_name)
            self.assertEqual(c.fetchone()[0], 0, 'Count of rejection_pattern should be 0 after removing')
        except:
            pass
        finally:
            conn.close()

    def test_should_be_rejected(self):
        RejectionPattern('[1-9]+').save()
        self.assertTrue(RejectionPattern.should_be_rejected('9887'), 'input_text should be rejected')
        self.assertFalse(RejectionPattern.should_be_rejected('abcd'), 'input_text should not be rejected')
        RejectionPattern(u'(?<!非)中介').save()
        self.assertTrue(RejectionPattern.should_be_rejected(u'中介'), 'input_text should be rejected')
        self.assertTrue(RejectionPattern.should_be_rejected(u'是中介'), 'input_text should be rejected')
        self.assertFalse(RejectionPattern.should_be_rejected(u'非中介'), 'input_text should not be rejected')

    def test_extract_records_as_bytes(self):
        RejectionPattern('Pattern1', 'testing1').save()
        RejectionPattern('Pattern2').save()
        RejectionPattern('Pattern3', '测试').save()

        print 'Content as txt: ', RejectionPattern.extract_records_as_bytes('txt')
        print 'Content as excel: ', RejectionPattern.extract_records_as_bytes('xlsx')
        print 'Content as csv: ', RejectionPattern.extract_records_as_bytes('csv')

    def test_get_instance_classname(self):
        self.assertEqual('RejectionPatternTest', self.__class__.__name__)

    @classmethod
    def test_get_classname(cls):
        print cls.__name__


    def test_find_with_pagination(self):
        for i in range(0, 20):
            RejectionPattern('Reject_pattern_%d' % i, '').save()

        records = RejectionPattern.find_with_pagination(page_request={'page_no': 2, 'size': 10})

        print 'items', records
        self.assertEqual(10, len(records))