Ejemplo n.º 1
0
 def __init__(self, threadcnt):
     """Set up the spider registered under the name 'HenanGaokaoSpider'.

     Args:
         threadcnt: Passed straight through to ``CourtSpider.__init__``
             (presumably the worker-thread count -- confirm in the base
             class).
     """
     CourtSpider.__init__(self, threadcnt)

     # Plain state first: spider name, page cursor and test flag.
     self._name = 'HenanGaokaoSpider'
     self._cur_page = 0
     # NOTE(review): test mode is switched on unconditionally here.
     self._test_mode = True

     # Collaborators: page storage backend and job splitter.
     store = HenanGaokaoStore()
     spliter = HenanSpliter()
     self.pagestore = store
     self.job_spliter = spliter
Ejemplo n.º 2
0
 def __init__(self, threadcnt):
     """Set up the spider registered under the name 'WulumuqiCourt'.

     Besides assigning instance attributes, this calls ``Doc2Txt.init()``
     once per constructed spider.

     Args:
         threadcnt: Passed straight through to ``CourtSpider.__init__``.
     """
     CourtSpider.__init__(self, threadcnt)

     # Plain state: spider name and test flag (off for this spider).
     self._name = 'WulumuqiCourt'
     self._test_mode = False

     # Collaborators: page storage backend and job splitter.
     self.pagestore = WLMQCourtStore()
     self.job_spliter = WLMQSpliter()

     # Kept as the final step, exactly as in the original ordering.
     Doc2Txt.init()
Ejemplo n.º 3
0
 def __init__(self,
              threadcnt=10,
              seed_file=None,
              mode='links',
              list_file='links',
              recover=False,
              test=False):
     """Set up the spider registered under the name 'HangzhouCourt'.

     Args:
         threadcnt: Passed straight through to ``CourtSpider.__init__``.
         seed_file: Stored unchanged as ``self.seed_file``.
         mode: Stored unchanged as ``self.mode``.
         list_file: Stored as ``self.list_file`` and handed to
             ``LinkSaver`` to build ``self.link_saver``.
         recover: Stored unchanged as ``self.recover``.
         test: Stored as ``self._test_mode`` and mirrored onto
             ``self.pagestore.testmode``.
     """
     CourtSpider.__init__(self, threadcnt)
     self._name = 'HangzhouCourt'
     self.pagestore = HZCourtStore()
     self.job_spliter = HZSpliter()
     self._test_mode = test
     self.pagestore.testmode = test

     # Read the clock exactly once so that ``self.today`` and the 'jarq2'
     # upper bound below can never disagree (two separate reads could land
     # on different dates if construction straddles midnight).
     self.today = time.strftime('%Y%m%d', time.localtime())

     # Default list-query form fields; the date window runs from the fixed
     # lower bound '19700101' up to today's local date.
     self.list_data = {
         'pageno': '1',
         'pagesize': '20',
         'ajlb': '',
         'cbfy': '1300',
         'ah': '',
         'jarq1': '19700101',
         'jarq2': self.today,
         'key': ''
     }
     self.seed_file = seed_file
     # NOTE(review): this is 50 while list_data['pagesize'] is '20' --
     # confirm whether the two values are meant to differ.
     self.page_size = 50
     self.mode = mode
     self.list_file = list_file
     self.recover = recover
     self.link_saver = LinkSaver(self.list_file)
Ejemplo n.º 4
0
 def __init__(self, endFbrq='', startFbrq='', thread_cnt=5):
     """Set up the spider registered under the name 'ChangchunCourt'.

     Note the positional order of the signature: ``endFbrq`` comes
     *before* ``startFbrq``.

     Args:
         endFbrq: Stored unchanged as ``self.endFbrq``.
         startFbrq: Stored unchanged as ``self.startFbrq``.
         thread_cnt: Passed straight through to ``CourtSpider.__init__``.
     """
     CourtSpider.__init__(self, thread_cnt)
     self._name = 'ChangchunCourt'

     # Collaborators: page storage backend and job splitter.
     store = CCCourtStore()
     spliter = CCSpliter()
     self.pagestore = store
     self.job_spliter = spliter

     # Caller-supplied bounds (presumably a publication-date range --
     # verify against the code that consumes them).
     self.startFbrq, self.endFbrq = startFbrq, endFbrq
Ejemplo n.º 5
0
 def __init__(self, threadcnt, list_seeds=None):
     """Set up the spider registered under the name 'FoshanCourt'.

     Args:
         threadcnt: Passed straight through to ``CourtSpider.__init__``.
         list_seeds: Stored unchanged as ``self.list_seeds``; defaults to
             ``None``.
     """
     CourtSpider.__init__(self, threadcnt)

     # Plain state: spider name, test flag, page size and caller's seeds.
     self._name = 'FoshanCourt'
     # NOTE(review): test mode is switched on unconditionally here.
     self._test_mode = True
     self.page_size = 20
     self.list_seeds = list_seeds

     # Collaborators: page storage backend and job splitter.
     self.pagestore = FSCourtStore()
     self.job_spliter = FSSpliter()
Ejemplo n.º 6
0
 def __init__(self, threadcnt):
     """Set up the spider registered under the name 'YantianCourt'.

     After all attributes are assigned, ``self.register_signal()`` is
     called (defined outside this view).

     Args:
         threadcnt: Passed straight through to ``CourtSpider.__init__``.
     """
     CourtSpider.__init__(self, threadcnt)

     # Plain state: spider name, page cursor, test flag and the file name
     # stored in ``_remain_job_file``.
     self._name = 'YantianCourt'
     self._cur_page = 0
     # NOTE(review): test mode is switched on unconditionally here.
     self._test_mode = True
     self._remain_job_file = 'jobs_remain'

     # Collaborators: page storage backend and job splitter.
     self.pagestore = YantianCourtStore()
     self.job_spliter = YantianSpliter()

     # Kept as the final step so every attribute above is already set
     # when it runs, matching the original ordering.
     self.register_signal()
Ejemplo n.º 7
0
    def __init__(self, thread_count=5, name='ShanghaiCourtListSpider', log='list.spider.log',
                 out='links',
                 recover=False):
        """Set up a list spider whose name defaults to 'ShanghaiCourtListSpider'.

        Args:
            thread_count: Passed to ``CourtSpider.__init__`` as its first
                argument.
            name: Stored as ``self._name``.
            log: Passed to ``CourtSpider.__init__`` as its second argument.
            out: Handed to ``LinkSaver`` to build ``self.link_saver``.
            recover: Stored unchanged as ``self.recover``.
        """
        CourtSpider.__init__(self, thread_count, log)

        # Plain state taken from arguments, plus a counter starting at zero.
        self._name = name
        self.recover = recover
        self.pager_failed_count = 0

        # Storage collaborators, constructed in the original order: the seed
        # store, then two ShanghaiLinkDb handles keyed 'sh_link' / 'sh_seed',
        # then the link saver.
        self.pagestore = ShanghaiSeedStore()
        self.linkdb = ShanghaiLinkDb('sh_link')
        self.seedb = ShanghaiLinkDb('sh_seed')
        self.link_saver = LinkSaver(out)

        # Lock object held on the instance; what it guards is decided by
        # code outside this view.
        self.lock = threading.Lock()
Ejemplo n.º 8
0
 def __init__(self, threadcnt):
     """Set up the spider registered under the name 'FutianCourt'.

     Args:
         threadcnt: Passed straight through to ``CourtSpider.__init__``.
     """
     CourtSpider.__init__(self, threadcnt)

     # Plain state: spider name and test flag.
     self._name = 'FutianCourt'
     # NOTE(review): test mode is switched on unconditionally here.
     self._test_mode = True

     # Collaborators: page storage backend and job splitter.
     store = FutianCourtStore()
     spliter = FutianSpliter()
     self.pagestore = store
     self.job_spliter = spliter
Ejemplo n.º 9
0
 def __init__(self, threadcnt):
     """Set up the spider registered under the name 'dgCourt'.

     Besides assigning instance attributes, this calls ``Doc2Txt.init()``
     once per constructed spider.

     Args:
         threadcnt: Passed straight through to ``CourtSpider.__init__``.
     """
     CourtSpider.__init__(self, threadcnt)
     self._name = 'dgCourt'

     # Collaborators: page storage backend and job splitter.
     store = DGCourtStore()
     spliter = DGSpliter()
     self.pagestore = store
     self.job_spliter = spliter

     # Kept as the final step, exactly as in the original ordering.
     Doc2Txt.init()
Ejemplo n.º 10
0
 def __init__(self, threadcnt):
     """Set up the spider registered under the name 'cqnaCourt'.

     Args:
         threadcnt: Passed straight through to ``CourtSpider.__init__``.
     """
     CourtSpider.__init__(self, threadcnt)
     self._name = 'cqnaCourt'

     # Collaborators: page storage backend and job splitter.
     store = CQNACourtStore()
     spliter = CQNASpliter()
     self.pagestore = store
     self.job_spliter = spliter
Ejemplo n.º 11
0
 def __init__(self, threadcnt):
     """Set up the spider registered under the name 'FoshanLinkSpider'.

     No page store or job splitter is assigned here; the only collaborator
     created is a ``LinkSaver`` built from the fixed name "links".

     Args:
         threadcnt: Passed straight through to ``CourtSpider.__init__``.
     """
     CourtSpider.__init__(self, threadcnt)

     # Plain state: spider name, page size and test flag.
     self._name = 'FoshanLinkSpider'
     self.page_size = 20
     # NOTE(review): test mode is switched on unconditionally here.
     self._test_mode = True

     # Link output collaborator.
     saver = LinkSaver("links")
     self.link_saver = saver