def __init__(self, test_mode=0):
    """Set up the crawler's lists, web-service client and logger.

    test_mode -- stored on the instance as-is (default 0).
    """
    self.test_mode = test_mode

    # Wrappers built from the configured tracker / interest / submitted files.
    self.tracker_list = XMLWrapper(cpolicy.TRACKER_FILE, tag='TrackerList')
    self.interest_list = XMLWrapper(cpolicy.INTEREST_FILE, tag='InterestList')
    self.submitted_list = XMLWrapper(cpolicy.SUBMITTED_FILE)

    # Web-service client configured from the active policy.
    setting = policy.get_policy()
    endpoint = (setting(policy.WEBSERVICE_IP), setting(policy.WEBSERVICE_PORT))
    self.webreq = WebServiceRequest(endpoint, setting(policy.WEBSERVICE_ID))

    self.ignore_wait = setting(policy.IGNORE_WAITING_MEDIA)
    self.log = get_logger()

    # load is true only when the IGNORE_WAITING_MEDIA policy value equals 0.
    load_waiting = (self.ignore_wait == 0)
    self.wait_list = XMLWrapper(cpolicy.WAIT_FILE, load=load_waiting)
    # NOTE(review): failed_list is bound to WAIT_FILE as well (with load=0),
    # presumably so failed media are retried as waiting media -- confirm.
    self.failed_list = XMLWrapper(cpolicy.WAIT_FILE, load=0)
class Crawler:
    """Walk the tracker list, queue new media and submit waiting media.

    process() is the entry point; it calls process_tracker() for every
    tracker and submit() for every media in the waiting list.
    """

    def __init__(self, test_mode=0):
        """Set up the crawler's lists, web-service client and logger.

        test_mode -- when true, process() prints each waiting media
                     instead of submitting it.
        """
        self.test_mode = test_mode
        self.tracker_list = XMLWrapper(cpolicy.TRACKER_FILE, tag='TrackerList')
        self.interest_list = XMLWrapper(cpolicy.INTEREST_FILE, tag='InterestList')
        self.submitted_list = XMLWrapper(cpolicy.SUBMITTED_FILE)
        pol = policy.get_policy()
        self.webreq = WebServiceRequest((pol(policy.WEBSERVICE_IP),
                                         pol(policy.WEBSERVICE_PORT)),
                                        pol(policy.WEBSERVICE_ID))
        self.ignore_wait = pol(policy.IGNORE_WAITING_MEDIA)
        self.log = get_logger()
        # load is true only when the IGNORE_WAITING_MEDIA policy value equals 0.
        self.wait_list = XMLWrapper(cpolicy.WAIT_FILE, load=(self.ignore_wait == 0))
        # NOTE(review): failed_list is bound to WAIT_FILE as well (with load=0),
        # presumably so failed media are retried as waiting media -- confirm.
        self.failed_list = XMLWrapper(cpolicy.WAIT_FILE, load=0)

    def process_tracker(self, tracker):
        """Queue the media of one tracker that is neither submitted nor waiting.

        The filter and loader classes are looked up with get_filter() and
        get_loader() from tracker.filter and tracker.loader.  Every media
        returned by the loader's fetch() that is in neither submitted_list
        nor wait_list is fetched and appended to wait_list.
        """
        Filter = get_filter(tracker.filter)
        Loader = get_loader(tracker.loader)
        loader = Loader(tracker, Filter(self.interest_list, tracker.publisher))
        for media in loader.fetch():
            if media in self.submitted_list or media in self.wait_list:
                continue
            media.fetch()
            self.wait_list.append(media)

    def process(self):
        """Run one crawl: collect media from all trackers, then submit.

        Each waiting media goes to submitted_list when submit() succeeds
        and to failed_list otherwise.  In test mode the media is only
        printed and neither list is touched.
        """
        # NOTE(review): preprocess()/postprocess() are not defined in the
        # visible part of this class; they are expected to exist elsewhere.
        self.preprocess()
        for tracker in self.tracker_list:
            self.process_tracker(tracker)
        for media in self.wait_list:
            if self.test_mode:
                print(media)
                continue
            if self.submit(media):
                self.submitted_list.append(media)
            else:
                self.failed_list.append(media)
        self.postprocess()

    def submit(self, media):
        """Submit one media through the web service.

        Returns 1 when the service answers 'OK', otherwise 0.  A media
        that does not exist after fetch(), a non-'OK' answer and any
        exception raised while submitting are logged and reported as 0.
        """
        # quote() lives in urllib on Python 2 and in urllib.parse on Python 3.
        try:
            from urllib import quote
        except ImportError:
            from urllib.parse import quote

        media.fetch()
        if not media.exists():
            self.log.error('submission failed: %s not exists\n' % media.title)
            return 0
        try:
            _key, value = self.webreq.add(quote(media.filename()))
            if value == 'OK':
                self.log.info('%s submitted\n' % media.title)
                return 1
            self.log.error('submission failed: %s %s\n' % (media.title, value))
        except Exception as why:
            # Best effort: a failing request must not abort the whole crawl.
            self.log.warn('unexpected exception: %s\n' % str(why))
        return 0