def __init__(self, root, charset): Robot.__init__(self, root, charset) self.url_pattern = re.compile( ur'^http://\w+\.ganji\.com(|/|/fang1/?|/fang1/\d+x\.htm|/fang1/tuiguang[-]\d+.htm|/fang1/f\d+/?)$', re.U | re.I) self.arch_pattern = re.compile( ur'[房户](\s| )*型[^::]*[::]([^\d]*)(\d[^<>\s]+)[\s<]', re.U | re.I)
def run(T):
    """Step a fresh Robot in fixed increments until time T, then render its log.

    On every step the robot's current ``q`` is printed and appended to
    ``robot.log`` before ``next_pos`` advances it by one time step.  Once
    the accumulated time is no longer below ``T`` the log is handed to
    ``show`` together with the step length divided by the playback factor.
    """
    step = 0.0005
    playback_factor = 0.1
    robot = Robot()
    elapsed = 0.
    while elapsed < T:
        print(robot.q)
        robot.log.append(robot.q)
        robot.next_pos(step)
        elapsed += step
    show('jump.html', robot.log, step / playback_factor)
def is_valid_url(self, url):
    """Decide whether ``url`` is a ganji.com page this robot should visit.

    The URL is accepted only when all of the following hold:

    1. ``Robot.is_valid_url`` accepts it,
    2. it matches ``self.url_pattern``,
    3. it does not start with one of the excluded prefixes below.

    Returns the base-class result unchanged when that result is falsy,
    otherwise ``True`` or ``False``.
    """
    excluded_prefixes = (
        'http://www.ganji.com/fang1',
        'http://help.ganji.com',
        'http://club.ganji.com',
    )
    base_ok = Robot.is_valid_url(self, url)
    if not base_ok:
        # Hand back the base-class value itself, as an ``and`` chain would.
        return base_ok
    if self.url_pattern.match(url) is None:
        return False
    # str.startswith accepts a tuple and is true if any prefix matches.
    return not url.startswith(excluded_prefixes)
def __init__(self, root, charset): Robot.__init__(self, root, charset) self.url_pattern = re.compile( ur'^http://[\w\d\.]+\.soufun\.com(|/|/house/?|/chuzu/[\d_]+\.htm)$', re.U | re.I)
def __init__(self, root, charset): Robot.__init__(self, root, charset) self.url_pattern = re.compile(ur'^http://\w+\.ganji\.com(|/|/fang1/?|/fang1/\d+x\.htm|/fang1/tuiguang[-]\d+.htm|/fang1/f\d+/?)$', re.U | re.I) self.arch_pattern = re.compile(ur'[房户](\s| )*型[^::]*[::]([^\d]*)(\d[^<>\s]+)[\s<]', re.U | re.I)
def is_valid_url(self, url):
    """Decide whether ``url`` is a ganji.com page this robot should visit.

    The URL is accepted only when all of the following hold:

    1. ``Robot.is_valid_url`` accepts it,
    2. it matches ``self.url_pattern``,
    3. it does not start with one of the excluded prefixes below.

    Returns the base-class result unchanged when that result is falsy,
    otherwise ``True`` or ``False``.
    """
    excluded_prefixes = (
        'http://www.ganji.com/fang1',
        'http://help.ganji.com',
        'http://club.ganji.com',
    )
    base_ok = Robot.is_valid_url(self, url)
    if not base_ok:
        # Hand back the base-class value itself, as an ``and`` chain would.
        return base_ok
    if self.url_pattern.match(url) is None:
        return False
    # str.startswith accepts a tuple and is true if any prefix matches.
    return not url.startswith(excluded_prefixes)
def get_address(self, page):
    """Return the address for ``page``, falling back to its district.

    ``Robot.get_address`` is tried first; only when it yields ``None`` is
    ``Robot.get_district`` consulted.  The fallback result is returned as
    is, so the method may still return ``None``.
    """
    address = Robot.get_address(self, page)
    if address is None:
        address = Robot.get_district(self, page)
    return address
def __init__(self, root, charset): Robot.__init__(self, root, charset) self.url_pattern = re.compile( ur'^http://\w+.anjuke.com/(rental/(p\d+)?|prop/rent/\d+)/?$')
def __init__(self, root, charset): Robot.__init__(self, root, charset) self.arch_pattern = re.compile(ur'[房户](\s| )*型[^::]*[::]\s*(<b>)(\d[^<\s]+)[<\s]', re.U | re.I) self.url_pattern = re.compile(ur'^http://\w+\.esf\.focus\.cn(|/|/zufang/?|/zufang/\d+\.html|/zufang/zj0m0h0jz0zx0zlfs0bq0fbr0p\d+px0t0jd0bx0pic0q0b0xl0zd0xq0st1s0tx1/?)$', re.U | re.I)
def is_valid_url(self, url):
    """Accept ``url`` only if the base Robot does and it matches ``url_pattern``.

    Returns the base-class result unchanged when that result is falsy,
    otherwise ``True`` or ``False`` depending on whether
    ``self.url_pattern`` matches at the start of ``url``.
    """
    base_ok = Robot.is_valid_url(self, url)
    if not base_ok:
        # Hand back the base-class value itself, as an ``and`` chain would.
        return base_ok
    return self.url_pattern.match(url) is not None
def __init__(self, root, charset): Robot.__init__(self, root, charset) self.url_pattern = re.compile(ur'^http://\w+.anjuke.com/(rental/(p\d+)?|prop/rent/\d+)/?$')
def __init__(self, root, charset): Robot.__init__(self, root, charset) self.url_pattern = re.compile(ur'^(http://\w+\.zufang\.sina\.com\.cn(|/|/detail/\d+/?|/house/(n\d+)?/?)|http://\w+\.esf\.sina\.com\.cn/?)$', re.U | re.I)
def __init__(self, root, charset): Robot.__init__(self, root, charset) self.url_pattern = re.compile( ur'http://\w+\.haozu\.com/(listing/(p%d+)?|rental/broker/n/\d+)?/?$' )
def __init__(self, root, charset): Robot.__init__(self, root, charset) self.url_pattern = re.compile(ur'http://\w+\.58\.com/zufang/(pn\d+/?|\d+x.shtml)?$', re.U | re.I)
def __init__(self, root, charset): Robot.__init__(self, root, charset) self.url_pattern = re.compile(ur'http://\w+\.haozu\.com/(listing/(p%d+)?|rental/broker/n/\d+)?/?$')
def __init__(self, root, charset): Robot.__init__(self, root, charset) self.url_pattern = re.compile(ur'^http://[\w\d\.]+\.soufun\.com(|/|/house/?|/chuzu/[\d_]+\.htm)$', re.U | re.I)
def __init__(self, root, charset): Robot.__init__(self, root, charset) self.url_pattern = re.compile( ur'^(http://\w+\.zufang\.sina\.com\.cn(|/|/detail/\d+/?|/house/(n\d+)?/?)|http://\w+\.esf\.sina\.com\.cn/?)$', re.U | re.I)