Ejemplo n.º 1
0
def fetch_estate():
    """Start a crawl of the estate page of every community in the ``house`` table.

    Reads the distinct ``communityId`` values from the database, turns each
    one into an ``http://www.iwjw.com/estate/<id>/`` URL and hands the list
    to a ``Master`` configured with ``result_dir='../iwjw/estate'``.
    """
    database = torndb.Connection(**dbutil.get_mysql_config())
    try:
        rows = database.query('select distinct communityId from house')
    finally:
        # The connection is only needed for this single query; close it
        # before the crawl starts so it is not leaked or held open.
        database.close()
    urls = ['http://www.iwjw.com/estate/%s/' % row.communityId for row in rows]
    master = Master(rest_period=5, result_model='html', result_dir='../iwjw/estate')
    fetcher = Fetcher(processor=ps.Processor_hn())
    master.add_fetchers(fetcher)
    master.start(urls)
Ejemplo n.º 2
0
def fetch_house_from_db():
    """Start a crawl of sale pages for houses that have no saved file yet.

    House ids come from ``select houseId from house where type=1``. An id
    is skipped when ``../iwjw/sale`` already contains a file whose name,
    with ``.html`` removed, equals that id. The remaining ids are turned
    into ``http://www.iwjw.com/sale/<id>/`` URLs and passed to a ``Master``
    configured with ``result_dir='../iwjw/sale'``.
    """
    print('sales')
    existed = set(name.replace('.html', '') for name in os.listdir('../iwjw/sale'))
    master = Master(rest_period=5, result_model='html', result_dir='../iwjw/sale')
    fetcher = Fetcher(processor=ps.Processor_hn())
    master.add_fetchers(fetcher)
    database = torndb.Connection(**dbutil.get_mysql_config())
    try:
        rows = database.query('select houseId from house where type=1;')
    finally:
        # Release the connection as soon as the rows are in memory, and
        # even if the query fails, instead of holding it during the crawl.
        database.close()
    # File names are strings, so compare the string form of the id; a
    # numeric houseId would otherwise never match an existing file name.
    sale_list = [
        'http://www.iwjw.com/sale/%s/' % row.houseId
        for row in rows
        if '%s' % row.houseId not in existed
    ]
    master.start(sale_list)
Ejemplo n.º 3
0
def fetch_house():
    """Start a crawl of the rent pages yielded by ``get_houses``.

    The URLs come from ``get_houses('../iwjw/rent_list', 'chuzu')`` and are
    passed to a ``Master`` configured with ``result_dir='../iwjw/rent'``.
    """
    # NOTE: the matching 'sale' pass (get_houses('../iwjw/sale_list', 'sale')
    # crawled into '../iwjw/sale') is currently disabled in this function.
    print('rent')
    crawler = Master(result_dir='../iwjw/rent', result_model='html', rest_period=5)
    crawler.add_fetchers(Fetcher(processor=ps.Processor_hn()))
    rent_urls = [url for url in get_houses('../iwjw/rent_list', 'chuzu')]
    crawler.start(rent_urls)
Ejemplo n.º 4
0
def fetch_list():
    """Start crawls of the first sale and rent listing page of each district.

    District ids are read from ``../district.id``, one per line; anything
    after a ``#`` on a line is dropped and surrounding whitespace is
    stripped. For the sale pass and then the rent pass, each id is formatted
    into the pass's URL template and the URLs are handed to a fresh
    ``Master`` that uses that pass's result directory.
    """
    # (label printed, result directory, URL template) for each pass, in order.
    jobs = (
        ('sale_list', '../iwjw/sale_list', 'http://www.iwjw.com/sale/shanghai/%sp1/'),
        ('rent_list', '../iwjw/rent_list', 'http://www.iwjw.com/chuzu/shanghai/%sp1/'),
    )
    for label, result_dir, url_template in jobs:
        print(label)
        master = Master(rest_period=5, result_model='html', result_dir=result_dir)
        fetcher = Fetcher(processor=ps.ProcessorIwjw())
        master.add_fetchers(fetcher)
        # Use a context manager so the file handle is closed after reading;
        # the original left it open for the garbage collector to reclaim.
        with codecs.open('../district.id') as district_file:
            district_ids = [line.split('#')[0].strip() for line in district_file]
        urls = [url_template % district_id for district_id in district_ids]
        master.start(urls)