def get_detail_url():
    """Build the detail-API URLs for every property that still needs fetching.

    Selects the ``propertyId`` of each row in ``realtor_detail_page_json``
    that is flagged dirty (``"isDirty" = '1'``) or has no ``detailJson`` yet,
    and formats each id into a property-detail API URL.

    The cursor and the connection are released even when the query fails.

    Returns:
        str: The URLs joined by commas; an empty string when no row matches.
    """
    sql_string = '''
        SELECT
            "propertyId"
        FROM
            realtor_detail_page_json
        WHERE
            "isDirty" = '1'
            OR "detailJson" IS NULL
    '''
    url_template = (
        'https://mapi-ng.rdc.moveaws.com/api/v1/properties/{}'
        '?client_id=rdc_mobile_native%2C9.3.7%2Candroid'
    )

    conn = get_psql_con()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql_string)
            # Each fetched row is a one-column sequence holding the propertyId.
            url_lists = [url_template.format(row[0]) for row in cursor.fetchall()]
        finally:
            cursor.close()
        # Kept from the original flow: commit only after a successful query.
        conn.commit()
    finally:
        conn.close()

    # Progress output: number of URLs that were built.
    print(len(url_lists))
    return ','.join(url_lists)
def get_detail_url():
    """Queue every property that still needs its detail page into Redis.

    Selects the ``propertyId`` of each row in ``realtor_detail_page_json``
    that is flagged dirty (``"isDirty" = '1'``) or has no ``detailJson`` yet,
    and left-pushes ``'http://<propertyId>'`` onto the Redis list
    ``realtor:property_id`` on the local Redis server (127.0.0.1).

    The cursor and the database connection are released even when the query
    or a Redis push fails.

    Returns:
        None
    """
    from crawl_tools.get_psql_con import get_psql_con
    import redis

    # Pass ``password=...`` to the pool if the Redis server requires auth.
    pool = redis.ConnectionPool(host='127.0.0.1')
    redis_client = redis.Redis(connection_pool=pool)

    sql_string = '''
        SELECT
            "propertyId"
        FROM
            realtor_detail_page_json
        WHERE
            "isDirty" = '1'
            OR "detailJson" IS NULL
    '''

    conn = get_psql_con()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql_string)
            for result in cursor.fetchall():
                # The id is wrapped as 'http://<id>' rather than pushed raw;
                # presumably the consumer of this list expects URL-shaped
                # entries -- TODO confirm against the spider reading the key.
                redis_client.lpush(
                    'realtor:property_id',
                    'http://{}'.format(result[0]),
                )
        finally:
            cursor.close()
        # Kept from the original flow: commit only after everything succeeded.
        conn.commit()
    finally:
        conn.close()
import datetime from scrapy.cmdline import execute from crawl_tools.get_psql_con import get_psql_con truncate_realtor_list_str = ''' TRUNCATE realtor_list_page_json; ''' truncate_realtor_list_splite_str = ''' TRUNCATE realtor_list_page_json_splite ''' conn = get_psql_con() cursor = conn.cursor() cursor.execute(truncate_realtor_list_str) conn.commit() cursor.execute(truncate_realtor_list_splite_str) conn.commit() conn.close() print('清空realtor_list_page_json 表和清空清空realtor_list_page_json_splite 表成功') scrapy_start_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') execute([ 'scrapy', 'crawl', # 'realtor', # 'realtor_app', # 'realtor_property_web', 'realtor_app_list_page', # "-a", # "start_urls={}".format(start_urls),
def __init__(self):
    """Obtain a connection from ``get_psql_con()`` and keep it on ``self.conn``."""
    self.conn = get_psql_con()