Esempio n. 1
0
def get_detail_url():
    """Return a comma-separated string of property detail API URLs.

    Selects every "propertyId" from ``realtor_detail_page_json`` whose
    "isDirty" column equals '1' or whose "detailJson" column is NULL, and
    formats each id into the mobile API detail URL.

    The number of URLs is printed to stdout as a progress indicator.

    Returns:
        str: the URLs joined with ``,``; an empty string when no row matches.

    Raises:
        Whatever the database driver raises on connection or query failure.
        The cursor and connection are closed before the exception propagates.
    """
    url_template = 'https://mapi-ng.rdc.moveaws.com/api/v1/properties/{}?client_id=rdc_mobile_native%2C9.3.7%2Candroid'
    sql_string = '''
        SELECT
    	"propertyId" 
    FROM
    	realtor_detail_page_json 
    WHERE
    	"isDirty" = '1' 
    	OR "detailJson" IS NULL
    '''
    conn = get_psql_con()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql_string)
            rows = cursor.fetchall()
        finally:
            # Release the cursor even when the query fails.
            cursor.close()
        # Read-only query; the commit only ends the open transaction.
        conn.commit()
    finally:
        # Always hand the connection back, success or failure.
        conn.close()

    url_lists = [url_template.format(row[0]) for row in rows]
    print(len(url_lists))
    return ','.join(url_lists)
Esempio n. 2
0
def get_detail_url():
    """Push the ids of properties needing a detail fetch onto a Redis list.

    Selects every "propertyId" from ``realtor_detail_page_json`` whose
    "isDirty" column equals '1' or whose "detailJson" column is NULL, and
    LPUSHes ``'http://<propertyId>'`` onto the Redis list
    ``realtor:property_id`` on the local Redis server (127.0.0.1).

    Returns:
        None.

    Raises:
        Whatever the database driver or the Redis client raises. The cursor
        and database connection are closed before the exception propagates.
    """
    from crawl_tools.get_psql_con import get_psql_con
    import redis

    # No password is passed; add ``password=...`` here if the server needs auth.
    pool = redis.ConnectionPool(host='127.0.0.1')
    redis_client = redis.Redis(connection_pool=pool)

    sql_string = '''
        SELECT
    	"propertyId" 
    FROM
    	realtor_detail_page_json 
    WHERE
    	"isDirty" = '1' 
    	OR "detailJson" IS NULL
    '''
    conn = get_psql_con()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql_string)
            for result in cursor.fetchall():
                # NOTE(review): the 'http://' prefix presumably lets the queue
                # consumer treat the id as a URL -- confirm against the spider.
                redis_client.lpush('realtor:property_id', 'http://{}'.format(result[0]))
        finally:
            # Release the cursor even if the query or a Redis push fails.
            cursor.close()
        # Read-only query; the commit only ends the open transaction.
        conn.commit()
    finally:
        # Always close the database connection, success or failure.
        conn.close()
Esempio n. 3
0
import datetime
from scrapy.cmdline import execute
from crawl_tools.get_psql_con import get_psql_con

# SQL statements that empty the two list-page tables; they are run below,
# before the scrapy crawl command is launched.
truncate_realtor_list_str = '''
    TRUNCATE realtor_list_page_json;
'''

truncate_realtor_list_splite_str = '''
    TRUNCATE realtor_list_page_json_splite
'''
conn = get_psql_con()
try:
    cursor = conn.cursor()
    try:
        # Each TRUNCATE is committed on its own, so a failure of the second
        # one does not undo the first.
        cursor.execute(truncate_realtor_list_str)
        conn.commit()
        cursor.execute(truncate_realtor_list_splite_str)
        conn.commit()
    finally:
        cursor.close()
finally:
    # Close the connection even when a TRUNCATE fails, so it is not leaked.
    conn.close()
# Message reads: "Successfully truncated the realtor_list_page_json and
# realtor_list_page_json_splite tables".
print('清空realtor_list_page_json 表和清空清空realtor_list_page_json_splite 表成功')

# Naive local-time timestamp taken after the tables have been emptied.
scrapy_start_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

execute([
    'scrapy',
    'crawl',
    # 'realtor',
    # 'realtor_app',
    # 'realtor_property_web',
    'realtor_app_list_page',
    # "-a",
    # "start_urls={}".format(start_urls),
Esempio n. 4
0
 def __init__(self):
     self.conn = get_psql_con()