Ejemplo n.º 1
0
 def __init__(self):
     self.conn = get_sql_con()
     self.cursor = self.conn.cursor()
     self.sql = '''
         insert into realtor_list_page_json(jsonData,optionDate) values(%s,now())
     '''
     self.sql2 = '''
Ejemplo n.º 2
0
 def execute_spider_close(self):
     """Post-crawl pipeline: split the raw list-page JSON, sync the detail
     table (update / insert / delete), then queue detail URLs in Redis.

     The database connection is closed even if any step raises.
     """
     conn = get_sql_con()
     try:
         # Split the rows held in the realtor_list_json table and drop empties.
         self.splite_list_data(conn)
         # Batch-update existing propertyIds whose lastUpdate/address changed
         # (batch size 100).
         self.update_detail_data(conn, 100)
         # Insert propertyIds that are missing from the detail_page_json table.
         self.insert_detail_data(conn)
         # Delete rows present in detail but absent from the split table
         # (batch size 100).
         self.delete_not_exit(conn, 100)
         # Push the detail-page search keys into Redis.
         self.get_detail_url(conn)
     finally:
         # Guarantee the connection is released even on a mid-pipeline failure.
         conn.close()
Ejemplo n.º 3
0
    def truncate_list_json_and_split_table():
        """Empty the tb_realtor_list_page_json and
        tb_realtor_list_page_json_splite tables.

        Cursor and connection are always released, even when a TRUNCATE fails.
        :return: None
        """
        from crawl_tools.get_sql_con import get_sql_con

        truncate_realtor_list_str = '''
            TRUNCATE tb_realtor_list_page_json;
        '''

        truncate_realtor_list_splite_str = '''
            TRUNCATE tb_realtor_list_page_json_splite
        '''
        conn = get_sql_con()
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(truncate_realtor_list_str)
                conn.commit()
                cursor.execute(truncate_realtor_list_splite_str)
                conn.commit()
            finally:
                cursor.close()
        finally:
            conn.close()
        # Fixed duplicated "清空清空" in the original success message.
        print('清空realtor_list_page_json 表和清空realtor_list_page_json_splite 表成功')
Ejemplo n.º 4
0
def get_detail_url():
    """Read a page of property ids (rows 10..39) from tb_realtor_detail_json
    and push them onto the Redis list ``realtor:property_id`` as http:// URLs.

    Cursor and connection are always released, even if the query or the
    Redis push raises.
    """
    from crawl_tools.get_sql_con import get_sql_con
    import redis
    # Local Redis instance; auth left disabled (password commented out).
    pool = redis.ConnectionPool(host='127.0.0.1',
                                # password='******'
                                )
    redis_pool = redis.Redis(connection_pool=pool)
    sql_string = '''
        SELECT
    	property_id
    FROM
    	tb_realtor_detail_json 
    	limit 10,30
    '''
    conn = get_sql_con()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql_string)
            for result in cursor.fetchall():
                redis_pool.lpush('realtor:property_id', 'http://{}'.format(result[0]))
        finally:
            cursor.close()
        # NOTE(review): commit after a SELECT is a no-op; kept for parity with
        # the original driver usage.
        conn.commit()
    finally:
        conn.close()
Ejemplo n.º 5
0
def get_detail_url():
    """Build detail-API URLs for every property flagged dirty (isDirty = '1')
    or still missing its detailJson.

    :return: the URLs joined into a single comma-separated string.
    """
    conn = get_sql_con()
    sql_string = '''
        SELECT
    	propertyId 
    FROM
    	realtor_detail_json 
    WHERE
    	isDirty = '1' 
    	OR detailJson IS NULL
    '''
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql_string)
            # One detail-API URL per returned propertyId.
            url_lists = [
                'https://mapi-ng.rdc.moveaws.com/api/v1/properties/{}?client_id=rdc_mobile_native%2C9.3.7%2Candroid'.format(result[0])
                for result in cursor.fetchall()
            ]
        finally:
            cursor.close()
        # NOTE(review): commit after a SELECT is a no-op; kept for parity with
        # the original driver usage.
        conn.commit()
    finally:
        conn.close()
    print(len(url_lists))
    return ','.join(url_lists)
Ejemplo n.º 6
0
 def __init__(self):
     self.conn = get_sql_con()
     self.cursor = self.conn.cursor()
     self.sql = '''
Ejemplo n.º 7
0
 def __init__(self):
     self.conn = get_sql_con()
Ejemplo n.º 8
0
import datetime
from scrapy.cmdline import execute
from crawl_tools.get_sql_con import get_sql_con


truncate_realtor_list_str = '''
    TRUNCATE realtor_list_page_json;
'''

truncate_realtor_list_splite_str = '''
    TRUNCATE realtor_list_page_json_splite
'''
# Empty both staging tables before the crawl starts so the run begins clean.
# Cursor and connection are always released, even when a TRUNCATE fails.
conn = get_sql_con()
try:
    cursor = conn.cursor()
    try:
        cursor.execute(truncate_realtor_list_str)
        conn.commit()
        cursor.execute(truncate_realtor_list_splite_str)
        conn.commit()
    finally:
        cursor.close()
finally:
    conn.close()
# Fixed duplicated "清空清空" in the original success message.
print('清空realtor_list_page_json 表和清空realtor_list_page_json_splite 表成功')


scrapy_start_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

execute(['scrapy', 'crawl',
         # 'realtor',
         # 'realtor_app',
         # 'realtor_property_web',
         'realtor_app_list_page',
         # "-a",
         # "start_urls={}".format(start_urls),