コード例 #1
0
 def open_spider(self, spider):
     '''Run once when the spider is opened.

     Opens the MySQL connection, passes both SZSE notice log names to
     ``check_file.check_log`` and then stores the two values returned by
     ``connect_mysql`` on the spider as ``db`` and ``cs``.
     '''
     handle, cursor = connect_mysql(
         MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, MYSQL_DATABASE)

     # The log checks run before the spider is given its database handles.
     for log_name in ('szse_notice.log', 'szse_cp_notice.log'):
         check_file.check_log(log_name)

     spider.db, spider.cs = handle, cursor
コード例 #2
0
    def open_spider(self, spider):
        """Attach database handles, the proxy list and the share list.

        Two ``connect_mysql`` calls are made: one for ``MYSQL_DATABASE``
        (stored as ``spider.db`` / ``spider.cs``) and one for ``PROXY_DB``
        (stored as ``spider.pro_db`` / ``spider.pro_cs``).  The result of
        ``get_proxy`` becomes ``spider.proxy_list``.  ``spider.shares_list``
        holds the truthy entries returned by ``get_shares``, or ``None`` when
        nothing truthy is left.
        """
        spider.db, spider.cs = connect_mysql(
            MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, MYSQL_DATABASE)
        spider.pro_db, spider.pro_cs = connect_mysql(
            MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, PROXY_DB)
        spider.proxy_list = get_proxy(spider.pro_cs)

        fetched = get_shares(spider.cs)
        if fetched:
            # Drop falsy entries from the fetched shares.
            kept = [share for share in fetched if share]
        else:
            kept = None
        # An empty result is normalised to None.
        spider.shares_list = kept or None
コード例 #3
0
    def open_spider(self, spider):
        '''Run once when the spider is opened.

        For the ``shares_concept`` spider a second ``connect_mysql`` call is
        made for ``PROXY_DB`` and the result of ``get_proxy`` is stored as
        ``spider.proxy_list``.  Every spider gets the ``MYSQL_DATABASE``
        handles (``spider.db`` / ``spider.cs``) and ``spider.shares_list``,
        which is ``None`` when ``get_shares`` returns something falsy.
        '''
        needs_proxy = spider.name == 'shares_concept'
        if needs_proxy:
            spider.pro_db, spider.pro_cs = connect_mysql(
                MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, PROXY_DB)
            spider.proxy_list = get_proxy(spider.pro_cs)

        main_db, main_cs = connect_mysql(
            MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, MYSQL_DATABASE)
        # The log check runs before the handles are attached to the spider.
        check_file.check_log('shares_concept.log')
        spider.db, spider.cs = main_db, main_cs

        spider.shares_list = get_shares(spider.cs) or None
コード例 #4
0
    def open_spider(self, spider):
        '''Run once when the spider is opened.

        For the ``sse_cp_notice`` and ``sse_shares`` spiders a
        ``connect_mysql`` call is made for ``PROXY_DB`` and the result of
        ``get_proxy`` is stored as ``spider.ip_tuple``.  Every spider then
        receives the ``MYSQL_DATABASE`` handles plus the MongoDB ``sse``
        collection, the result of its ``find()`` call and the ``sse_backup``
        collection.
        '''
        if spider.name in ('sse_cp_notice', 'sse_shares'):
            spider.pro_db, spider.pro_cs = connect_mysql(
                MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, PROXY_DB)
            spider.ip_tuple = get_proxy(spider.pro_cs)

        mysql_db, mysql_cs = connect_mysql(
            MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASSWD, MYSQL_DATABASE)

        mongo = MongoClient(host='111.193.233.196', port=27017)
        sse = mongo.shangjiaosuo.sse
        sse_rows = sse.find()
        sse_backup = mongo.shangjiaosuo.sse_backup

        # Attach everything only after all handles have been obtained.
        attachments = (
            ('data_cs', sse_rows),
            ('coll', sse),
            ('coll_backup', sse_backup),
            ('db', mysql_db),
            ('cs', mysql_cs),
        )
        for attr_name, value in attachments:
            setattr(spider, attr_name, value)
コード例 #5
0
ファイル: views.py プロジェクト: zzkzzk1996/CS527
def alexa_query():
    """Run the SQL statement given in the ``query`` request argument.

    The statement defaults to ``show tables;`` when the argument is missing.
    The three values returned by ``run_query`` are stored under the
    ``col_name``, ``result`` and ``query_time`` keys of ``temp_json``, a
    mapping defined outside this function.

    Returns:
        The literal string ``"Success"``.
    """
    # NOTE(review): the client-supplied statement is executed verbatim, so any
    # caller can run arbitrary SQL -- confirm that this endpoint is meant to be
    # reachable only by trusted clients.
    query = request.args.get('query', 'show tables;')
    connection = connect_mysql(
        host='cs527mysql2.chmrmo5grph7.us-east-1.rds.amazonaws.com',
        user='******',
        password='******',
        db='test')
    try:
        col_name, content, query_time = connection.run_query(query)
        temp_json['col_name'] = col_name
        temp_json['result'] = content
        temp_json['query_time'] = query_time
    finally:
        # Release the connection even when the query fails; previously an
        # exception from run_query skipped disconnect() and leaked it.
        connection.disconnect()
    return "Success"
コード例 #6
0
    # Read the remaining per-attribute weights from the "weight" section of
    # the configuration.
    PRODUCT_ORIGIN_weight = config["weight"]["PRODUCT_ORIGIN_weight"]
    BRAND_weight = config["weight"]["BRAND_weight"]
    # Build the weight dictionary, keyed by the goods_data column each weight
    # applies to.
    weight_dict = {
        "PTY_NUM_1": PTY_NUM_1_weight,
        "PTY_NUM_2": PTY_NUM_2_weight,
        "PTY_NUM_3": PTY_NUM_3_weight,
        "PRODUCT_ORIGIN_NUM_ID": PRODUCT_ORIGIN_weight,
        "BRAND_ID": BRAND_weight
    }

    # Select the distinct attribute combinations of every row that has an
    # ITEM_NUM_ID.
    query = """select distinct ITEM_NUM_ID, PTY_NUM_1, PTY_NUM_2, PTY_NUM_3, PRODUCT_ORIGIN_NUM_ID, BRAND_ID 
               from goods_data where ITEM_NUM_ID is not null"""

    # Read the goods data.
    conn = connect_mysql(mysql_host, mysql_port, mysql_db, mysql_user,
                         mysql_password)
    cursor = conn.cursor()
    cursor.execute(query)
    # Column names in the same order as the SELECT list above, so they can be
    # zipped with each fetched row.
    columns = [
        'ITEM_NUM_ID', 'PTY_NUM_1', 'PTY_NUM_2', 'PTY_NUM_3',
        'PRODUCT_ORIGIN_NUM_ID', 'BRAND_ID'
    ]

    # Obtain each SPU and its features.
    item_sentence_dict = {}
    for line in cursor.fetchall():
        # The first column is the item id; the rest are its attributes.
        item = line[0]
        # Encode every attribute that is not None as "<column>|<value>".
        # NOTE(review): `is not None` is the idiomatic form of this comparison.
        sentence = [
            column + "|" + str(value)
            for column, value in zip(columns[1:], line[1:]) if value != None
        ]