Beispiel #1
0
def get_image_content(url, timeout=30):
    """Download the raw bytes of an image.

    A header set is picked at random from the module-level ``headers``
    pool for every call.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as ``bytes`` when the server answers with
        status 200, otherwise ``None``. Non-200 answers and failed
        requests are both logged.
    """
    header = random.choice(headers)
    try:
        # The second positional parameter of requests.get is ``params``;
        # the header dict has to be passed by keyword or it ends up in
        # the query string instead of the request headers.
        response = requests.get(url, headers=header, timeout=timeout)
    except (requests.RequestException, RuntimeError):
        # requests signals network problems with RequestException
        # subclasses, which are not RuntimeError.
        log("请求{}异常".format(url))
        return None

    if response.status_code == 200:
        return response.content

    log("获取:{}失败".format(url))
    return None
Beispiel #2
0
def queryRecordByUidAndDynamicId(uid, dynamic_id):
    """Fetch one row of ``t_dynamic`` matching a user and a dynamic.

    Args:
        uid: Value compared against the ``uid`` column.
        dynamic_id: Value compared against the ``dynamic_id`` column.

    Returns:
        The first matching row as returned by ``cursor.fetchone()``, or
        ``None`` when nothing matches or the query fails (failure is logged).
    """
    # NOTE(review): connection settings are hard-coded here and in the
    # sibling functions; consider moving them to configuration.
    db = pymysql.connect(host="localhost", user="root",
                         password="root2037", database="blibli")
    # Placeholders let the driver escape the values instead of splicing
    # them into the statement text.
    sql = "SELECT * FROM t_dynamic WHERE uid=%s AND dynamic_id=%s"
    result = None
    try:
        with db.cursor() as cursor:
            cursor.execute(sql, (uid, dynamic_id))
            result = cursor.fetchone()
    except (pymysql.MySQLError, EnvironmentError):
        # PyMySQL errors derive from MySQLError, not EnvironmentError.
        log("执行sql:{}异常".format(sql))
    finally:
        # Release the connection even if an unexpected error escapes.
        db.close()
    return result
Beispiel #3
0
def updateDynamicToDownloaded(uid, dynamic_id):
    """Set ``download_status = 1`` on the matching ``t_dynamic`` rows.

    Args:
        uid: Value compared against the ``uid`` column.
        dynamic_id: Value compared against the ``dynamic_id`` column.

    Returns:
        The number of affected rows reported by the driver, or ``0`` when
        the statement fails and the transaction is rolled back.
    """
    db = pymysql.connect(host="localhost", user="root",
                         password="root2037", database="blibli")
    # Placeholders let the driver escape the values instead of splicing
    # them into the statement text.
    sql = ("UPDATE t_dynamic SET download_status = 1 "
           "WHERE uid = %s AND dynamic_id = %s")
    affect_count = 0
    try:
        with db.cursor() as cursor:
            affect_count = cursor.execute(sql, (uid, dynamic_id))
        db.commit()
    except (pymysql.MySQLError, EnvironmentError):
        # PyMySQL errors derive from MySQLError, not EnvironmentError.
        log("执行:sql:{} 失败".format(sql))
        db.rollback()
        # Nothing was persisted, so do not report rows as updated.
        affect_count = 0
    finally:
        db.close()

    return affect_count
Beispiel #4
0
def getUID():
    """Return the first column of every row in ``t_user``.

    Returns:
        A list with one entry per row; empty when the table is empty or
        the query fails (failure is logged).
    """
    db = pymysql.connect(host="localhost", user="root",
                         password="root2037", database="blibli")
    sql = "SELECT * FROM t_user"
    uids = []
    try:
        with db.cursor() as cursor:
            cursor.execute(sql)
            # presumably the first column of t_user is the uid -- confirm
            # against the table definition
            uids = [row[0] for row in cursor.fetchall()]
    except (pymysql.MySQLError, EnvironmentError):
        # PyMySQL errors derive from MySQLError, not EnvironmentError.
        log("执行sql:{}异常".format(sql))
    finally:
        # Release the connection even if an unexpected error escapes.
        db.close()

    return uids
Beispiel #5
0
def insertDynamicList(dynamic_list):
    """Bulk-insert dynamics and their images.

    Args:
        dynamic_list: Sequence of entries indexed as
            ``(uid, dynamic_id, image_list)``, where ``image_list`` is an
            iterable of image URLs.

    Every dynamic becomes one ``t_dynamic`` row with ``download_status``
    ``'0'``. Every image becomes one ``t_image`` row whose ``id`` is the
    last path segment of the URL and whose ``src`` is the URL forced to
    ``https``; any ``@...`` suffix is stripped from both. Failures are
    logged and rolled back. Returns ``None``.
    """
    # Rows for t_dynamic
    insert_dynamic_list = []
    # Rows for t_image
    insert_image_list = []
    for dynamic in dynamic_list:
        uid = dynamic[0]
        dynamic_id = dynamic[1]
        image_list = dynamic[2]
        insert_dynamic_list.append((dynamic_id, uid, '0'))
        for image in image_list:
            raw = str(image)
            # File name is the last path segment without the "@" suffix
            image_id = raw.split("/")[-1].split("@")[0]
            # Drop whichever scheme is present, then force https
            url = raw.replace("http:", "").replace("https:", "")
            src = "https:{}".format(url).split("@")[0]
            insert_image_list.append((image_id, src, '0', uid, dynamic_id))

    # Images only come from dynamics, so no dynamics means nothing to
    # write and no reason to open a connection.
    if not insert_dynamic_list:
        return

    insert_dynamic_sql = "INSERT INTO t_dynamic (dynamic_id,uid,download_status) VALUES (%s,%s,%s)"
    insert_image_sql = "INSERT INTO t_image (id,src,status,uid,dynamic_id) VALUES (%s,%s,%s,%s,%s)"
    db = pymysql.connect(host="localhost", user="root",
                         password="root2037", database="blibli")
    try:
        with db.cursor() as cursor:
            # Dynamics are committed on their own, so a later failure in
            # the image batch does not undo them.
            cursor.executemany(insert_dynamic_sql, insert_dynamic_list)
            db.commit()
            if insert_image_list:
                cursor.executemany(insert_image_sql, insert_image_list)
                db.commit()
    except (pymysql.MySQLError, EnvironmentError):
        # PyMySQL errors derive from MySQLError, not EnvironmentError.
        db.rollback()
        log("插入错误!!!!")
    finally:
        db.close()
Beispiel #6
0
def queryImagesByDidAndUid(uid, dynamic_id):
    """Return the distinct image sources stored for one dynamic.

    Args:
        uid: Value compared against the ``uid`` column of ``t_image``.
        dynamic_id: Value compared against the ``dynamic_id`` column.

    Returns:
        A list of ``src`` values; empty when nothing matches or the query
        fails (failure is logged).
    """
    db = pymysql.connect(host="localhost", user="root",
                         password="root2037", database="blibli")
    # Placeholders let the driver escape the values instead of splicing
    # them into the statement text.
    sql = "SELECT DISTINCT src FROM t_image WHERE uid = %s AND dynamic_id = %s"
    sources = []
    try:
        with db.cursor() as cursor:
            cursor.execute(sql, (uid, dynamic_id))
            sources = [row[0] for row in cursor.fetchall()]
    except (pymysql.MySQLError, EnvironmentError):
        # PyMySQL errors derive from MySQLError, not EnvironmentError.
        log("执行sql:{}失败".format(sql))
    finally:
        # Release the connection even if an unexpected error escapes.
        db.close()

    return sources
Beispiel #7
0
def insertListToDataBase(list, uid, dynamic_id):
    """Insert one ``t_image`` row per image URL.

    Args:
        list: Iterable of image URLs. The name shadows the builtin but is
            kept because callers pass it by keyword.
        uid: Stored in the ``uid`` column of every row.
        dynamic_id: Stored in the ``dynamic_id`` column of every row.

    Returns:
        The row count reported by ``executemany``, ``0`` when there is
        nothing to insert, or ``-1`` when the insert fails and is rolled
        back.
    """
    sql = "INSERT INTO t_image (id,src,status,uid,dynamic_id) VALUES (%s,%s,%s,%s,%s)"
    # The image id is the last path segment of the URL.
    rows = [(str(item).split("/")[-1], item, '0', uid, dynamic_id)
            for item in list]
    if not rows:
        return 0

    db = pymysql.connect(host="localhost", user="root",
                         password="root2037", database="blibli")
    affect_count = -1
    try:
        with db.cursor() as cursor:
            affect_count = cursor.executemany(sql, rows)
        db.commit()
    except (pymysql.MySQLError, EnvironmentError):
        # PyMySQL errors derive from MySQLError, not EnvironmentError.
        log("执行sql:{}失败".format(sql))
        db.rollback()
        affect_count = -1
    finally:
        db.close()
    return affect_count
Beispiel #8
0
def insertDynamic(uid, dynamic_id, image_list):
    """Store one dynamic and its images in a single transaction.

    Args:
        uid: Owner of the dynamic.
        dynamic_id: Identifier of the dynamic.
        image_list: Iterable of image URLs belonging to the dynamic.

    The ``t_dynamic`` row and all ``t_image`` rows are committed together;
    on failure everything is rolled back and the error is logged.
    Returns ``None``.
    """
    # Placeholders let the driver escape the values instead of splicing
    # them into the statement text.
    insert_dynamic_sql = "INSERT INTO t_dynamic (dynamic_id,uid,download_status) VALUES (%s,%s,%s)"
    insert_image_sql = "INSERT INTO t_image (id,src,status,uid,dynamic_id) VALUES (%s,%s,%s,%s,%s)"
    # The image id is the last path segment of the URL.
    image_rows = [(str(item).split("/")[-1], item, '0', uid, dynamic_id)
                  for item in image_list]

    db = pymysql.connect(host="localhost", user="root",
                         password="root2037", database="blibli")
    try:
        with db.cursor() as cursor:
            log("执行sql:{}".format(insert_dynamic_sql))
            cursor.execute(insert_dynamic_sql, (dynamic_id, uid, '0'))
            log("插入图片数据")
            cursor.executemany(insert_image_sql, image_rows)
        db.commit()
    except (pymysql.MySQLError, EnvironmentError):
        # PyMySQL errors derive from MySQLError, not EnvironmentError.
        log("执行sql:{}失败".format(insert_dynamic_sql))
        db.rollback()
    finally:
        db.close()
    # The images are written inside the transaction above. The former
    # follow-up insert of the same list stored every image a second time
    # and has been removed.
Beispiel #9
0
def getImageUrlListByCV(id):
    """Collect the image URLs of a Bilibili article page.

    Args:
        id: Article number appended to ``https://www.bilibili.com/read/cv``.
            The name shadows the builtin but is kept because callers pass
            it by keyword.

    Returns:
        A list of ``https:``-prefixed URLs taken from the ``data-src``
        attribute of every ``.img-box > img`` element; empty when the
        request does not return 200 or no such element exists.
    """
    url = "https://www.bilibili.com/read/cv{}".format(id)
    header = random.choice(headers)
    log("请求CV:{}".format(url))
    # The second positional parameter of requests.get is ``params``; the
    # header dict has to be passed by keyword.
    response = requests.get(url, headers=header)
    image_urls = []
    if response.status_code == 200:
        page = BeautifulSoup(response.content.decode('utf-8'), 'html.parser')
        image_box = page.select('.img-box > img')
        if image_box:
            for item in image_box:
                # Skip tags without the attribute instead of raising
                data_src = item.get('data-src')
                if data_src:
                    image_urls.append("https:" + data_src)
            log("获得CV:{}图片成功".format(url))
        else:
            log("获得CV:{}图片失败".format(url))
    else:
        log("请求CV:{}失败".format(url))
    # Throttle so consecutive requests are not sent too quickly
    time.sleep(1)
    return image_urls
Beispiel #10
0
def getImageUrlList(url):
    """Fetch one page of dynamics and extract the image URLs of each card.

    Cards of type 64 are resolved through ``getImageUrlListByCV`` using the
    card's ``rid``. Cards of type 2 carry their pictures in the embedded
    JSON ``card`` string. Other card types are ignored.

    Args:
        url: Address of the dynamics listing endpoint.

    Returns:
        A tuple ``(images, has_more, last_uid)``:
        ``images`` is a list of ``(dynamic_id, [image_url, ...])`` pairs,
        ``has_more`` is the flag reported by the server (``0`` when the
        request fails or carries no data), and ``last_uid`` is the
        ``dynamic_id`` of the last card on the page (``0`` when no card
        was processed).
    """
    image_groups = []
    last_uid = 0
    has_more = 0

    header = random.choice(headers)
    log("请求:{}\n".format(url))
    # The second positional parameter of requests.get is ``params``; the
    # header dict has to be passed by keyword.
    response = requests.get(url, headers=header)
    if response.status_code != 200:
        # Record the failure in the log
        log_message = "请求--{}--失败\n".format(url)
        log(log_message)
        return image_groups, has_more, last_uid

    data = json.loads(response.text).get('data')
    if not data:
        log("请求:--{}--无数据\n".format(url))
        return image_groups, has_more, last_uid

    has_more = data.get('has_more', 0)
    cards = data.get('cards')
    # Cards are only read while the server reports more pages
    if has_more == 0 or not cards:
        return image_groups, has_more, last_uid

    for card in cards:
        desc = card['desc']
        card_type = desc['type']
        if card_type == 64:
            # Article card: images live on the article page
            cv_id = desc['rid']
            print("是CV Id = " + str(cv_id))
            cv_image_list = getImageUrlListByCV(id=cv_id)
            image_groups.append((desc['dynamic_id'], cv_image_list))
        elif card_type == 2:
            # Picture card: the payload is a JSON string
            item = json.loads(card['card'])['item']
            pictures = item['pictures'] if item else None
            if pictures:
                img_list = [picture['img_src'] for picture in pictures]
                image_groups.append((desc['dynamic_id'], img_list))

    # The id of the final card is handed back alongside the images
    last_uid = cards[-1]['desc']['dynamic_id']
    print("最后一个动态ID = "+str(last_uid))
    return image_groups, has_more, last_uid
Beispiel #11
0
def save_image(image_name,image_conten):
    """Write image bytes to ``save_path + image_name``.

    Args:
        image_name: File name appended to the module-level ``save_path``.
        image_conten: Bytes written to the file.
    """
    filename = save_path + image_name
    # The with-block closes the file; no explicit close() is needed.
    with open(filename, 'wb') as fp:
        fp.write(image_conten)
    # Report success only after the data has been flushed and closed.
    log("保存:{}成功".format(image_name))