Example #1
def main():
    range_ = input('Enter the download range (xxx-xxx): ')
    os.chdir(os.getcwd())  # effectively a no-op; kept from the original
    try:
        os.mkdir('画师通')
    except FileExistsError:
        pass  # the folder already exists
    os.chdir('画师通')
    pool = pl()
    for each in geturl(range_):  # geturl/download are defined elsewhere in this example's module
        each = list(map(lambda x: x.split('src="')[-1], each))
        pool.map(download, each)
    pool.close()
    pool.join()
    print('Download complete')
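The import behind pl is not shown in any of these excerpts. Given the "thread pool" comments and the shared module-level lists in the later examples, it is most likely the thread-backed Pool from multiprocessing.dummy; below is a minimal sketch of that assumption, together with a hypothetical download helper shaped like the one this example maps over:

# Assumed imports; the excerpts do not show them.
import requests
from multiprocessing.dummy import Pool as pl   # thread-backed Pool, matching the "thread pool" comments

def download(url):
    # Hypothetical minimal downloader: fetch an image URL and save it under its basename.
    resp = requests.get(url, timeout=10)
    with open(url.split('/')[-1], 'wb') as f:
        f.write(resp.content)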
Example #2
            s3 = etree.HTML(r1.text)
            pic1 = s3.xpath('//*[@class="content-pic"]/a/img/@src')
            file_name = pic1[0].split('/')[-1]
            r2 = requests.get(pic1[0], headers=headers)
            with open(file_name, 'wb') as f:
                f.write(r2.content)
                print('downloading......')
    except Exception:
        pass  # silently skip pages that fail to parse or download


def main():
    # Build the listing-page URLs (pages 2-9).
    url_list = []
    for i in range(2, 10):
        url = 'https://www.mm131.net/xinggan/list_6_{}.html'.format(i)
        url_list.append(url)
    return url_list


if __name__ == '__main__':
    url_list = main()
    pool = pl()
    pool.map(geturl, url_list)   # geturl (not shown) collects the per-picture page URLs
    pool.map(download, url_all)  # url_all is a module-level list defined elsewhere in this example
    pool.close()
    pool.join()
    print('Download complete')

#download('https://www.mm131.net/xinggan/2260.html')
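Neither geturl nor the url_all list read by the second map call appears in this excerpt. A minimal sketch of their presumed shape, assuming url_all is a shared module-level list (which only works if pl is a thread pool) and using a hypothetical XPath:

# Hedged sketch only: the real geturl and url_all are not shown in the excerpt.
import requests
from lxml import etree

headers = {'User-Agent': 'Mozilla/5.0'}
url_all = []   # shared list; safe here only because pl is assumed to be a thread pool

def geturl(list_url):
    # Parse one listing page and collect the per-picture page URLs (hypothetical XPath).
    r = requests.get(list_url, headers=headers, timeout=10)
    s = etree.HTML(r.text)
    url_all.extend(s.xpath('//dd/a/@href'))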
Example #3
        'User-Agent':
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    selector = etree.HTML(response.text)
    for j in range(1, 31):
        # Get the URL of each image on this listing page
        count += 1  # count is a module-level counter (not shown in this excerpt)
        url_image = selector.xpath(
            '//*[@id="houselist-mod-new"]/li[{}]/div[1]/img/@src'.format(j))[0]
        res = requests.get(url_image, headers=headers)
        # Note: the image file names must not repeat

        with open("./安居客/{}.jpg".format(count), 'wb') as f:
            f.write(res.content)  # write the image bytes to disk
        time.sleep(2)

    # rest for a moment after finishing the page
    time.sleep(2)


if __name__ == "__main__":
    pool = pl(4)  # initialize a pool of 4 workers
    preurl = 'https://xa.anjuke.com/sale/p{}/?pi=baidu-cpc-xa-tyongxa1&kwid=89460384111#filtersort'
    house_url = [preurl.format(i) for i in range(1, 5)]  # build the page URLs (pages 1-4) with a list comprehension

    # map each page URL to spider
    pool.map(spider, house_url)
    pool.close()
    pool.join()
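The comment above warns that image file names must not repeat, yet count is a counter shared by the four pool workers, so concurrent increments can produce duplicate or overwritten names. A small sketch of one alternative, deriving the file name from the image URL itself (assuming the URL basenames are unique):

# Sketch: name each file after the image URL's basename instead of a shared counter.
import os

def save_image(url_image, content, out_dir='安居客'):
    os.makedirs(out_dir, exist_ok=True)
    file_name = url_image.split('/')[-1] or 'unnamed.jpg'
    with open(os.path.join(out_dir, file_name), 'wb') as f:
        f.write(content)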
Example #4
    result_num.append([Key, num_o, num_h])  # tail of the (unshown) search worker: record one keyword's counts

def artical_write(result_num):
    with open('KeyWord2.csv', 'wt', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # write the header row first
        writer.writerow(['KeyWords', 'num_o', 'num_h'])
        # then write the data rows
        for res in result_num:
            writer.writerow([res[0], res[1], res[2]])



if __name__ == '__main__':
    # worker count; Pool() defaults to the number of CPU cores when the argument is omitted
    pool = pl(4)

    global result_num  # no-op at module level; kept from the original
    result_num = []

    # read the keywords from the file
    with open('KeyWord.csv', 'r') as KW:
        KeyWords = KW.readlines()

    # collect the search URLs: one plain query and one exact-phrase (quoted) query per keyword
    all_url = []
    url = 'https://www.amazon.com/s?k='
    strings = '&ref=nb_sb_noss_1'
    for i in KeyWords:
        i_string = '"' + '+'.join(i.split(" ")).strip() + '"'
        all_url.append([url + '+'.join(i.split(" ")).strip() + strings,
                        url + i_string + strings])
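The excerpt ends after building all_url, before the pool is actually used. A hedged sketch of how the script presumably continues, mirroring the map/close/join pattern of the other examples; search here is only a placeholder for the real worker, which fetches each URL pair and appends [Key, num_o, num_h] to result_num:

    # Hypothetical tail, continuing inside the if __name__ block.
    def search(url_pair):
        plain_url, quoted_url = url_pair
        result_num.append([plain_url, 0, 0])  # placeholder values; the real worker records [Key, num_o, num_h]

    pool.map(search, all_url)
    pool.close()
    pool.join()
    artical_write(result_num)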