예제 #1
0
def use_which_code():
    """Dispatch every stored URL to the parser that matches its host.

    URLs whose scheme+first-host-label prefix contains 'http://zhuanzhuan'
    go to get_zhuan_info; those containing 'http://sz' go to the old
    detail-page parser get_item_info. Other hosts are skipped.
    """
    for url in urls_():
        # Compute the routing key once instead of twice per URL.
        prefix = url.split('.')[0]
        if 'http://zhuanzhuan' in prefix:
            get_zhuan_info(url)
        elif 'http://sz' in prefix:
            get_item_info(url)
        # any other host: intentionally ignored (was an explicit no-op branch)
예제 #2
0
def get_all_links(channel):
    """Collect links from pages 1-150 of *channel*, then scrape details.

    The phone-number channel has its own link/detail scrapers; every
    other channel goes through the generic item scrapers.
    """
    is_phone_channel = channel == "http://bj.ganji.com/shoujihaoma/"
    for page in range(1, 151):
        proxy = random.choice(proxy_lists)
        if is_phone_channel:
            get_phone_links(header, proxy, page)
        else:
            get_item_link(header, proxy, channel, page)
    print("所有商品链接已保存成功!")
    # Walk the stored links and fetch each detail page through a fresh proxy.
    for link in url_list.find("item_link"):
        proxy = random.choice(proxy_lists)
        get_item_info(header, proxy, link)
    for link in phNum_list.find("phone_link"):
        proxy = random.choice(proxy_lists)
        get_phone_info(header, proxy, link)
예제 #3
0
def urls_huifu():
    """Resume an interrupted crawl: parse only URLs not yet in item_info.

    The set difference between all collected links (url_list) and the
    already-parsed links (item_info) yields the remaining work.
    """
    db_urls = [item['url'] for item in url_list.find()]
    index_urls = [item['url'] for item in item_info.find()]
    rest_of_urls = set(db_urls) - set(index_urls)

    for url in rest_of_urls:
        # Routing key: scheme + first host label, computed once per URL.
        prefix = url.split('.')[0]
        if 'http://zhuanzhuan' in prefix:
            get_zhuan_info(url)
        elif 'http://sz' in prefix:
            print(url)  # progress trace for the old detail-page parser
            get_item_info(url)
        # other hosts: intentionally skipped (was an explicit no-op branch)
예제 #4
0
# Entry-script setup: process pool plus the project's link/detail scrapers.
from multiprocessing import Pool
from channel_extarct import channel_list  # NOTE(review): 'extarct' looks misspelled — confirm it matches the module file on disk
from page_parsing import url_list
from page_parsing import get_url_link
from page_parsing import get_item_info
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')     # change stdout's default encoding to UTF-8

def get_all_links(channel):
    """Collect listing links from pages 1-9 of *channel*, both seller types.

    who_sells=0 and who_sells=1 select the two listing variants. A failure
    on one page is reported and skipped so the remaining pages still run.
    """
    for num in range(1, 10):
        try:
            get_url_link(channel, num, who_sells=0)
            get_url_link(channel, num, who_sells=1)
        except Exception as exc:
            # Was a bare `except: pass`: keep the best-effort behaviour but
            # report the failure instead of hiding it (and let Ctrl-C through).
            print(channel, num, exc)



if __name__ == '__main__':
    # Pool is kept for the (currently disabled) parallel link collection.
    pool = Pool()
    # pool.map(get_all_links, channel_list.split())
    try:
        for item_url in url_list.find():
            print(item_url['url'])
            get_item_info(item_url['url'])
    except Exception as exc:
        # Was a bare `except: pass`: report why the scrape loop stopped
        # instead of exiting silently; KeyboardInterrupt now propagates.
        print(exc)

예제 #5
0
# Entry-script setup: scrapers plus a local MongoDB collection of links.
from multiprocessing import Pool
from channel_extract import channel_list
from page_parsing import get_links_from, get_item_info
import time
import pymongo

client = pymongo.MongoClient('localhost', 27017)
ceshi = client['ceshi']          # 'ceshi' = test database
url_list = ceshi['url_list']     # collection of collected listing links


def get_all_links_from(channel):
    """Fetch listing links from pages 1 through 100 of *channel*."""
    page = 1
    while page <= 100:
        get_links_from(channel, page)
        page += 1


if __name__ == '__main__':
    pool = Pool()  # kept for the (disabled) parallel collection below
    #pool.map(get_all_links_from,channel_list.split())
    for item in url_list.find():
        # Skip the jump/redirect URL — presumably not a parseable detail
        # page (NOTE(review): inferred from the URL; confirm).
        # The redundant `else: pass` branch was removed.
        if item['url'] != "http://jump.zhineng.58.com/jump":
            get_item_info(item['url'])
예제 #6
0
def get_all_item(item_url):
    """Unpack a stored link document and scrape its detail page.

    *item_url* is a mapping carrying 'url' and 'crb' keys.
    """
    get_item_info(item_url['url'], item_url['crb'])
예제 #7
0
def get_all_item_info(url):
    """Thin wrapper: hand *url* to the page_parsing detail scraper."""
    parser = page_parsing.get_item_info
    parser(url)
예제 #8
0
from multiprocessing import Pool
from channel_extract import channel_list
from page_parsing import get_links_from
from page_parsing import get_item_info


def get_alt_links_from(channel):
    """Fetch listing links from pages 1 through 4 of *channel*."""
    for page_no in range(1, 5):
        get_links_from(channel, page_no)


if __name__ == '__main__':
    pool = Pool()
    #pool.map(get_all_links_from,channel_list.split())
    # NOTE(review): pool.map runs get_alt_links_from over every channel and
    # returns a list of its results, and that whole list is then passed to
    # get_item_info, whose return value is iterated. get_item_info
    # presumably expects a single URL, so this call looks wrong — confirm
    # the intended behaviour before relying on it.
    for i in get_item_info(pool.map(get_alt_links_from, channel_list.split())):
        print(i)
예제 #9
0
# Entry-script setup: process pool plus the project's link/detail scrapers.
from multiprocessing import Pool
from channel_extarct import channel_list  # NOTE(review): 'extarct' looks misspelled — confirm it matches the module file on disk
from page_parsing import url_list
from page_parsing import get_url_link
from page_parsing import get_item_info
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
                              encoding='utf8')  # change stdout's default encoding to UTF-8


def get_all_links(channel):
    """Collect listing links from pages 1-9 of *channel*, both seller types.

    who_sells=0 and who_sells=1 select the two listing variants. A failure
    on one page is reported and skipped so the remaining pages still run.
    """
    for num in range(1, 10):
        try:
            get_url_link(channel, num, who_sells=0)
            get_url_link(channel, num, who_sells=1)
        except Exception as exc:
            # Was a bare `except: pass`: keep the best-effort behaviour but
            # report the failure instead of hiding it (and let Ctrl-C through).
            print(channel, num, exc)


if __name__ == '__main__':
    # Pool is kept for the (currently disabled) parallel link collection.
    pool = Pool()
    # pool.map(get_all_links, channel_list.split())
    try:
        for item_url in url_list.find():
            print(item_url['url'])
            get_item_info(item_url['url'])
    except Exception as exc:
        # Was a bare `except: pass`: report why the scrape loop stopped
        # instead of exiting silently; KeyboardInterrupt now propagates.
        print(exc)
예제 #10
0
def get_all_info_from(url):
    """Route *url* to the matching detail parser.

    Phone-number listings live under the 'shoujihaoma' path segment and
    need a dedicated parser; everything else is a normal item page.
    """
    parts = url.split('/')
    # For an absolute http URL, split('/') yields
    # [scheme:, '', host, first-segment, ...], so index 3 is the first path
    # segment. Guard the index so short/relative URLs fall through to the
    # generic parser instead of raising IndexError as before.
    if len(parts) > 3 and parts[3] == 'shoujihaoma':
        page_parsing.get_pnumber_info(url)
    else:
        page_parsing.get_item_info(url)
예제 #11
0
def get_all_item_info(url):
    """Delegate detail-page parsing of *url* to the page_parsing module."""
    return_value = page_parsing.get_item_info(url)
예제 #12
0
def get_all_info_from(url):
    """Route *url* to the matching detail parser.

    Phone-number listings live under the 'shoujihaoma' path segment and
    need a dedicated parser; everything else is a normal item page.
    """
    parts = url.split('/')
    # For an absolute http URL, split('/') yields
    # [scheme:, '', host, first-segment, ...], so index 3 is the first path
    # segment. Guard the index so short/relative URLs fall through to the
    # generic parser instead of raising IndexError as before.
    if len(parts) > 3 and parts[3] == 'shoujihaoma':
        page_parsing.get_pnumber_info(url)
    else:
        page_parsing.get_item_info(url)
예제 #13
0
from  page_parsing import url_list,get_item_info

# Scrape the detail page of every link stored in the url_list collection.
for record in url_list.find():
    get_item_info(record['url'])
예제 #14
0
def getAllUrlLists(channel):
    """For pages 1-100 of *channel*, scrape every listed item's detail page."""
    for page in range(1, 101):
        page_urls = getUrlList(channel, page)
        for item_url in page_urls:
            get_item_info(item_url)
예제 #15
0
# Links still to be scraped: everything collected minus everything parsed.
# x and y stay module-level, as other code may reference them.
x = set(item_all)
y = set(item_any)
item_result = x.difference(y)


def get_links_from_urllist(channel):
    """Collect links from up to 100 pages of *channel*.

    For each page number, keep calling get_links_from until it returns
    None (treated as "nothing left to fetch"). Ctrl-C aborts the whole
    channel.

    Bug fixed: the original called get_links_from twice per pass of the
    inner loop — once in the condition and again in the else branch — so
    every successful fetch was performed twice. It also compared with
    `== None` instead of `is None`.
    """
    for num in range(1, 101):
        try:
            while get_links_from(channel, num) is not None:
                pass
        except KeyboardInterrupt:
            break


if __name__ == "__main__":
    # get_item_infos(item_result)
    # Best-effort scrape of the remaining links: a failure on one URL is
    # logged (the URL is printed) and the loop continues.
    for i in item_result:
        try:
            get_item_info(i)
        except Exception:
            # Was a bare `except:`: narrowed so Ctrl-C can stop the run,
            # while still printing the URL that failed.
            print(i)
    print('结束!')
    # pool = Pool(processes=4)
    # pool.map(get_links_from_urllist,url_list.split())

#     for link in url_list.split():
#         get_links_from_urllist(link)
예제 #16
0
def get_all_items_info():
    """Scrape the detail page of every link in the nested all_links lists."""
    flattened = (link for group in all_links for link in group)
    for link in flattened:
        page_parsing.get_item_info(link)