예제 #1
0
def ping_waimai_meituan_restaurant_by_id(id):
    """Check whether a Meituan waimai restaurant page exists for ``id``.

    Builds the web and wap URLs from ``id``, fetches the web page, and, when
    the parsed page contains at least one ``.rest-info`` element, fills an
    ``effective_restaurant`` model and hands its attribute dict to
    ``Insert`` under the name ``effective_restaurants``.  Nothing is
    inserted when the selector matches no element.
    """
    id_text = str(id)
    print("******" + id_text + "*****")
    url_web = waimei_meituan_com_restaurant_url_web + id_text
    url_wap = waimei_meituan_com_restaurant_url_wap + id_text

    raw_html = get_response_by_url(url_web)
    cleaned_html = html_content_without_special_chars(raw_html)
    info_nodes = BeautifulSoup(cleaned_html).select(".rest-info")

    # No ".rest-info" element: treat the restaurant as non-existent.
    if not info_nodes:
        return

    model = effective_restaurant()
    model.id = id
    model.url_web = url_web
    model.url_wap = url_wap
    model.waimei_src = 'meituan'
    model.waimai_src_cn = '美团外卖'

    record = model.__dict__
    print(record)
    print("-----------------------------id为:" + id_text +
          "-----------------------------------")
    Insert(record, "effective_restaurants")
예제 #2
0
def ping_waimai_taobao_shop_by_id(id):
    """Fetch the Taobao waimai API page and print the matched shop-info nodes.

    As written, the function only fetches, cleans, parses and prints; it
    returns ``None`` before any record is built or inserted.

    NOTE(review): the bare ``return`` in the middle of the body makes the
    existence check and the ``Insert`` call below it unreachable.  It looks
    like a debugging leftover, but that cannot be confirmed from this file;
    decide whether to delete the dead code or remove the early return.
    """
    print("******" + str(id) + "*****")
    # NOTE(review): unlike url_wap, url_web does not have the id appended.
    url_web = taobao_waimai_url_web
    url_wap = taobao_waimai_url_wap + str(id)
    # NOTE(review): the request URL does not include ``id``, so every call
    # fetches the same address -- confirm whether the id should be appended.
    html = get_response_by_url(taobao_waimai_url_wap_api)
    print(html)
    html = html_content_without_special_chars(html)
    soup = BeautifulSoup(html)
    # print(soup)
    noexits_soup = soup.select("body .page .shop-info")
    print(noexits_soup)

    # Early exit: everything below this line is currently dead code.
    return
    is_restaurant_exist = len(noexits_soup) > 0
    # print(is_restaurant_exist)
    if (is_restaurant_exist):
        model = effective_restaurant()
        model.id = id
        model.url_web = url_web
        model.url_wap = url_wap
        model.waimei_src = 'taobao'
        model.waimai_src_cn = '淘宝外卖'

        class_model = model.__dict__
        print(class_model)
        print("-----------------------------id为:" + str(id) +
              "-----------------------------------")
        Insert(class_model, "effective_restaurants")
    return
예제 #3
0
def ping_waimai_eleme_shop_by_id(id):
    """Check whether an Eleme shop exists for ``id`` and record it if so.

    The API URL for ``id`` is fetched and cleaned; a response whose text
    contains the substring ``"message"`` is treated as "shop does not
    exist".  Otherwise an ``effective_restaurant`` model is filled and its
    attribute dict is passed to ``Insert`` under ``effective_restaurants``.
    """
    id_text = str(id)
    print("******" + id_text + "*****")
    url_web = eleme_shop_url_web + id_text
    url_wap = eleme_shop_url_wap + id_text
    url_api = eleme_shop_url_wap_api + id_text

    response_text = html_content_without_special_chars(
        get_response_by_url(url_api))
    print(response_text)

    # The presence of "message" in the payload marks a missing shop.
    shop_missing = "message" in str(response_text)
    print(shop_missing)
    if shop_missing:
        return

    model = effective_restaurant()
    model.id = id
    model.url_web = url_web
    model.url_wap = url_wap
    model.url_api = url_api
    model.waimei_src = 'eleme'
    model.waimai_src_cn = '饿了么'

    record = model.__dict__
    print(record)
    print("-----------------------------id为:" + id_text +
          "-----------------------------------")
    Insert(record, "effective_restaurants")
    print("*********************************Insert  id为:" + id_text +
          "*********************************")
예제 #4
0
def get_areas_from_Mysql():
    """Copy all ``MySql_Area`` rows into ``Spider_China_Areas`` via ``Insert``.

    Every row returned by the session query is converted to an ``Area``
    object carrying the twelve fields listed in ``field_names``; the
    attribute dicts of those objects are then inserted in one batch.

    The session is always closed, even if the query or conversion raises,
    and ``Insert`` is skipped when the query returned no rows (matching the
    guarded ``Insert`` calls elsewhere in this file).
    """
    field_names = (
        "id", "pid", "shortname", "name", "merger_name", "level",
        "pinyin", "code", "zip_code", "first", "lng", "lat",
    )

    # Assumes DBSession() yields a SQLAlchemy-style session exposing
    # close() -- consistent with the query(...).all() usage below.
    session = DBSession()
    try:
        rows = session.query(MySql_Area).all()
        print(len(rows))

        items = []
        for row in rows:
            row_state = row.__dict__
            area = Area()
            # Missing keys become None, exactly as dict.get() did before.
            for field in field_names:
                setattr(area, field, row_state.get(field))
            items.append(area.__dict__)
    finally:
        # Release the connection regardless of success or failure.
        session.close()

    if items:
        Insert(items, "Spider_China_Areas")
예제 #5
0
def get_meishijie_shiliao_shicai_ji(cid = _cid,category_pinyin=""):
    """Scrape the "ji_more" ingredient section for ``cid`` and insert the results.

    Fetches ``base_url + cid``, locates every element with ``id="ji_more"``,
    and for each one reads its first class name, its text, the sibling that
    holds remark spans and the third following sibling that holds the
    ingredient entries.  Each non-blank ingredient entry becomes a
    ``meishijiie_shiliao_shicai`` record; all records are passed to
    ``Insert`` under ``Meishijie_shiliao_shicais`` when at least one exists.

    Args:
        cid: Category id appended to ``base_url``.
        category_pinyin: Stored on every record and echoed in the log lines.
    """
    print("获取cid为"+str(cid)+"的禁忌食材,catagory_pinyin:"+category_pinyin+"-----------开始------------")
    url = base_url + str(cid)
    html = get_html_by_url(url)
    soup = BeautifulSoup(html)
    section_headers = soup.findAll(id="ji_more")

    # NOTE(review): the remark text is never reset, so with more than one
    # "ji_more" element later sections would inherit earlier remarks.
    shicai_remark = ''
    shiliao_shicais = []
    for header in section_headers:
        shicai_type = header["class"][0]
        shicai_type_name = header.string
        remark_spans = header.next_sibling
        ji_shicais = header.next_sibling.next_sibling.next_sibling

        for remark_span in remark_spans:
            shicai_remark = shicai_remark + remark_span.string + "\n\t "

        for shicai_li in ji_shicais:
            cn_name = shicai_li.string
            # Skip nodes without a single string and whitespace-only nodes
            # (a bare "\n" strips to the empty string, so one check suffices).
            if cn_name is None or not str(cn_name).strip():
                continue

            shiliao_shicai = meishijiie_shiliao_shicai()
            shiliao_shicai.cid = cid
            shiliao_shicai.category_pinyin = category_pinyin
            shiliao_shicai.cnName = cn_name
            shiliao_shicai.remark = shicai_remark
            shiliao_shicai.type = shicai_type
            shiliao_shicai.type_name = shicai_type_name
            shiliao_shicai.url = shicai_base_url + shiliao_shicai.cnName
            try:
                spaced = s_pinyin.hanzi2pinyin_split(string=shiliao_shicai.cnName, split=' ')
                shiliao_shicai.pinyin = spaced.replace(' ', '')
            except Exception:
                # Pinyin is best-effort: keep the record, but do not hide
                # the failure and do not swallow KeyboardInterrupt/SystemExit.
                print("pinyin conversion failed for: " + str(cn_name))

            # When several <img> tags exist the last one wins.
            for img in shicai_li.select("img"):
                shiliao_shicai.img_url = img["src"]

            shiliao_shicais.append(shiliao_shicai.__dict__)

    if shiliao_shicais:
        Insert(shiliao_shicais, "Meishijie_shiliao_shicais")
    print("获取cid为" + str(cid) + "的禁忌食材,catagory_pinyin:" + category_pinyin + "-----------结束------------")
예제 #6
0
def get_cities():
    """Download the Eleme city list, add a geohash per city and store it.

    The payload fetched from ``eleme_cities`` is decoded as UTF-8 and
    evaluated into a mapping of group key -> list of city dicts.  For each
    group (in sorted key order) every city is converted to an
    ``ElemeCities_Item`` attribute dict, the group's list is appended to
    ``filepath`` as text, and the list is passed to ``Insert`` under
    ``Spider_Eleme_Cities_WithGeoHash``.
    """
    content = get_content_by_url(eleme_cities)
    data = content.decode("utf8", "ignore")
    # SECURITY(review): eval() executes whatever the remote server returns.
    # If the payload is plain JSON, json.loads(data) is the safe replacement;
    # left unchanged here because the payload format is not visible.
    obj = eval(data)
    group_keys = sorted(obj.keys())
    print(group_keys)

    for key in group_keys:
        item_list = obj[key]
        print(key)
        print(item_list)
        print("\n")

        cities = []
        for city in item_list:
            print(city)
            eleme_city = ElemeCities_Item()
            eleme_city.or_id = city["id"]
            eleme_city.meta = key
            eleme_city.abbr = city["abbr"]
            eleme_city.latitude = city["latitude"]
            eleme_city.longitude = city["longitude"]
            eleme_city.name = city["name"]
            eleme_city.pinyin = city["pinyin"]
            eleme_city.geohash = geohash.encode(eleme_city.latitude,
                                                eleme_city.longitude, 12)
            print("\n")
            city_record = eleme_city.__dict__
            print(city_record)
            cities.append(city_record)

        # Context manager guarantees the handle is closed even if write fails.
        with open(filepath, 'a') as f:
            f.write(str(cities))
        print(cities)
        Insert(cities, "Spider_Eleme_Cities_WithGeoHash")
예제 #7
0
def get_dish_menus(cid=_cid,page_num=1,cai_menu_types_st="3",category_pinyin=''):
    """Scrape one listing page of dish menus and recurse into the next page.

    Fetches the listing for ``cid`` / ``cai_menu_types_st`` / ``page_num``,
    reads the total page count from the ``.gopage form`` text, converts each
    ``.listtyle1_list .listtyle1 a`` link to a ``meishijie_shiliao_dish_menu``
    attribute dict and passes the non-empty batch to ``Insert`` under
    ``Mershijie_shiliao_dishmenus``.  When more pages remain, the function
    calls itself for the next page, so recursion depth equals the number of
    remaining pages.

    Args:
        cid: Category id used in the URL and stored on each record.
        page_num: 1-based page to fetch.
        cai_menu_types_st: Value of the ``st`` query parameter; also the key
            looked up in ``dish_types``.
        category_pinyin: Only used in the log lines.
    """
    print("CID 为:"+str(cid)+" st 为"+cai_menu_types_st+"  pagenum 为 " +str(page_num) +" category :"+category_pinyin +"-------开始---------------")

    url = base_url+str(cid)+"&sortby=update&st="+cai_menu_types_st+"&page="+str(page_num)
    html = get_html_by_url(url)
    soup = BeautifulSoup(html)

    # The pager text is reduced to its digits by removing the surrounding
    # words; if several forms match, the last one determines the total.
    total_page = 0
    for pager in soup.select(".gopage form"):
        page_text = str(pager.get_text()).replace('页','').replace('共','').replace('到第','').replace(',','').strip()
        total_page = int(page_text)

    dish_menu_list = []
    for link in soup.select(".listtyle1_list .listtyle1 a"):
        dish_menu = meishijie_shiliao_dish_menu()
        dish_menu.link_url = link["href"]
        dish_menu.cid = cid
        dish_menu.dish_types_st = cai_menu_types_st

        cn_pre = ''
        pre_item = get_category_by_cid(cid)
        if pre_item is not None:
            cn_pre = pre_item["cnName"]
        dish_menu.dish_type = cn_pre+dish_types.get(cai_menu_types_st)
        dish_menu.dish_cn = link["title"]
        dish_menu.img_url = link.find("img")["src"]

        for remark in link.select(".c2 li"):
            dish_menu.cooking_remark += remark.string +" \r\n"

        try:
            spaced = s_pinyin.hanzi2pinyin_split(string=dish_menu.dish_cn, split=' ')
            dish_menu.dish_pinyin = spaced.replace(' ', '')
        except Exception:
            # Pinyin is best-effort: keep the record, but do not hide the
            # failure and do not swallow KeyboardInterrupt/SystemExit.
            print("pinyin conversion failed for: " + str(dish_menu.dish_cn))

        dish_menu.page_num = page_num
        dish_menu_list.append(dish_menu.__dict__)

    if dish_menu_list:
        Insert(dish_menu_list,"Mershijie_shiliao_dishmenus")

    # The original "while ...: ...; break" ran at most once, i.e. it was an
    # "if".  category_pinyin is now forwarded so later pages log it as well.
    next_page = page_num + 1
    if next_page <= total_page:
        get_dish_menus(cid, cai_menu_types_st=cai_menu_types_st,
                       page_num=next_page, category_pinyin=category_pinyin)

    # Report the page this call actually processed, not the next one.
    print("CID 为:"+str(cid)+" st 为"+cai_menu_types_st+"  pagenum 为 " +str(page_num) +" category :"+category_pinyin +"-------结束---------------")
예제 #8
0
def get_meishijie_categories(cid,category_pinyin='',category_cn=''):
    """Scrape the current sub-categories for ``cid`` and crawl related data.

    Steps:
      1. Fetch ``base_url + cid`` and select ``.listnav_dl_style1 .current a``.
      2. Turn each matched link into a ``meishijie_shiliao_parant_category``
         attribute dict and pass the non-empty batch to ``Insert`` under
         ``Meishijie_shiliao_Categories``.
      3. Call the "yi" and "ji" ingredient scrapers for ``cid``.
      4. Call ``get_dish_menus`` once per entry of ``dish_types_st``.

    Steps 3 and 4 use the function's own parameters, so they also run when
    the selector matched nothing (previously that case raised ``NameError``
    because a loop-local variable was read after an empty loop).

    Args:
        cid: Category id appended to ``base_url``.
        category_pinyin: Parent category pinyin stored on each record.
        category_cn: Parent category Chinese name stored on each record.
    """
    url = base_url+str(cid)
    html = get_html_by_url(url)
    sop = BeautifulSoup(html)
    dds = sop.select(".listnav_dl_style1 .current a")

    meishijie_shiliao_Categories = []
    for dd in dds:
        cn_name = dd.string
        spaced = s_pinyin.hanzi2pinyin_split(string=cn_name, split=" ")

        fenlei = meishijie_shiliao_parant_category()
        fenlei.cid = cid
        fenlei.category_pinyin = category_pinyin
        fenlei.category_cn = category_cn
        fenlei.cnName = cn_name
        fenlei.pinyin = spaced.replace(' ', '')
        fenlei.url = dd["href"]
        meishijie_shiliao_Categories.append(fenlei.__dict__)

    print(meishijie_shiliao_Categories)
    if meishijie_shiliao_Categories:
        Insert(meishijie_shiliao_Categories, collectionName='Meishijie_shiliao_Categories')

    # Ingredients for this category: suitable ("yi") then forbidden ("ji").
    get_meishijie_shiliao_shicai_yi(cid, category_pinyin)
    get_meishijie_shiliao_shicai_ji(cid, category_pinyin)

    for st in dish_types_st:
        # NOTE(review): the Chinese name is passed as category_pinyin, as in
        # the original code; it is only used for logging downstream -- confirm
        # whether category_pinyin was intended.
        get_dish_menus(cid, page_num=1,
                       cai_menu_types_st=dish_types_st.get(st),
                       category_pinyin=category_cn)