def get_stock(part_number):
    """Query the Maxim search servlet for one part's stock and price tiers.

    The lookup is best-effort: when the HTTP request fails, or the response
    body is not a JSON object, the placeholder result is returned instead of
    raising.

    As a debugging aid the raw response body is also written to
    ``stock.html`` in the current working directory.

    :param part_number: part number sent as the ``query`` form field.
    :return: dict with the keys
        ``tiered`` -- list of ``[minQuantity, value]`` pairs built from the
        response's ``volumePrices`` (``[[0, 0.00]]`` when there are none),
        ``stock`` -- ``[stock.atpInv, min]`` (placeholder ``[0, 1]``),
        ``increment`` -- the response's ``mult`` (placeholder ``1``).
    """
    url = 'https://www.maximintegrated.com/bin/mySearchServlet'
    # NOTE(review): this form template is on a single line in the reviewed
    # text; box.headers_to_dict presumably expects one "key:value" pair per
    # line -- confirm the literal still parses into separate fields.
    post_data = box.headers_to_dict("""input:single query:MAX15036ATE+T userType:anonymous""")
    post_data['query'] = part_number

    # Placeholder result, returned unchanged when the lookup fails.
    part_data = {
        'tiered': [[0, 0.00]],
        'stock': [0, 1],
        'increment': 1,
    }

    try:
        response = requests.post(url=url, data=post_data, headers=headers)
        try:
            # response.content is bytes, so the dump must be opened in binary
            # mode; a text-mode handle rejects bytes on Python 3.
            with open('stock.html', 'wb') as fp:
                fp.write(response.content)
        except (IOError, OSError):
            # The dump is only a debugging aid; failing to write it must not
            # throw away a response that was fetched successfully.
            pass
        data = json.loads(response.content)
    except Exception:
        # Deliberate best-effort boundary: any request/parse failure yields
        # the placeholders. Unlike a bare ``except:`` this still lets
        # KeyboardInterrupt and SystemExit propagate.
        return part_data

    if not isinstance(data, dict):
        # Valid JSON, but not the object the field lookups below rely on.
        return part_data

    # ``or []`` / ``or {}`` also cover keys that are present but null.
    tiered = [
        [vol.get('minQuantity'), vol.get('value')]
        for vol in data.get('volumePrices') or []
    ]
    if tiered:
        part_data['tiered'] = tiered

    stock_info = data.get('stock') or {}
    part_data['stock'] = [stock_info.get('atpInv', 0), data.get('min', 1)]
    part_data['increment'] = data.get('mult', 1)
    return part_data
import requests from tools import box from bs4 import BeautifulSoup _logger = logging.getLogger('hqchip_spider') headers_str = """ Host: www.supchip.com Connection: keep-alive User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 Referer: http://www.supchip.com/ Accept-Encoding: gzip, deflate, sdch Accept-Language: en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4 """ default_headers = box.headers_to_dict(headers_str) def fetcher(url, data=None, **kwargs): """获取URL数据""" if kwargs.get('headers', None): _headers = kwargs['headers'] else: _headers = { 'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4', 'Accept-Encoding': 'gzip, deflate, sdch', 'Connection': 'keep-alive', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36',
import json
import requests
from tools import box
from w3lib.html import remove_tags

# Raw header text, converted to a dict right below and sent with the catalog
# request in get_part_name().
# NOTE(review): the literal is on a single line in the reviewed text;
# box.headers_to_dict presumably expects one "name:value" pair per line --
# confirm before relying on the parsed result.
headers = """ accept:application/json, text/javascript, */*; q=0.01 accept-encoding:gzip, deflate, br accept-language:en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4 cache-control:no-cache content-length:0 origin:https://www.maximintegrated.com pragma:no-cache user-agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 x-requested-with:XMLHttpRequest"""
headers = box.headers_to_dict(headers)


def get_part_name(series_name=None):
    """Print the first orderable part number listed for a product series.

    Sends a GET request to the Maxim product catalog servlet with the series
    name as ``product_info_root_part``, decodes the JSON body, and prints the
    first ``Orderable_Part_Numbers`` entry found under the series name in the
    first ``Root_Part_Numbers`` element.

    :param series_name: series (root part) name; when empty or ``None`` no
        request is made and nothing is printed.
    :return: None
    """
    url = 'https://www.maximintegrated.com/bin/ProductCatalogSearchServlet'
    if not series_name:
        return

    query = {'canSearch': '', 'product_info_root_part': series_name}
    reply = requests.get(url=url, params=query, headers=headers)
    payload = json.loads(reply.content)

    series_entry = payload.get('Root_Part_Numbers')[0].get(series_name)
    print(series_entry.get('Orderable_Part_Numbers')[0])
#!/usr/bin/env python # -*- coding: utf-8 -*- # Created by Vin on 2017/5/2 import requests from tools.incapsula_cracker import incapsula_parse, IncapSession import json import re from tools.box import headers_to_dict _headers = headers_to_dict( """Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 Accept-Encoding:gzip, deflate, sdch, br Accept-Language:en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4 Cache-Control:no-cache Connection:keep-alive Cookie:incap_ses_500_731139=7+rGP1vG0Ukgo9ADqVvwBrbhB1kAAAAAVtQzQCdT9m6c+BcY9vglpw==; renderid=rend02; incap_ses_490_731139=fgMZJ2teUiwcORuFy9TMBlAgCFkAAAAAUjSMJ6QEU8t2pIc6Y+ReAA==; incap_ses_532_731139=PIIVDt11gSVgruX0eAtiB4YjCFkAAAAAIUZOznZeB3+B/99o7PT9fg==; s_sq=%5B%5BB%5D%5D; _ga=GA1.2.667915121.1489570252; _gid=GA1.2.407305118.1493712114; __utmt_75e5e075f7ab2c7c6d58c241dc444533=1; __utma=132994193.667915121.1489570252.1493712115.1493714367.22; __utmb=132994193.1.10.1493714367; __utmc=132994193; __utmz=132994193.1489570252.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); visid_incap_731139=LE3UJZ6TS2C/6+D3lL8BuMQJyVgAAAAAQkIPAAAAAACA00V7AXMcWO7ULZbwNsZbb4ooMMSSq06J; incap_ses_552_731139=+v5qKmFRHEyslEbtTxmpB7tFCFkAAAAAIq6K7KYpuRk/Nq7gniyL6A==; s_cc=true; s_fid=0D910DA93DE8FF4F-1AC34B66A1B67C54; s_nr=1493714370591-Repeat; s_lv=1493714370595; s_lv_s=Less%20than%201%20day; s_vi=[CS]v1|2C6484E8051D145C-60000151A001129E[CE] DNT:1 Host:www.ttiinc.com Pragma:no-cache Upgrade-Insecure-Requests:1 User-Agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36""" ) def main(): home = "https://www.ttiinc.com/content/ttiinc/en.html" default_headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36', 'Accept': 'application/json, text/plain, */*', 'Accept-Encoding': 'gzip, deflate, br',
import time import urlparse import requests import lxml.html from tools import box as util headers_string = """Host: www.richardsonrfpd.com User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 Referer: http://www.richardsonrfpd.com/Pages/home.aspx Accept-Encoding: gzip, deflate Accept-Language: en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4 """ default_headers = util.headers_to_dict(headers_str=headers_string) print default_headers def get_event_kwargs(response=None, **kwargs): """获取网页中隐藏表单的值 返回整理好的触发事件需要的表单数据字典 :param response: 请求网页返回的对象 :param kwargs: 其他的需要加入时间表单的键值对 :return: 字典 """ html = kwargs.pop('html', None) if response is None and html is None: return -400 try: html = response.text.encode('utf-8') if response else html
# coding=utf-8
from tools import box

# Raw request-header text for www.microchipdirect.com; the script entry point
# below feeds it to box.headers_to_dict.
# NOTE(review): the literal is on a single line in the reviewed text;
# box.headers_to_dict presumably expects one "Name: value" pair per line --
# confirm.
headers = """ Host: www.microchipdirect.com User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 Referer: http://www.microchipdirect.com/Chart.aspx?branchId=30049&mid=10&treeid=1 Accept-Encoding: gzip, deflate, sdch Accept-Language: en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4 """

if __name__ == "__main__":
    # Run as a script: print the dict built from the raw header text.
    print(box.headers_to_dict(headers))