Exemple #1
0
def get_stock(part_number):
    """Fetch stock and price-break data for one part from maximintegrated.com.

    Posts ``part_number`` as the ``query`` form field to the site's search
    servlet and maps the JSON reply onto the returned dictionary.  The
    lookup is best-effort: if the request, the debug dump or the JSON
    decoding fails, or the reply is not a JSON object, the defaults are
    returned instead of raising.

    :param part_number: part number sent as the ``query`` field.
    :return: dict with the keys
        ``tiered`` -- list of ``[minQuantity, value]`` pairs built from the
        reply's ``volumePrices`` (``[[0, 0.00]]`` when there are none);
        ``stock`` -- ``[reply['stock']['atpInv'], reply['min']]``
        (defaults ``0`` and ``1``);
        ``increment`` -- ``reply['mult']`` (default ``1``).
    """
    url = 'https://www.maximintegrated.com/bin/mySearchServlet'
    # Form template parsed by box.headers_to_dict; the sample query value
    # is replaced with the requested part number right below.
    post_data = box.headers_to_dict("""input:single
query:MAX15036ATE+T
userType:anonymous""")
    post_data['query'] = part_number
    # Defaults returned whenever the lookup cannot be completed.
    part_data = {
        'tiered': [[0, 0.00]],
        'stock': [0, 1],
        'increment': 1,
    }
    try:
        response = requests.post(url=url, data=post_data, headers=headers)
        # Debug dump of the raw reply.  ``response.content`` is a byte
        # string, so the file is opened in binary mode.
        with open('stock.html', 'wb') as fp:
            fp.write(response.content)
        data = json.loads(response.content)
    except Exception:
        # Deliberate best-effort fallback.  Unlike a bare ``except`` this
        # still lets KeyboardInterrupt and SystemExit propagate.
        return part_data
    if not isinstance(data, dict):
        # Valid JSON but not an object: there is nothing to read from it.
        return part_data
    # ``or`` also covers keys that are present but null in the reply.
    tiered = [
        [vol.get('minQuantity'), vol.get('value')]
        for vol in data.get('volumePrices') or []
    ]
    stock = (data.get('stock') or {}).get('atpInv', 0)
    part_data['stock'] = [stock, data.get('min', 1)]
    part_data['increment'] = data.get('mult', 1)
    part_data['tiered'] = tiered or [[0, 0.00]]
    return part_data
Exemple #2
0
import requests
from tools import box
from bs4 import BeautifulSoup

# Module-level logger registered under the name 'hqchip_spider'.
# NOTE(review): ``logging`` is not imported in the visible part of this
# file -- confirm an ``import logging`` exists above.
_logger = logging.getLogger('hqchip_spider')

# Raw request headers for www.supchip.com, one "Name: value" pair per line
# (presumably copied from a browser session -- TODO confirm).
headers_str = """
Host: www.supchip.com
Connection: keep-alive
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
Referer: http://www.supchip.com/
Accept-Encoding: gzip, deflate, sdch
Accept-Language: en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4
"""
# Result of parsing ``headers_str`` with box.headers_to_dict (presumably a
# name -> value dict, going by the helper's name).
default_headers = box.headers_to_dict(headers_str)


def fetcher(url, data=None, **kwargs):
    """获取URL数据"""
    if kwargs.get('headers', None):
        _headers = kwargs['headers']
    else:
        _headers = {
            'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Connection': 'keep-alive',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36',
Exemple #3
0
import json
import requests
from tools import box
from w3lib.html import remove_tags

# Raw request headers for the maximintegrated.com servlets, one
# "name:value" pair per line.
headers = """
accept:application/json, text/javascript, */*; q=0.01
accept-encoding:gzip, deflate, br
accept-language:en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4
cache-control:no-cache
content-length:0
origin:https://www.maximintegrated.com
pragma:no-cache
user-agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36
x-requested-with:XMLHttpRequest"""
# Rebinds ``headers`` from the raw string to the value returned by
# box.headers_to_dict; get_part_name passes it as ``headers=`` to requests.
headers = box.headers_to_dict(headers)


def get_part_name(series_name=None):
    """Print and return the first orderable part number of a series.

    Queries the maximintegrated.com product-catalog servlet with
    ``series_name`` as ``product_info_root_part`` and reads
    ``Root_Part_Numbers[0][series_name]['Orderable_Part_Numbers'][0]``
    from the JSON reply.

    :param series_name: series / root part name; when empty or ``None``
        no request is made.
    :return: the part number that was printed, or ``None`` when no request
        was made or the reply does not contain that path.
    """
    if not series_name:
        return None
    url = 'https://www.maximintegrated.com/bin/ProductCatalogSearchServlet'
    params_data = {'canSearch': '', 'product_info_root_part': series_name}
    html = requests.get(url=url, params=params_data, headers=headers)
    data = json.loads(html.content)
    try:
        series = data['Root_Part_Numbers'][0][series_name]
        part_name = series['Orderable_Part_Numbers'][0]
    except (KeyError, IndexError, TypeError):
        # The reply has no entry for this series (missing key, empty list
        # or null value): report "not found" instead of crashing.
        return None
    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print(part_name)
    return part_name

Exemple #4
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created by Vin on 2017/5/2

import requests
from tools.incapsula_cracker import incapsula_parse, IncapSession
import json
import re
from tools.box import headers_to_dict
# Request headers for www.ttiinc.com, parsed by headers_to_dict from
# "Name:value" lines.
# NOTE(review): the Cookie line hard-codes Incapsula session cookies
# (incap_ses_*, visid_incap_*) and analytics ids.  They look like a capture
# from a single browser session and are presumably stale -- confirm whether
# they are still needed and whether they belong in source control.
_headers = headers_to_dict(
    """Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
Accept-Encoding:gzip, deflate, sdch, br
Accept-Language:en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4
Cache-Control:no-cache
Connection:keep-alive
Cookie:incap_ses_500_731139=7+rGP1vG0Ukgo9ADqVvwBrbhB1kAAAAAVtQzQCdT9m6c+BcY9vglpw==; renderid=rend02; incap_ses_490_731139=fgMZJ2teUiwcORuFy9TMBlAgCFkAAAAAUjSMJ6QEU8t2pIc6Y+ReAA==; incap_ses_532_731139=PIIVDt11gSVgruX0eAtiB4YjCFkAAAAAIUZOznZeB3+B/99o7PT9fg==; s_sq=%5B%5BB%5D%5D; _ga=GA1.2.667915121.1489570252; _gid=GA1.2.407305118.1493712114; __utmt_75e5e075f7ab2c7c6d58c241dc444533=1; __utma=132994193.667915121.1489570252.1493712115.1493714367.22; __utmb=132994193.1.10.1493714367; __utmc=132994193; __utmz=132994193.1489570252.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); visid_incap_731139=LE3UJZ6TS2C/6+D3lL8BuMQJyVgAAAAAQkIPAAAAAACA00V7AXMcWO7ULZbwNsZbb4ooMMSSq06J; incap_ses_552_731139=+v5qKmFRHEyslEbtTxmpB7tFCFkAAAAAIq6K7KYpuRk/Nq7gniyL6A==; s_cc=true; s_fid=0D910DA93DE8FF4F-1AC34B66A1B67C54; s_nr=1493714370591-Repeat; s_lv=1493714370595; s_lv_s=Less%20than%201%20day; s_vi=[CS]v1|2C6484E8051D145C-60000151A001129E[CE]
DNT:1
Host:www.ttiinc.com
Pragma:no-cache
Upgrade-Insecure-Requests:1
User-Agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36"""
)


def main():
    home = "https://www.ttiinc.com/content/ttiinc/en.html"
    default_headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36',
        'Accept': 'application/json, text/plain, */*',
        'Accept-Encoding': 'gzip, deflate, br',
Exemple #5
0
import time
import urlparse
import requests
import lxml.html

from tools import box as util

# Raw request headers for www.richardsonrfpd.com, one "Name: value" pair
# per line.
headers_string = """Host: www.richardsonrfpd.com
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
Referer: http://www.richardsonrfpd.com/Pages/home.aspx
Accept-Encoding: gzip, deflate
Accept-Language: en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4
"""

# Result of parsing ``headers_string`` with util.headers_to_dict.
default_headers = util.headers_to_dict(headers_str=headers_string)
# NOTE(review): this print runs every time the module is imported -- it
# looks like leftover debug output; confirm before removing.
print default_headers


def get_event_kwargs(response=None, **kwargs):
    """获取网页中隐藏表单的值
    返回整理好的触发事件需要的表单数据字典
    :param response: 请求网页返回的对象
    :param kwargs: 其他的需要加入时间表单的键值对
    :return: 字典
    """
    html = kwargs.pop('html', None)
    if response is None and html is None:
        return -400
    try:
        html = response.text.encode('utf-8') if response else html
# coding=utf-8

from tools import box

# Raw request headers for www.microchipdirect.com, one "Name: value" pair
# per line; kept as a plain string and only parsed in the __main__ guard.
headers = """
Host: www.microchipdirect.com
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
Referer: http://www.microchipdirect.com/Chart.aspx?branchId=30049&mid=10&treeid=1
Accept-Encoding: gzip, deflate, sdch
Accept-Language: en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4
"""
if __name__ == "__main__":
    # Manual check: show what box.headers_to_dict produces for ``headers``.
    # The single-argument parenthesised form prints the same on Python 2
    # and also parses on Python 3.
    print(box.headers_to_dict(headers))