def get_img(self, url, headers=None, cookies=None, timeout=60, verify=False,
            proxies=None, allow_redirects=True, params=None):
    """
    Fetch an image with an HTTP GET request and return its raw bytes.

    :param url: URL to request
    :param headers: request headers (wrapped with ``odict`` before sending)
    :param cookies: request cookies
    :param timeout: request timeout, passed through to the HTTP client
    :param verify: SSL verification setting, passed through to the HTTP client
    :param proxies: proxies to use
    :param allow_redirects: whether redirects are followed
    :param params: query-string parameters
    :return: the binary response body (``response.content``)
    :raises: whatever ``raise_for_status()`` raises for an unsuccessful
        HTTP status
    """
    # Use the instance session when one is set, otherwise the module-level
    # ``requests`` API; both expose the same ``get`` call.
    client = self.session if self.session else requests
    response = client.get(
        url,
        headers=odict(headers),
        cookies=cookies,
        timeout=timeout,
        verify=verify,
        proxies=proxies,
        allow_redirects=allow_redirects,
        params=params,
    )
    # Fail loudly on an HTTP error status instead of returning an error page.
    response.raise_for_status()
    return response.content
def classify(in_x, groups, labels, k):
    """
    Classify ``in_x`` by majority vote among its ``k`` nearest samples.

    The distance computation relies on helpers defined elsewhere in the file
    (``array_len``, ``tile``, ``exponential_operation``, ``array_sum``,
    ``argsort``); they are presumably NumPy-style wrappers -- confirm against
    their definitions.

    :param in_x: the sample to classify
    :param groups: the known samples ``in_x`` is compared against
    :param labels: labels of the known samples, indexed like ``groups``
    :param k: number of nearest samples that take part in the vote
    :return: the label that occurs most often among the ``k`` nearest samples
    :raises ValueError: if no votes were collected (e.g. ``k`` is 0), because
        ``max`` is then called on an empty mapping
    """
    # Euclidean distance between in_x and every known sample:
    # difference -> square -> sum -> square root.
    sample_count = array_len(groups)
    diff = tile(in_x, (sample_count, 1)) - groups
    squared = exponential_operation(diff, 2)
    distances = exponential_operation(array_sum(squared), 0.5)

    # Indices of the samples ordered by distance (assumes argsort sorts
    # ascending, i.e. nearest first -- TODO confirm).
    nearest = argsort(distances)

    # Count how often each label occurs among the first k entries.
    votes = odict()
    for i in range(k):
        label = labels[nearest[i]]
        votes[label] = votes.get(label, 0) + 1

    # Pick the label with the highest count.  A plain max(votes) would compare
    # the labels themselves and return the largest label, not the most
    # frequent one.  On a tie, max() keeps the first maximal label in the
    # mapping's iteration order.
    return max(votes, key=votes.get)
def post(self, url, headers=None, cookies=None, timeout=60, form_data=None,
         verify=False, proxies=None, allow_redirects=True, encoding='utf-8',
         params=None, stream=False):
    """
    Perform an HTTP POST request.

    :param url: URL to request
    :param headers: request headers (wrapped with ``odict`` before sending)
    :param cookies: request cookies
    :param timeout: request timeout
    :param form_data: form data submitted with the request
    :param verify: SSL verification setting
    :param proxies: proxies to use
    :param allow_redirects: whether redirects are followed
    :param encoding: encoding applied to the returned html
    :param params: query-string parameters
    :param stream: whether the result is wanted as stream data
    :return: (html, response headers, response cookies, history,
        response time in seconds), or the stream data
    """
    # Collect everything except the verb and the URL, then hand the request
    # over to the shared get/post implementation.
    options = {
        'headers': odict(headers),
        'cookies': cookies,
        'timeout': timeout,
        'form_data': form_data,
        'verify': verify,
        'proxies': proxies,
        'allow_redirects': allow_redirects,
        'encoding': encoding,
        'params': params,
        'stream': stream,
    }
    return self.__select('post', url, **options)
def __select(self, method, url, headers=None, cookies=None, timeout=60,
             verify=False, proxies=None, allow_redirects=True,
             encoding='utf-8', params=None, form_data=None, stream=False):
    """
    Shared implementation behind the HTTP GET and POST helpers.

    :param method: ``'get'`` or ``'post'``
    :param url: URL to request
    :param headers: request headers (wrapped with ``odict`` before sending)
    :param cookies: request cookies
    :param timeout: request timeout
    :param verify: SSL verification setting
    :param proxies: proxies to use
    :param allow_redirects: whether redirects are followed
    :param encoding: encoding set on the response before its text is read
    :param params: query-string parameters
    :param form_data: form data; only sent for ``'post'`` requests
    :param stream: when true, return the response object itself instead of
        the result tuple
    :return: ``(html, response headers, response cookies as a dict,
        redirect history, elapsed time in seconds)``, or the response object
        when ``stream`` is true
    :raises Exception: if ``method`` is neither ``'get'`` nor ``'post'``;
        ``raise_for_status()`` errors for an unsuccessful HTTP status are
        propagated as well
    """
    # De-duplication hook.  Per the original note: when enabled, a URL not yet
    # in the history is allowed and remembered, and a repeated visit raises.
    # (filter_duplicate is defined elsewhere; behaviour not verified here.)
    self.filter_duplicate(url)

    if method not in ('get', 'post'):
        raise Exception('unsupported http method')

    # Use the instance session when one is set, otherwise the module-level
    # ``requests`` API; both expose matching ``get`` / ``post`` calls.
    client = self.session if self.session else requests
    request_kwargs = {
        'headers': odict(headers),
        'cookies': cookies,
        'timeout': timeout,
        'verify': verify,
        'proxies': proxies,
        'allow_redirects': allow_redirects,
        'params': params,
    }
    if method == 'post':
        # Only POST carries a form body.
        request_kwargs['data'] = form_data
    r = getattr(client, method)(url, **request_kwargs)

    # Raise if the HTTP status code signals an error.
    r.raise_for_status()
    r.encoding = encoding  # encoding used when decoding r.text

    if stream:
        # NOTE(review): ``stream`` is not forwarded to the HTTP client, so it
        # only selects what is returned here -- confirm whether the request
        # itself should also be made with stream=True.
        return r

    # total_seconds() covers the whole duration; ``elapsed.microseconds`` is
    # only the sub-second component and drops whole seconds for slow
    # responses.
    return (r.text, r.headers, dict_from_cookiejar(r.cookies), r.history,
            r.elapsed.total_seconds())