コード例 #1
0
def open_http(url, data=None):
    """Prepare an HTTP request for *url* on a ``FakeHTTPConnection``.

    Args:
        url: Either a string understood by ``splithost`` (direct request)
            or a ``(proxy_host, full_url)`` pair (request via a proxy).
            Credentials embedded as ``user:password@`` in the host or the
            proxy host are turned into Basic authentication headers.
        data: Optional payload.  When given, a POST with a form-encoded
            ``Content-Type`` and a ``Content-Length`` header is prepared;
            otherwise a GET is prepared.  Only headers are emitted here.

    Returns:
        The ``FakeHTTPConnection`` after ``endheaders()`` has been called.

    Raises:
        IOError: If no host could be determined from *url*.
    """
    import base64

    def _basic_token(credentials):
        # b64encode() accepts and returns bytes on Python 3; convert both
        # ways so the header carries plain text instead of "b'...'".
        if not credentials:
            return None
        return base64.b64encode(credentials.encode()).decode('ascii')

    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = urllib.parse.unquote(host)
        realhost = host
    else:
        host, selector = url
        # The proxy host itself may carry authorization information.
        proxy_passwd, host = splituser(host)
        # From here on we deal with the URL that is actually requested.
        urltype, rest = urllib.parse.splittype(selector)
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                # Rebuild the selector without the embedded credentials.
                selector = "%s://%s%s" % (urltype, realhost, rest)
            # NOTE(review): on Python 3 this helper lives in urllib.request;
            # confirm which module object ``urllib`` is bound to here.
            if urllib.proxy_bypass(realhost):
                host = realhost

    if not host:
        raise IOError('http error', 'no host given')

    proxy_auth = _basic_token(proxy_passwd)
    auth = _basic_token(user_passwd)

    c = FakeHTTPConnection(host)
    if data is not None:
        c.putrequest('POST', selector)
        c.putheader('Content-Type', 'application/x-www-form-urlencoded')
        c.putheader('Content-Length', '%d' % len(data))
    else:
        c.putrequest('GET', selector)
    if proxy_auth:
        c.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth:
        c.putheader('Authorization', 'Basic %s' % auth)
    if realhost:
        c.putheader('Host', realhost)
    for args in URLopener().addheaders:
        c.putheader(*args)
    c.endheaders()
    return c
コード例 #2
0
    def post(self, query):
        """POST *query* to the institution's URL over HTTPS.

        The request is sent step by step because the header order matters
        to at least one server (see the comment below).

        :param query: Body content to POST
        :type query: str
        :return: Response body decoded as ASCII, undecodable bytes dropped
        :rtype: str
        """
        i = self.institution
        logging.debug('posting data to %s', i.url)
        logging.debug('---- request ----')
        logging.debug(query)
        garbage, path = splittype(i.url)
        host, selector = splithost(path)
        # Encode once so Content-Length counts the bytes actually sent
        # rather than the number of characters in the string.
        body = query.encode()
        h = HTTPSConnection(host, timeout=60)
        try:
            # Discover requires a particular ordering of headers, so send
            # the request step by step.
            h.putrequest('POST',
                         selector,
                         skip_host=True,
                         skip_accept_encoding=True)
            h.putheader('Content-Type', 'application/x-ofx')
            h.putheader('Host', host)
            h.putheader('Content-Length', len(body))
            h.putheader('Connection', 'Keep-Alive')
            if self.accept:
                h.putheader('Accept', self.accept)
            if self.user_agent:
                h.putheader('User-Agent', self.user_agent)
            h.endheaders(body)
            res = h.getresponse()
            response = res.read().decode('ascii', 'ignore')
            logging.debug('---- response ----')
            logging.debug(res.__dict__)
            logging.debug(response)
            res.close()
        finally:
            # The connection object is local to this call; release its
            # socket even when the exchange fails part-way through.
            h.close()

        return response
コード例 #3
0
def here(modal, string):
    """Detect "click here"-style anchors that link to a different host.

    Every ``<a ... href=...>...</a>`` element in *string* whose markup
    contains one of the trigger words is inspected; the host of the first
    http(s) URL inside it is compared against *modal*.

    Args:
        modal: Reference host (presumably the dominant host of the
            document); a link host counts as "same" when it is contained
            in this value.
        string: HTML text to scan.

    Returns:
        ``(flag, host)`` as two strings: *flag* is ``'1'`` when at least one
        trigger anchor points to a host not contained in *modal*, else
        ``'0'``; *host* is the last such foreign host, otherwise the host of
        the last trigger anchor, otherwise *modal*.
    """
    ban_words = ('here', 'click', 'Here', 'Click', 'CLICK', 'HERE')
    url_pattern = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    here_num = 0
    last_host = modal
    foreign_host = ''

    for anchor in re.findall(r'<[Aa].*?href=.*?</[Aa]>', string, re.S):
        # Decide per anchor: a trigger word in an earlier anchor must not
        # make every later anchor count as a "click here" link.
        if not any(word in anchor for word in ban_words):
            continue

        match = url_pattern.search(anchor)
        if match is None:
            continue

        proto, rest = splittype(match.group(0))
        host, rest = splithost(rest)
        host, port = splitport(host)
        last_host = host

        if host not in modal:
            # Different domain -> flag it and remember which one.
            foreign_host = host
            here_num = 1

    return str(here_num), str(foreign_host or last_host)
コード例 #4
0
ファイル: xmlrpc.py プロジェクト: lmcdonough/supervisor
 def __init__(self, username=None, password=None, serverurl=None):
     """Store the credentials and build a connection factory for *serverurl*.

     ``serverurl`` must start with ``http://`` (TCP connection, port 80
     when none is given) or ``unix://`` (the remainder is used as the
     socket file); anything else raises ``ValueError``.
     """
     xmlrpclib.Transport.__init__(self)
     self.username = username
     self.password = password
     self.verbose = False
     self.serverurl = serverurl

     if not serverurl.startswith(('http://', 'unix://')):
         raise ValueError('Unknown protocol for serverurl %s' % serverurl)

     if serverurl.startswith('http://'):
         _scheme, uri = urllib.splittype(serverurl)
         netloc, _path = urllib.splithost(uri)
         host, port = urllib.splitport(netloc)
         port = 80 if port is None else int(port)

         def get_connection(host=host, port=port):
             return httplib.HTTPConnection(host, port)
     else:
         def get_connection(serverurl=serverurl):
             # we use 'localhost' here because domain names must be
             # < 64 chars (or we'd use the serverurl filename)
             conn = UnixStreamHTTPConnection('localhost')
             conn.socketfile = serverurl[7:]
             return conn

     self._get_connection = get_connection
コード例 #5
0
ファイル: AsyncRequest.py プロジェクト: shacleff/YiWuMJ
    def work(self, task):
        """Start a non-blocking connect for the URL in ``task[0]``.

        The URL's path and host are appended to *task*, a timeout timer is
        stored in ``self._write_timer`` under the socket's file descriptor,
        and ``self.onSend`` is registered as the write-ready callback.  On
        any failure the task is put back on ``self._tasks``, the error is
        logged and the socket is closed.
        """
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            _scheme, remainder = splittype(task[0])
            netloc, path = splithost(remainder)
            host, port = splitport(netloc)
            task.append(path)
            task.append(host)
            sock.setblocking(0)
            target_port = int(port) if port else 80
            sock.connect_ex((host, target_port))

            def on_timeout():
                # Tear the socket down if it is still open, then report
                # the failure to the task's callback (task[2]) with None.
                if not sock._closed:
                    KBEngine.deregisterWriteFileDescriptor(sock.fileno())
                    sock.close()
                if task and task[2]:
                    task[2](None)

            self._write_timer[sock.fileno()] = self.add_timer(
                REQUEST_TIMEOUT, on_timeout)
            send_callback = Functor(self.onSend, task, sock)
            KBEngine.registerWriteFileDescriptor(sock.fileno(), send_callback)
        except:
            # NOTE(review): bare except also traps KeyboardInterrupt and
            # SystemExit -- confirm that is intended.
            self._tasks.append(task)
            self.logsError()
            if not sock._closed:
                sock.close()
コード例 #6
0
    def __init__(self, username=None, password=None, serverurl=None):
        """Remember the credentials and pick a connection factory.

        An ``http://`` *serverurl* yields a factory creating
        ``httplib.HTTPConnection(host, port)`` (port defaults to 80); a
        ``unix://`` one yields a factory creating a
        ``UnixStreamHTTPConnection`` whose ``socketfile`` is the text after
        the prefix.  Any other prefix raises ``ValueError``.
        """
        xmlrpclib.Transport.__init__(self)
        self.username, self.password = username, password
        self.verbose = False
        self.serverurl = serverurl

        if serverurl.startswith('http://'):
            remainder = urllib.splittype(serverurl)[1]
            netloc = urllib.splithost(remainder)[0]
            host, port = urllib.splitport(netloc)
            port = int(port) if port is not None else 80

            def get_connection(host=host, port=port):
                return httplib.HTTPConnection(host, port)

        elif serverurl.startswith('unix://'):

            def get_connection(serverurl=serverurl):
                # we use 'localhost' here because domain names must be
                # < 64 chars (or we'd use the serverurl filename)
                conn = UnixStreamHTTPConnection('localhost')
                conn.socketfile = serverurl[len('unix://'):]
                return conn

        else:
            raise ValueError('Unknown protocol for serverurl %s' % serverurl)

        self._get_connection = get_connection
コード例 #7
0
    def __init__(self,
                 server_url,
                 proxy=None,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 ca_cert=None,
                 client_cert=None,
                 client_cert_pass=None):
        """
        Set up the transport.

        The scheme of *server_url* decides whether HTTPS is used.  When no
        *proxy* is given, the ``https_proxy`` / ``http_proxy`` environment
        variable matching that scheme is consulted.  If a proxy ends up
        being used, its own scheme decides the final HTTPS setting.
        """
        xmlrpclib.Transport.__init__(self)

        cert_pair = client_cert or (None, None)
        server_scheme = urllib.splittype(server_url)[0]

        self.disable_ssl_validation = False
        self.scheme = server_scheme
        self.https = (server_scheme == 'https')
        self.proxy = None
        self.timeout = timeout
        self._certfile, self._keyfile = cert_pair
        self.ca_cert = ca_cert
        self.client_cert_pass = client_cert_pass

        # fall back to the system proxy when none was forced
        if not proxy:
            env_name = 'https_proxy' if self.https else 'http_proxy'
            proxy = os.environ.get(env_name)

        if not proxy:
            return

        proxy_scheme, proxy_rest = urllib.splittype(proxy)
        self.proxy = urllib.splithost(proxy_rest)[0]
        # the proxy's scheme, not the server's, now decides on https
        self.https = (proxy_scheme == 'https')
コード例 #8
0
 def open_local_file(self, url):
     """Open the local file named by *url* and wrap it with headers.

     The file is served when *url* has no host part, or when the host has
     no port and resolves to this machine; otherwise ``IOError`` is raised.
     A failing ``os.stat`` is re-raised as ``IOError``.
     """
     import mimetypes
     import mimetools
     import email.Utils
     from io import StringIO

     host, file = splithost(url)
     localname = url2pathname(file)
     try:
         stats = os.stat(localname)
     except OSError as e:
         raise IOError(e.errno, e.strerror, e.filename)

     size = stats.st_size
     modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
     mtype = mimetypes.guess_type(url)[0]
     header_text = (
         'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
         (mtype or 'text/plain', size, modified))
     headers = mimetools.Message(StringIO(header_text))

     def _serve():
         # Absolute paths are reported back as file:// URLs.
         urlfile = 'file://' + file if file[:1] == '/' else file
         return addinfourl(open(localname, 'rb'), headers, urlfile)

     if not host:
         return _serve()

     host, port = splitport(host)
     if not port \
        and socket.gethostbyname(host) in (localhost(), thishost()):
         return _serve()
     raise IOError('local file error', 'not on local host')
コード例 #9
0
def get_info_by_url(url):
    """Split *url* into ``(protocol, host, path, port)``.

    The port is returned as a string.  When the URL carries no explicit
    port, ``'443'`` is used for ``https`` URLs and ``'80'`` for every
    other protocol.
    """
    protocol, rest = parse.splittype(url)
    host, path = parse.splithost(rest)
    host, port = parse.splitport(host)
    if port is None:
        # An https URL without an explicit port must not be sent to 80.
        port = '443' if protocol == 'https' else '80'
    return protocol, host, path, port
コード例 #10
0
 def handleData(self, response):
     """Extract chapter URLs and titles from *response* into ``Chapter`` objects.

     Each name in ``chapter_fields`` is extracted from ``response.text``
     with the regex rule in ``self.re_rule`` or, failing that, the XPath
     rule in ``self.xpath_rule``.  When the first URL is site-relative
     (starts with a single ``/``), all URLs are prefixed with the scheme
     and host of the site's menu URL from ``WEB_SETTINGS``.

     Returns:
         A list with one ``Chapter`` per extracted URL, whose ``url`` and
         ``title`` have spaces removed and whose ``content`` holds a
         placeholder text.
     """
     s = response.text
     chapters_dict = dict()
     for field in chapter_fields:
         if self.re_rule.get(field):
             chapters_dict[field] = getRe(s, self.re_rule[field])
         elif self.xpath_rule.get(field):
             chapters_dict[field] = self.getXpath(s, self.xpath_rule[field])

     urls = chapters_dict['url']
     # startswith() copes with an empty result list and with a URL that is
     # exactly "/", both of which used to raise IndexError here.
     first_url = urls[0] if urls else ''
     if first_url.startswith('/') and not first_url.startswith('//'):
         menu_url = WEB_SETTINGS[self.web]['menu'].format('')
         proto, rest = splittype(menu_url)
         host, rest = splithost(rest)
         chapters_dict['url'] = [proto + '://' + host + url for url in urls]
     print(chapters_dict)

     chapters = []
     for i, chapter_url in enumerate(chapters_dict['url']):
         chapter = Chapter()
         chapter.url = chapter_url.replace(' ', '')
         chapter.title = chapters_dict['title'][i].replace(' ', '')
         # Placeholder text ("this chapter failed to download").
         chapter.content = '该章节下载失败'
         chapters.append(chapter)
     return chapters
コード例 #11
0
def find_modal(list):
    """Return the most frequent host among the http(s) URLs found in *list*.

    Each item is converted to text and only the first URL found in it is
    considered.  Returns ``'-'`` when no item contains a URL.
    """
    url_pattern = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')

    hosts = []
    for entry in list:
        found = url_pattern.search(str(entry))
        if found is None:
            continue
        _scheme, remainder = splittype(found.group(0))
        netloc, _path = splithost(remainder)
        hostname, _port = splitport(netloc)
        hosts.append(hostname)

    # Only the single most common host is of interest.
    ranking = collections.Counter(hosts).most_common(1)
    return ranking[0][0] if ranking else '-'
コード例 #12
0
 def retry_proxy_https_basic_auth(self, url, realm, data=None):
     """Retry an HTTPS request after adding proxy credentials.

     Credentials for the HTTPS proxy are obtained from
     ``self.get_user_passwd``; when both are empty, ``None`` is returned.
     Otherwise ``self.proxies['https']`` is rewritten to embed the quoted
     ``user:password@`` pair and the request is re-opened.
     """
     target_host, target_selector = splithost(url)
     newurl = 'https://' + target_host + target_selector

     _scheme, proxy_rest = splittype(self.proxies['https'])
     proxy_netloc, proxy_selector = splithost(proxy_rest)
     # Drop any credentials already present in front of the proxy host.
     at = proxy_netloc.find('@') + 1
     proxyhost = proxy_netloc[at:]

     user, passwd = self.get_user_passwd(proxyhost, realm, at)
     if not user and not passwd:
         return None

     credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
     self.proxies['https'] = (
         'https://' + credentials + '@' + proxyhost + proxy_selector)

     request_args = (newurl,) if data is None else (newurl, data)
     return self.open(*request_args)
コード例 #13
0
 def __init__(self, url, headers=None):
     """Record *url* and *headers* and derive the request type and host.

     ``origin_req_host`` is computed by ``cookielib.request_host`` before
     the type and host attributes are set; the host is unquoted when
     present.
     """
     self.url = url
     self.headers = headers
     self.origin_req_host = cookielib.request_host(self)
     self.type, remainder = splittype(url)
     host, _selector = splithost(remainder)
     self.host = unquote(host) if host else host
コード例 #14
0
    def _do_post(self, query, extra_headers=None):
        """
        Do a POST to the Institution.

        :param query: Body content to POST (OFX Query)
        :type query: str
        :param extra_headers: Extra headers to send with the request, as a list
          of (Name, Value) header 2-tuples.
        :type extra_headers: list
        :return: 2-tuple of (HTTPResponse, str response body)
        :rtype: tuple
        """
        i = self.institution
        logging.debug('posting data to %s' % i.url)
        garbage, path = splittype(i.url)
        host, selector = splithost(path)
        # Encode once so Content-Length counts the bytes actually sent
        # rather than the number of characters in the string.
        body = query.encode()
        try:
            h = HTTPSConnection(host, timeout=60)
            h.connect()
        except ssl.SSLError as ex:
            if ex.reason != "UNSUPPORTED_PROTOCOL":
                raise
            # Retry with a TLSv1-only context after the default handshake
            # failed with UNSUPPORTED_PROTOCOL.
            h = HTTPSConnection(host,
                                timeout=60,
                                context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
            h.connect()
        try:
            # Discover requires a particular ordering of headers, so send
            # the request step by step.
            h.putrequest('POST',
                         selector,
                         skip_host=True,
                         skip_accept_encoding=True)
            headers = [('Content-Type', 'application/x-ofx'), ('Host', host),
                       ('Content-Length', len(body)),
                       ('Connection', 'Keep-Alive')]
            if self.accept:
                headers.append(('Accept', self.accept))
            if self.user_agent:
                headers.append(('User-Agent', self.user_agent))
            for ehname, ehval in extra_headers or ():
                headers.append((ehname, ehval))
            logging.debug('---- request headers ----')
            for hname, hval in headers:
                logging.debug('%s: %s', hname, hval)
                h.putheader(hname, hval)
            logging.debug('---- request body (query) ----')
            logging.debug(query)
            h.endheaders(body)
            res = h.getresponse()
            response = res.read().decode('ascii', 'ignore')
            logging.debug('---- response ----')
            logging.debug(res.__dict__)
            logging.debug('Headers: %s', res.getheaders())
            logging.debug(response)
            res.close()
        finally:
            # The connection is local to this call; always release it.
            h.close()
        return res, response
コード例 #15
0
def get_parser_from_url(url):
    """Return the ``PARSER`` entry whose key occurs in the host of *url*.

    Keys are tried in the mapping's iteration order and matched as
    substrings of the host.  Returns ``None`` when nothing matches or
    when *url* has no host part.
    """
    protocol, s1 = splittype(url)
    host, path = splithost(s1)
    if not host:
        # No "//host" part: a substring test against None would raise.
        return None
    for key, parser in PARSER.items():
        if key in host:
            return parser

    return None
コード例 #16
0
ファイル: client.py プロジェクト: egh/ofxclient
    def _do_post(self, query, extra_headers=None):
        """
        Do a POST to the Institution.

        :param query: Body content to POST (OFX Query)
        :type query: str
        :param extra_headers: Extra headers to send with the request, as a list
          of (Name, Value) header 2-tuples.
        :type extra_headers: list
        :return: 2-tuple of (HTTPResponse, str response body)
        :rtype: tuple
        """
        i = self.institution
        logging.debug('posting data to %s' % i.url)
        garbage, path = splittype(i.url)
        host, selector = splithost(path)
        # Content-Length must describe the encoded payload, so encode the
        # query once and reuse the bytes for both the header and the body.
        payload = query.encode()
        try:
            h = HTTPSConnection(host, timeout=60)
            h.connect()
        except ssl.SSLError as ex:
            if ex.reason != "UNSUPPORTED_PROTOCOL":
                raise
            # The default handshake was rejected; retry with TLSv1 only.
            h = HTTPSConnection(host, timeout=60,
                                context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
            h.connect()
        try:
            # Discover requires a particular ordering of headers, so send
            # the request step by step.
            h.putrequest('POST', selector, skip_host=True,
                         skip_accept_encoding=True)
            headers = [
                ('Content-Type', 'application/x-ofx'),
                ('Host', host),
                ('Content-Length', len(payload)),
                ('Connection', 'Keep-Alive')
            ]
            if self.accept:
                headers.append(('Accept', self.accept))
            if self.user_agent:
                headers.append(('User-Agent', self.user_agent))
            for ehname, ehval in extra_headers or ():
                headers.append((ehname, ehval))
            logging.debug('---- request headers ----')
            for hname, hval in headers:
                logging.debug('%s: %s', hname, hval)
                h.putheader(hname, hval)
            logging.debug('---- request body (query) ----')
            logging.debug(query)
            h.endheaders(payload)
            res = h.getresponse()
            response = res.read().decode('ascii', 'ignore')
            logging.debug('---- response ----')
            logging.debug(res.__dict__)
            logging.debug('Headers: %s', res.getheaders())
            logging.debug(response)
            res.close()
        finally:
            # Nothing keeps a reference to the connection; close it here.
            h.close()
        return res, response
コード例 #17
0
    def _fix_url(self, url):
        """Turn a relative *url* into an absolute one based on ``self.pre_url``.

        Empty strings and strings containing ``"http"`` are returned
        unchanged; anything else is appended to the scheme and host of
        ``self.pre_url``, inserting a ``/`` when *url* lacks a leading one.
        """
        if url == "" or "http" in url:
            return url

        scheme, remainder = parse.splittype(self.pre_url)
        host, _path = parse.splithost(remainder)
        separator = "" if url[0] == "/" else "/"
        return scheme + "://" + host + separator + url
コード例 #18
0
    def __init__(self,
                 uri,
                 transport=None,
                 encoding=None,
                 verbose=0,
                 version=None,
                 headers=None,
                 history=None,
                 config=jsonrpclib.config.DEFAULT,
                 context=None,
                 timeout=None):
        """
        Sets up the server proxy

        :param uri: Request URI
        :param transport: Custom transport handler
        :param encoding: Specified encoding
        :param verbose: Log verbosity level
        :param version: JSON-RPC specification version
        :param headers: Custom additional headers for each request
        :param history: History object (for tests)
        :param config: A JSONRPClib Config instance
        :param context: The optional SSLContext to use
        :param timeout: Passed to the default transport when one is created
        :raise IOError: The URI scheme is neither http nor https
        """
        # Keep the configuration around
        self._config = config
        self.__version = version if version else config.version

        schema, uri = splittype(uri)
        if schema != 'http' and schema != 'https':
            _logger.error("jsonrpclib only support http(s) URIs, not %s",
                          schema)
            raise IOError('Unsupported JSON-RPC protocol.')

        host, handler = splithost(uri)
        self.__host = host
        # Not sure if this is in the JSON spec?
        self.__handler = handler if handler else '/'

        # Build a default transport only when the caller supplied none
        if transport is None and schema == 'https':
            transport = SafeTransport(config=config,
                                      context=context,
                                      timeout=timeout)
        elif transport is None:
            transport = Transport(config=config, timeout=timeout)
        self.__transport = transport

        self.__encoding = encoding
        self.__verbose = verbose
        self.__history = history

        # Global custom headers are injected into Transport
        self.__transport.push_headers(headers or {})
コード例 #19
0
def domain_name(list):
    """Count the distinct hosts (port stripped) among the URLs in *list*."""
    distinct_hosts = set()
    for address in list:
        _scheme, remainder = splittype(address)
        netloc, _path = splithost(remainder)
        hostname, _port = splitport(netloc)
        distinct_hosts.add(hostname)
    return len(distinct_hosts)
コード例 #20
0
ファイル: program.py プロジェクト: dannyrz/AliZhaoBiao
def parsePage(spider, url, response):
    """Extract the configured page properties and optionally publish them.

    ``spider.args['PagePropertyRegularExpression']`` holds a JSON object
    mapping a property name to an extraction rule: a value starting with
    ``$`` is a regular expression (its first group is the result), any
    other value is used as an XPath expression (its first result is kept).
    When ``spider.args['DataPersistenceType']`` is ``'WPRPC'`` the
    ``title`` and ``content_raw`` properties are posted through the
    WordPress client.

    The *url* argument is not used in this function.
    """
    html = response.content
    selector = etree.HTML(html)
    html = html.decode('utf-8')
    propertys = json.loads(spider.args['PagePropertyRegularExpression'])
    for key in propertys:
        item = propertys[key]
        if item.startswith('$'):
            # Everything after the leading '$' is the regular expression.
            p1 = r'%s' % item[1:]
            pattern = re.compile(p1)
            match = pattern.search(html)

            if match:
                propertys[key] = match.group(1)
            # Special handling for the article body: extract its images.
            # NOTE(review): this runs even when the regex did not match, in
            # which case propertys[key] still holds the rule text -- confirm.
            if key == 'content_raw':
                contentselector = etree.HTML(propertys[key])
                etree.strip_elements(contentselector, 'script')
                etree.strip_tags(contentselector, 'a')
                propertys[key] = etree.tostring(contentselector).decode(
                    'utf-8')
                if spider.args['DownLoadImg'] == 1:
                    for imgsrc in contentselector.xpath("//img/@src"):
                        if imgsrc is not None and len(imgsrc) > 0:
                            # Queue the original image link, then rewrite the
                            # link in the content to use the image host below.
                            cache.rpush('link-img', imgsrc)
                            proto, rest = splittype(imgsrc)
                            res, rest = splithost(rest)
                            propertys[key] = propertys[key].replace(
                                imgsrc, imgsrc.replace(res, 'img.zyai.top'))
                            logging.info('push a img link to queue %s .' %
                                         imgsrc)

        else:
            # Non-'$' rules are XPath expressions; keep the first result.
            item = selector.xpath(item)[0]
            propertys[key] = item

    dataPersistenceType = spider.args['DataPersistenceType']

    if dataPersistenceType == 'WPRPC':
        # NOTE(review): endpoint and credentials are hard-coded here; they
        # should presumably come from configuration -- confirm.
        wp = Client('http://tech.cocopass.com/xmlrpc.php', 'admin', '19841204')
        # The bare string below is a no-op statement that reads
        # "publish the blog post".
        """
		发表博文
		"""
        post = WordPressPost()
        post.title = propertys['title'].encode('utf-8')
        post.content = propertys['content_raw'].encode('utf-8')
        post.post_status = 'publish'
        post.terms_names = {'post_tag': [post.title], 'category': ['爱好']}
        wp.call(NewPost(post))
        logging.info('successfully post one article: %s .' %
                     propertys['title'])

    elif dataPersistenceType == 'MYSQL':
        # No MySQL persistence is implemented in this function.
        pass
コード例 #21
0
    def load(self, url):
        """Store *url* and split it into protocol, host, port and path.

        An explicit port is kept as the string found in the URL; when the
        URL has none, 80 is used for http and 443 for https.
        """
        self.url = url

        self.protocol, remainder = urllib_parse.splittype(self.url)
        netloc, self.path = urllib_parse.splithost(remainder)
        self.host, self.port = urllib_parse.splitport(netloc)

        if self.port:
            return
        fallback = {'http': 80, 'https': 443}.get(self.protocol)
        if fallback is not None:
            self.port = fallback
コード例 #22
0
def url_split(url):
    """Split url in a tuple (scheme, hostname, port, document) where
    hostname is always lowercased.

    The port defaults to the ``default_ports`` entry for the scheme (0 when
    the scheme is unknown).
    Precondition: url is syntactically correct URI (eg has no whitespace)
    """
    scheme, remainder = parse.splittype(url)
    host, document = parse.splithost(remainder)
    port = default_ports.get(scheme, 0)
    if not host:
        return scheme, host, port, document
    host, port = splitport(host.lower(), port=port)
    return scheme, host, port, document
コード例 #23
0
 def retry_https_basic_auth(self, url, realm, data=None):
     """Retry an HTTPS request with credentials embedded in the URL.

     Credentials come from ``self.get_user_passwd``; when both are empty,
     ``None`` is returned.  Otherwise the request is re-opened with the
     quoted ``user:password@`` pair placed in front of the host.
     """
     netloc, selector = splithost(url)
     # Strip any credentials that are already part of the host.
     at = netloc.find('@') + 1
     bare_host = netloc[at:]

     user, passwd = self.get_user_passwd(bare_host, realm, at)
     if not user and not passwd:
         return None

     credentials = quote(user, safe='') + ':' + quote(passwd, safe='')
     newurl = 'https://' + credentials + '@' + bare_host + selector

     request_args = (newurl,) if data is None else (newurl, data)
     return self.open(*request_args)
コード例 #24
0
def getp():
    """Fetch the URL given in the ``path`` query argument and return its text.

    The response is decoded as GBK when the final host is one of two known
    sites and as UTF-8 otherwise.
    """
    # NOTE(review): the target URL comes straight from the request
    # arguments -- confirm that fetching arbitrary URLs is acceptable here.
    target = request.args.get('path')
    resp = requests.get(target, headers=headers, timeout=5)

    _scheme, remainder = UrlPase.splittype(resp.url)
    host, _path = UrlPase.splithost(remainder)
    gbk_hosts = ('m.zwdu.com', 'm.biqubao.com')
    resp.encoding = "GBK" if host in gbk_hosts else "utf-8"

    return resp.text
コード例 #25
0
ファイル: jsonrpc.py プロジェクト: Zectbumo/jsonrpclib
    def __init__(
        self,
        uri,
        transport=None,
        encoding=None,
        verbose=0,
        version=None,
        headers=None,
        history=None,
        config=jsonrpclib.config.DEFAULT,
    ):
        """
        Sets up the server proxy

        :param uri: Request URI
        :param transport: Custom transport handler
        :param encoding: Specified encoding
        :param verbose: Log verbosity level
        :param version: JSON-RPC specification version
        :param headers: Custom additional headers for each request
        :param history: History object (for tests)
        :param config: A JSONRPClib Config instance
        :raise IOError: The URI scheme is neither http nor https
        """
        # Keep the configuration around
        self._config = config
        self.__version = version if version else config.version

        schema, uri = splittype(uri)
        if schema != "http" and schema != "https":
            raise IOError("Unsupported JSON-RPC protocol.")

        host, handler = splithost(uri)
        self.__host = host
        # Not sure if this is in the JSON spec?
        self.__handler = handler if handler else "/"

        # Build a default transport only when the caller supplied none
        if transport is None:
            transport_class = SafeTransport if schema == "https" else Transport
            transport = transport_class(config=config)
        self.__transport = transport

        self.__encoding = encoding
        self.__verbose = verbose
        self.__history = history

        # Global custom headers are injected into Transport
        self.__transport.push_headers(headers or {})
コード例 #26
0
ファイル: DLInfos.py プロジェクト: archive-j/iqiyi-parser
    def load(self, url):
        """Store *url* and split it into protocol, host, port and path.

        An explicit port is converted to ``int``; when the URL has none,
        80 is used for http and 443 for https.
        """
        self.url = url

        self.protocol, remainder = splittype(self.url)
        netloc, self.path = splithost(remainder)
        self.host, raw_port = splitport(netloc)
        self.port = None if raw_port is None else int(raw_port)

        if self.port:
            return
        fallback = {'http': 80, 'https': 443}.get(self.protocol)
        if fallback is not None:
            self.port = fallback
コード例 #27
0
def all_port(list):
    """Return 1 when at least one URL in *list* has an explicit port, else 0."""
    explicit_ports = 0
    for address in list:
        _scheme, remainder = splittype(address)
        netloc, _path = splithost(remainder)
        _host, port = splitport(netloc)
        if port is not None:
            explicit_ports += 1
    return 1 if explicit_ports > 0 else 0
コード例 #28
0
    def __setattr__(self, key, value):
        """Set the attribute; assigning ``url`` also refreshes the URL parts.

        After ``url`` is assigned, ``protocol`` is always updated, and
        ``path``, ``host`` and ``port`` are updated when the URL contains
        them.  If no port is set afterwards, 80 (http) or 443 (https) is
        used.
        """
        object.__setattr__(self, key, value)
        if key != 'url':
            return

        self.protocol, remainder = urllib_parse.splittype(self.url)
        if remainder:
            netloc, self.path = urllib_parse.splithost(remainder)
            if netloc:
                self.host, self.port = urllib_parse.splitport(netloc)

        if getattr(self, 'port', None):
            return
        fallback = {'http': 80, 'https': 443}.get(self.protocol)
        if fallback is not None:
            self.port = fallback
コード例 #29
0
ファイル: util.py プロジェクト: rmsilva/tilenol
def fetchurl(url, query=None):
    """Fetch *url* over plain HTTP and return the response body.

    Args:
        url: An ``http://`` URL.  It must not already contain a query
            string when *query* is given.
        query: Optional parameters, URL-encoded and appended to *url*.

    Returns:
        The body of a ``200 OK`` response.

    Raises:
        RuntimeError: If the URL's protocol is not ``http``.
        RequestError: If the response status is not ``200 OK``.
    """
    if query is not None:
        assert '?' not in url, ("Either include query in url "
                                "or pass as parameter, but not both")
        url += '?' + urlencode(query)
    proto, tail = splittype(url)
    if proto != 'http':
        # Name the protocol that was actually rejected.
        raise RuntimeError("Unsupported protocol {!r}".format(proto))
    host, tail = splithost(tail)
    cli = HTTPClient(host)
    resp = cli.request(tail, headers={'Host': host})
    if resp.status.endswith('200 OK'):
        return resp.body
    raise RequestError(resp.status, resp)
コード例 #30
0
ファイル: DLInfos.py プロジェクト: archive-j/iqiyi-parser
    def __setattr__(self, key, value):
        """Set the attribute; assigning ``url`` also refreshes the URL parts.

        After ``url`` is assigned, ``protocol`` is always updated, and
        ``path``, ``host`` and ``port`` (as ``int``) are updated when the
        URL contains them.  If no port is set afterwards, 80 (http) or
        443 (https) is used.
        """
        object.__setattr__(self, key, value)
        if key != 'url':
            return

        self.protocol, remainder = splittype(self.url)
        if remainder:
            netloc, self.path = splithost(remainder)
            if netloc:
                self.host, raw_port = splitport(netloc)
                self.port = None if raw_port is None else int(raw_port)

        if getattr(self, 'port', None):
            return
        fallback = {'http': 80, 'https': 443}.get(self.protocol)
        if fallback is not None:
            self.port = fallback
コード例 #31
0
    def process_request(self, headers, web, url):
        """Fill *headers* in place for a request to *url*.

        Copies every entry of ``base_heard``, then sets ``User-Agent``,
        ``Host`` (derived from *url*), ``Referer`` (the search URL
        configured for *web*) and ``Upgrade-Insecure-Requests``.
        """
        for name in base_heard:
            headers[name] = base_heard[name]
        headers['User-Agent'] = self.ua.random

        _scheme, remainder = splittype(url)
        netloc, _path = splithost(remainder)
        # With a trailing slash the last split element is empty, so the
        # one before it is taken instead.
        trailing_slash = netloc[-1] == '/'
        segments = netloc.split('/')
        headers['Host'] = segments[-2] if trailing_slash else segments[-1]

        headers['Referer'] = WEB_SETTINGS[web]['search'].format('')
        headers['Upgrade-Insecure-Requests'] = '1'
コード例 #32
0
def fetchurl(url, query=None):
    """Fetch *url* over plain HTTP and return the response body.

    The host name is resolved through the hub's DNS resolver and the
    request is sent to the resulting address with the original host name
    in the ``Host`` header.

    Args:
        url: An ``http://`` URL.  It must not already contain a query
            string when *query* is given.
        query: Optional parameters, URL-encoded and appended to *url*.

    Returns:
        The body of a ``200 OK`` response.

    Raises:
        RuntimeError: If the URL's protocol is not ``http``.
        RequestError: If the response status is not ``200 OK``.
    """
    if query is not None:
        assert '?' not in url, ("Either include query in url "
                                "or pass as parameter, but not both")
        url += '?' + urlencode(query)
    proto, tail = splittype(url)
    if proto != 'http':
        # Name the protocol that was actually rejected.
        raise RuntimeError("Unsupported protocol {!r}".format(proto))
    host, tail = splithost(tail)
    ip = gethub().dns_resolver.gethostbyname(host)
    cli = HTTPClient(ip)
    resp = cli.request(tail, headers={'Host': host})
    if resp.status.endswith('200 OK'):
        return resp.body
    raise RequestError(resp.status, resp)
コード例 #33
0
ファイル: tlwiki.py プロジェクト: dsp2003/e17p
 def stat_page(self):
     """Send a HEAD request for this page and parse its Last-Modified header.

     Returns:
         Whatever ``self._parse_http_dt`` produces for the raw
         ``last-modified`` header value of the response.
     """
     from urllib.parse import splittype, splithost
     from http.client import HTTPConnection

     url = self.get_url()
     self.log(20, 'Statting page {!r} at {!r}.'.format(self.name, url))

     (_, dp) = splittype(url)
     (host, path) = splithost(dp)
     conn = HTTPConnection(host)
     try:
         conn.request('HEAD', path)
         res = conn.getresponse()
         lmt_raw = res.getheader('last-modified')
     finally:
         # The connection is used for this single request only.
         conn.close()

     return self._parse_http_dt(lmt_raw)
コード例 #34
0
ファイル: scgi.py プロジェクト: JackDandy/SickGear
    def __init__(self, uri, transport=None, encoding=None, verbose=False,
                 allow_none=False, use_datetime=False):
        """Set up a proxy for the SCGI server at *uri*.

        :param uri: ``scgi://host[:port][/handler]`` address of the server
        :param transport: Transport to use; an ``SCGITransport`` is created
          when omitted
        :param encoding: Stored for later use
        :param verbose: Stored for later use
        :param allow_none: Stored for later use
        :param use_datetime: Passed to the default ``SCGITransport``
        :raise IOError: The URI scheme is not ``scgi``
        """
        scheme, uri = urlparser.splittype(uri)
        # Compare for equality: "not in ('scgi')" is a substring test on a
        # plain string, which lets 'sc' or '' through and fails on None.
        if scheme != 'scgi':
            raise IOError('unsupported XML-RPC protocol')
        self.__host, self.__handler = urlparser.splithost(uri)
        if not self.__handler:
            self.__handler = '/'

        if transport is None:
            transport = SCGITransport(use_datetime=use_datetime)
        self.__transport = transport

        self.__encoding = encoding
        self.__verbose = verbose
        self.__allow_none = allow_none
コード例 #35
0
    def stat_page(self):
        """Send a HEAD request for this page and parse its Last-Modified header.

        Returns:
            Whatever ``self._parse_http_dt`` produces for the raw
            ``last-modified`` header value of the response.
        """
        from urllib.parse import splittype, splithost
        from http.client import HTTPConnection

        url = self.get_url()
        self.log(20, 'Statting page {!r} at {!r}.'.format(self.name, url))

        (_, dp) = splittype(url)
        (host, path) = splithost(dp)
        conn = HTTPConnection(host)
        try:
            conn.request('HEAD', path)
            res = conn.getresponse()
            lmt_raw = res.getheader('last-modified')
        finally:
            # Release the socket; nothing else uses this connection.
            conn.close()

        return self._parse_http_dt(lmt_raw)
コード例 #36
0
ファイル: views.py プロジェクト: linchuan1982/web_server
def index(request):
    """Register the URL given in the ``url`` query parameter.

    The host part of the URL is matched against the members of ``Source``:
    the first member whose ``name`` occurs in the host supplies the source
    value stored with the URL.

    Returns:
        ``HttpResponse('ok')`` once the URL is stored (or already exists), or
        ``HttpResponseBadRequest`` when the parameter is missing or empty,
        the URL has no host, or no ``Source`` member matches.
    """
    url = request.GET.get('url')
    if not url:
        # Without this guard splittype() would be handed None and raise.
        return HttpResponseBadRequest('Unknown url {}'.format(url))

    host = parse.splithost(parse.splittype(url)[1])[0]
    logger.info('host is {}'.format(host))
    if not host:
        # URLs without a "//host" part yield no host; nothing can match.
        return HttpResponseBadRequest('Unknown url {}'.format(url))

    source = next((tag.value for tag in Source if tag.name in host), None)
    if source is None:
        return HttpResponseBadRequest('Unknown url {}'.format(url))

    # Update the database
    SearchUrl.objects.get_or_create(
        request_url=url, defaults=dict(source=source))
    return HttpResponse('ok')
コード例 #37
0
ファイル: __init__.py プロジェクト: jgsogo/python-oauth2
    def request(self, uri, method="GET", body='', headers=None,
        redirections=httplib2.DEFAULT_MAX_REDIRECTS, connection_type=None):
        """Sign the request and forward it to ``httplib2.Http.request``.

        A signed ``Request`` is built from the consumer and token. Where the
        signature travels depends on the request:

        * form-encoded requests carry it in the body,
        * other GET requests carry it in the URL,
        * everything else carries it in the headers, using a realm derived
          from the URI's scheme and host.

        POST requests with no ``Content-Type`` header default to
        ``application/x-www-form-urlencoded``.
        """
        form_content_type = 'application/x-www-form-urlencoded'

        if not isinstance(headers, dict):
            headers = {}

        if method == "POST":
            # Only fills in the header when the caller did not supply one.
            headers.setdefault('Content-Type', form_content_type)

        is_form_encoded = (
            headers.get('Content-Type') == 'application/x-www-form-urlencoded')

        # Body parameters take part in the signature only for form posts.
        parameters = parse_qs(body) if (is_form_encoded and body) else None

        signed = Request.from_consumer_and_token(
            self.consumer,
            token=self.token,
            http_method=method,
            http_url=uri,
            parameters=parameters,
            body=body,
            is_form_encoded=is_form_encoded,
        )
        signed.sign_request(self.method, self.consumer, self.token)

        # Realm is "<scheme>:[//]<host>".
        scheme, remainder = splittype(uri)
        hierpart = '//' if remainder.startswith('//') else ''
        host, remainder = splithost(remainder)
        realm = scheme + ':' + hierpart + host

        if is_form_encoded:
            body = signed.to_postdata()
        elif method == "GET":
            uri = signed.to_url()
        else:
            headers.update(signed.to_header(realm=realm))

        return httplib2.Http.request(
            self, uri, method=method, body=body, headers=headers,
            redirections=redirections, connection_type=connection_type)
コード例 #38
0
ファイル: jsonrpc.py プロジェクト: youngmou/Elymus
    def __init__(self, uri, transport=None, encoding=None,
                 verbose=0, version=None, headers=None, history=None,
                 config=jsonrpclib.config.DEFAULT, context=None):
        """
        Prepares a JSON-RPC server proxy for an http(s) URI

        :param uri: Request URI (scheme must be http or https)
        :param transport: Custom transport handler; when None, one is
                          created according to the URI scheme
        :param encoding: Specified encoding
        :param verbose: Log verbosity level
        :param version: JSON-RPC specification version (falls back to the
                        version of the configuration)
        :param headers: Custom additional headers for each request
        :param history: History object (for tests)
        :param config: A JSONRPClib Config instance
        :param context: The optional SSLContext to use (https only)
        :raise IOError: The URI scheme is neither http nor https
        """
        # Keep the configuration at hand
        self._config = config
        self.__version = version if version else config.version

        scheme, remainder = splittype(uri)
        if not (scheme == 'http' or scheme == 'https'):
            _logger.error("jsonrpclib only support http(s) URIs, not %s",
                          scheme)
            raise IOError('Unsupported JSON-RPC protocol.')

        host, handler = splithost(remainder)
        self.__host = host
        # An empty handler is replaced by the root path
        self.__handler = handler if handler else '/'

        if transport is None:
            # Pick the default transport matching the scheme
            transport = (
                SafeTransport(config=config, context=context)
                if scheme == 'https'
                else Transport(config=config)
            )
        self.__transport = transport

        self.__encoding = encoding
        self.__verbose = verbose
        self.__history = history

        # Global custom headers are handed over to the transport
        self.__transport.push_headers(headers if headers else {})
コード例 #39
0
ファイル: oscssl.py プロジェクト: adrianschroeter/osc
 def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
     """Record the real host and port from *url*, then delegate to M2Crypto.

     Per the original author's note, putrequest runs before connect, so the
     URL can be parsed here to learn the host/port needed for the CONNECT
     request sent to the proxy. The results are stored in
     ``self._real_host`` and ``self._real_port``.

     Raises:
         ValueError: If *url* has no scheme, or has no explicit port and its
             scheme is missing from ``self._ports``.
     """
     scheme, remainder = splittype(url)
     if scheme is None:
         raise ValueError("unknown URL type: %s" % url)

     # Split "host[:port]" off the front of the remainder.
     netloc, remainder = splithost(remainder)
     hostname, port = splitport(netloc)

     if port is None:
         # No explicit port: use the one registered for the scheme.
         try:
             port = self._ports[scheme]
         except KeyError:
             raise ValueError("unknown protocol for: %s" % url)

     self._real_host = hostname
     self._real_port = int(port)
     M2Crypto.httpslib.HTTPSConnection.putrequest(
         self, method, url, skip_host, skip_accept_encoding)
コード例 #40
0
ファイル: urllib2.py プロジェクト: HishamKamal/code2flow
 def proxy_open(self, req, proxy, type):
     """Route *req* through *proxy*, adding proxy credentials when present.

     If the proxy URL embeds ``user:password@``, the credentials are sent as
     a Basic ``Proxy-Authorization`` header. The incoming *type* argument is
     overwritten with the scheme parsed from the proxy URL.

     Returns:
         None when the request's original type equals the proxy's type, so
         other handlers continue with the request; otherwise the result of
         re-opening the request through ``self.parent``.
     """
     orig_type = req.get_type()
     type, r_type = splittype(proxy)
     host, _ = splithost(r_type)
     if '@' in host:
         user_pass, host = host.split('@', 1)
         # b64encode never inserts line breaks. encodestring wraps its
         # output every 76 characters, which would put a newline inside the
         # header value for long credentials.
         user_pass = base64.b64encode(unquote(user_pass))
         req.add_header('Proxy-Authorization', 'Basic '+user_pass)
     host = unquote(host)
     req.set_proxy(host, type)
     if orig_type == type:
         # let other handlers take care of it
         # XXX this only makes sense if the proxy is before the
         # other handlers
         return None
     else:
         # need to start over, because the other handlers don't
         # grok the proxy's URL type
         return self.parent.open(req)
コード例 #41
0
ファイル: jsonrpclib.py プロジェクト: billtsay/win-demo-opcua
    def __init__(self, uri, transport=None, encoding=None,
                 verbose=None, allow_none=0):
        """Set up a JSON-RPC proxy for an http or https URI.

        The handler defaults to ``/RPC2`` when the URI carries no path. When
        no transport is given, ``SafeTransport`` is used for https and
        ``Transport`` otherwise.

        Raises:
            IOError: If the URI scheme is neither http nor https.
        """
        scheme, remainder = splittype(uri)
        if scheme != "http" and scheme != "https":
            raise IOError("Unsupported JSONRPC protocol")

        host, handler = splithost(remainder)
        self.__host = host
        self.__handler = handler or "/RPC2"

        if transport is None:
            transport_factory = SafeTransport if scheme == "https" else Transport
            transport = transport_factory()
        self.__transport = transport

        self.__encoding = encoding
        self.__verbose = verbose
        self.__allow_none = allow_none
コード例 #42
0
ファイル: client.py プロジェクト: lsowen/ofxclient
    def post(self, query):
        """POST *query* to the institution's URL and return the response text.

        The request is always sent over HTTPS to the host taken from
        ``self.institution.url``, with OFX content-type headers. The response
        body is decoded as ASCII, silently dropping undecodable bytes.

        Returns:
            The decoded response body as a string.
        """
        i = self.institution
        logging.debug('posting data to %s', i.url)
        logging.debug('---- request ----')
        logging.debug(query)
        garbage, path = splittype(i.url)
        host, selector = splithost(path)
        h = HTTPSConnection(host)
        try:
            h.request('POST', selector, query,
                      {
                          "Content-type": "application/x-ofx",
                          "Accept": "*/*, application/x-ofx"
                      })
            res = h.getresponse()
            response = res.read().decode('ascii', 'ignore')
            logging.debug('---- response ----')
            logging.debug(res.__dict__)
            logging.debug(response)
            res.close()
        finally:
            # Release the connection even if the request or read fails.
            h.close()

        return response
コード例 #43
0
ファイル: util.py プロジェクト: u2rafi/xhtml2pdf
    def __init__(self, uri, basepath=None):
        """Resolve *uri* into a readable resource.

        Depending on the kind of URI, one of the following is populated:

        * ``data:`` URI -- ``self.mimetype`` and the decoded ``self.data``.
        * ``file`` scheme -- opened through ``urllib2.urlopen``; sets
          ``self.mimetype``, ``self.uri`` and ``self.file``.
        * ``http``/``https`` -- fetched with a GET request; a gzip-encoded
          body is wrapped in a ``GzipFile``. A response other than
          ``200 OK`` falls back to ``urllib2.urlopen``.
        * anything else -- treated as a local path (joined onto *basepath*
          when given); if it is an existing file, ``self.uri``,
          ``self.local``, ``self.mimetype`` and ``self.file`` are set.

        When *basepath* is given and *uri* has no scheme of its own, the
        scheme of *basepath* decides which branch is taken.

        :param uri: URI or path of the resource; non-``str`` values are
                    decoded as UTF-8, falsy values become an empty string
        :param basepath: Optional base URL or directory used to resolve *uri*
        """
        self.basepath = basepath
        self.mimetype = None
        self.file = None
        self.data = None
        self.uri = None
        self.local = None
        self.tmp_file = None
        uri = uri or str()
        if not isinstance(uri, str):
            uri = uri.decode("utf-8")
        log.debug("FileObject %r, Basepath: %r", uri, basepath)

        # Data URI
        if uri.startswith("data:"):
            m = _rx_datauri.match(uri)
            self.mimetype = m.group("mime")
            self.data = base64.b64decode(m.group("data").encode("utf-8"))

        else:
            # Check if we have an external scheme
            if basepath and not urlparse.urlparse(uri).scheme:
                urlParts = urlparse.urlparse(basepath)
            else:
                urlParts = urlparse.urlparse(uri)

            log.debug("URLParts: {}".format((urlParts, urlParts.scheme)))

            if urlParts.scheme == 'file':
                if basepath and uri.startswith('/'):
                    uri = urlparse.urljoin(basepath, uri[1:])
                urlResponse = urllib2.urlopen(uri)
                self.mimetype = urlResponse.info().get(
                    "Content-Type", '').split(";")[0]
                self.uri = urlResponse.geturl()
                self.file = urlResponse

            # Drive letters have len==1 but we are looking
            # for things like http:
            elif urlParts.scheme in ('http', 'https'):

                log.debug("Sending request for {} with httplib".format(uri))

                # External data
                if basepath:
                    uri = urlparse.urljoin(basepath, uri)

                log.debug("Uri parsed: {}".format(uri))

                # Using HTTPLIB
                server, path = urlparse.splithost(uri[uri.find("//"):])
                if uri.startswith("https://"):
                    conn = httplib.HTTPSConnection(server)
                else:
                    conn = httplib.HTTPConnection(server)
                conn.request("GET", path)
                r1 = conn.getresponse()
                if (r1.status, r1.reason) == (200, "OK"):
                    self.mimetype = r1.getheader(
                        "Content-Type", '').split(";")[0]
                    self.uri = uri
                    log.debug("here")
                    if r1.getheader("content-encoding") == "gzip":
                        import gzip

                        # The response body is bytes, so it needs a bytes
                        # buffer; a text buffer rejects it on Python 3.
                        self.file = gzip.GzipFile(
                            mode="rb", fileobj=six.BytesIO(r1.read()))
                    else:
                        self.file = r1
                else:
                    log.debug("Received non-200 status: {}".format((r1.status, r1.reason)))
                    # Let urllib2 retry the fetch; give up quietly on an
                    # HTTP error, leaving the object unpopulated.
                    try:
                        urlResponse = urllib2.urlopen(uri)
                    except urllib2.HTTPError as e:
                        log.error("Could not process uri: {}".format(e))
                        return
                    self.mimetype = urlResponse.info().get(
                        "Content-Type", '').split(";")[0]
                    self.uri = urlResponse.geturl()
                    self.file = urlResponse

            else:

                log.debug("Unrecognized scheme, assuming local file path")

                # Local data
                if basepath:
                    uri = os.path.normpath(os.path.join(basepath, uri))

                if os.path.isfile(uri):
                    self.uri = uri
                    self.local = uri

                    self.setMimeTypeByName(uri)
                    # Text MIME types are opened in text mode, everything
                    # else in binary mode.
                    if self.mimetype.startswith('text'):
                        self.file = open(uri, "r")
                    else:
                        self.file = open(uri, "rb")
コード例 #44
0
ファイル: urllib2.py プロジェクト: HishamKamal/code2flow
 def get_host(self):
     """Return the request host, computing and caching it on first use.

     On the first call the host is split off ``self.__r_type`` (the
     remainder is kept in ``self.__r_host``) and unquoted when non-empty.
     """
     if self.host is not None:
         # Already resolved on an earlier call.
         return self.host

     self.host, self.__r_host = splithost(self.__r_type)
     if not self.host:
         return self.host

     self.host = unquote(self.host)
     return self.host
コード例 #45
0
ファイル: jsonrpc.py プロジェクト: pymedusa/SickRage
    def __init__(self, uri, transport=None, encoding=None,
                 verbose=0, version=None, headers=None, history=None,
                 config=jsonrpclib.config.DEFAULT, context=None):
        """
        Sets up the server proxy

        A scheme prefixed with ``unix+`` (e.g. ``unix+http``) selects UNIX
        socket mode: the path part of the URI is then used as the socket
        file path and the handler is set to ``/``.

        :param uri: Request URI
        :param transport: Custom transport handler
        :param encoding: Specified encoding
        :param verbose: Log verbosity level
        :param version: JSON-RPC specification version
        :param headers: Custom additional headers for each request
        :param history: History object (for tests)
        :param config: A JSONRPClib Config instance
        :param context: The optional SSLContext to use
        :raise IOError: Unsupported scheme (including a URI without any
                        scheme), or no default transport exists for the
                        UNIX/scheme combination
        """
        # Store the configuration
        self._config = config
        self.__version = version or config.version

        schema, uri = splittype(uri)
        use_unix = False
        # The scheme may be None when the URI has no "scheme:" prefix; guard
        # the call so such URIs reach the IOError below instead of failing
        # with an AttributeError.
        if schema and schema.startswith("unix+"):
            schema = schema[len("unix+"):]
            use_unix = True

        if schema not in ('http', 'https'):
            _logger.error("jsonrpclib only support http(s) URIs, not %s",
                          schema)
            raise IOError('Unsupported JSON-RPC protocol.')

        self.__host, self.__handler = splithost(uri)
        if use_unix:
            # In Unix mode the URL path designates the socket file
            unix_path = self.__handler
            self.__handler = '/'
        elif not self.__handler:
            # Not sure if this is in the JSON spec?
            self.__handler = '/'

        if transport is None:
            if use_unix:
                if schema == "http":
                    # In Unix mode, we use the path part of the URL (handler)
                    # as the path to the socket file
                    transport = UnixTransport(
                        config=config, path=unix_path
                    )
            elif schema == 'https':
                transport = SafeTransport(config=config, context=context)
            else:
                transport = Transport(config=config)

            if transport is None:
                # Only reachable for UNIX mode with a non-http scheme
                raise IOError(
                    "Unhandled combination: UNIX={}, protocol={}"
                    .format(use_unix, schema)
                )

        self.__transport = transport

        self.__encoding = encoding
        self.__verbose = verbose
        self.__history = history

        # Global custom headers are injected into Transport
        self.__transport.push_headers(headers or {})