Beispiel #1
0
def ubuntuPaste(poster='',
                screenshot='',
                code2='',
                klass='bash',
                filename=None):
    '''
  paste 到 http://paste.ubuntu.org.cn/
  screenshot 是文件路径

  返回查看此帖子的 URL (字符串)
  '''
    from httpsession import Session
    paste_url = 'http://paste.ubuntu.org.cn/'
    fields = [
        ('paste', 'send'),
        ('poster', poster),
        ('code2', code2),
        ('class', klass),
    ]
    if screenshot:
        files = (('screenshot', filename
                  or os.path.split(screenshot)[1], open(screenshot,
                                                        'rb').read()), )
    else:
        files = ()

    data = encode_multipart_formdata(fields, files)
    s = Session()
    r = s.request(paste_url,
                  data[1],
                  headers={
                      'Content-Type': data[0],
                      'Expect': '100-continue',
                  })
    return r.geturl()
Beispiel #2
0
def ubuntuPaste(poster='', screenshot='', code2='',
    klass='bash', filename=None):
  '''
  paste 到 http://paste.ubuntu.org.cn/
  screenshot 是文件路径

  返回查看此帖子的 URL (字符串)
  '''
  from httpsession import Session
  paste_url = 'http://paste.ubuntu.org.cn/'
  fields = [
    ('paste',  'send'),
    ('poster', poster),
    ('code2',  code2),
    ('class',  klass),
  ]
  if screenshot:
    files = (
      ('screenshot', filename or os.path.split(screenshot)[1], open(screenshot, 'rb').read()),
    )
  else:
    files = ()

  data = encode_multipart_formdata(fields, files)
  s = Session()
  r = s.request(paste_url, data[1], headers={
    'Content-Type': data[0],
    'Expect': '100-continue',
  })
  return r.geturl()
Beispiel #3
0
def ubuntuPaste(poster="", screenshot="", code2="", klass="bash", filename=None):
    """
  paste 到 http://paste.ubuntu.org.cn/
  screenshot 是文件路径

  返回查看此帖子的 URL (字符串)
  """
    from httpsession import Session

    paste_url = "http://paste.ubuntu.org.cn/"
    fields = [("paste", "send"), ("poster", poster), ("code2", code2), ("class", klass)]
    if screenshot:
        files = (("screenshot", filename or os.path.split(screenshot)[1], open(screenshot, "rb").read()),)
    else:
        files = ()

    data = encode_multipart_formdata(fields, files)
    s = Session()
    r = s.request(paste_url, data[1], headers={"Content-Type": data[0], "Expect": "100-continue"})
    return r.geturl()
Beispiel #4
0
def getTitle(url, headers={}, timeout=5):
    '''
  获取网页标题,url 要指定协议的

  如果字符串解码失败,返回 bytes
  如果不是网页返回 None

  可能出现的异常
    socket.error: [Errno 111] Connection refused
    socket.timeout: timed out
  '''
    # TODO 对 meta 刷新的处理
    import re
    import socket
    from httpsession import Session

    defaultheaders = {}
    defaultheaders[
        'User-Agent'] = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.6) Gecko/20100628 Ubuntu/10.04 (lucid) Firefox/3.6.6'
    defaultheaders[
        'Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.7'
    defaultheaders['Accept-Language'] = 'zh-cn,zh;q=0.5'
    defaultheaders['Accept-Charset'] = 'utf-8,gb18030;q=0.7,*;q=0.7'
    defaultheaders.update(headers)
    headers = defaultheaders

    s = Session()
    try:
        response = s.request(url, headers=headers)
    except socket.error:
        s = Session(proxy={
            'http': 'http://localhost:8087',
            'https': 'http://localhost:8087',
        })
        response = s.request(url, headers=headers)

    contentType = response.getheader('Content-Type', default='text/html')
    type = contentType.split(';', 1)[0]
    if type.find('html') == -1 and type.find('xml') == -1:
        return None

    try:
        charset = contentType.rsplit('=', 1)[1]
    except IndexError:
        charset = None

    title = b''
    content = b''
    for i in range(300):
        content += response.read(64)
        if len(content) < 64:
            break
        m = re.search(b'<title[^>]*>([^<]*)<', content, re.IGNORECASE)
        if m:
            title = m.group(1)
            break
    response.close()

    if charset is None:
        import chardet
        title = title.decode(chardet.detect(title)['encoding'])
    else:
        if charset.lower().find('big5') != -1:
            charset = 'big5'
        elif charset.lower() == 'windows-31j':
            charset = 'cp932'
        title = title.decode(charset)
    title = htmlutils.entityunescape(title.replace('\n', '')).strip()

    return title or None
Beispiel #5
0
def getTitle(url, headers={}, timeout=5):
    """
  获取网页标题,url 要指定协议的

  如果字符串解码失败,返回 bytes
  如果不是网页返回 None

  可能出现的异常
    socket.error: [Errno 111] Connection refused
    socket.timeout: timed out
  """
    # TODO 对 meta 刷新的处理
    import re
    import socket
    from httpsession import Session

    defaultheaders = {}
    defaultheaders[
        "User-Agent"
    ] = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.6) Gecko/20100628 Ubuntu/10.04 (lucid) Firefox/3.6.6"
    defaultheaders["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.7"
    defaultheaders["Accept-Language"] = "zh-cn,zh;q=0.5"
    defaultheaders["Accept-Charset"] = "utf-8,gb18030;q=0.7,*;q=0.7"
    defaultheaders.update(headers)
    headers = defaultheaders

    s = Session()
    try:
        response = s.request(url, headers=headers)
    except socket.error:
        s = Session(proxy={"http": "http://localhost:8087", "https": "http://localhost:8087"})
        response = s.request(url, headers=headers)

    contentType = response.getheader("Content-Type", default="text/html")
    type = contentType.split(";", 1)[0]
    if type.find("html") == -1 and type.find("xml") == -1:
        return None

    try:
        charset = contentType.rsplit("=", 1)[1]
    except IndexError:
        charset = None

    title = b""
    content = b""
    for i in range(300):
        content += response.read(64)
        if len(content) < 64:
            break
        m = re.search(b"<title[^>]*>([^<]*)<", content, re.IGNORECASE)
        if m:
            title = m.group(1)
            break
    response.close()

    if charset is None:
        import chardet

        title = title.decode(chardet.detect(title)["encoding"])
    else:
        if charset.lower().find("big5") != -1:
            charset = "big5"
        title = title.decode(charset)
    title = htmlutils.entityunescape(title.replace("\n", "")).strip()

    return title or None
Beispiel #6
0
def getTitle(url, headers={}, timeout=5):
  '''
  获取网页标题,url 要指定协议的

  如果字符串解码失败,返回 bytes
  如果不是网页返回 None

  可能出现的异常
    socket.error: [Errno 111] Connection refused
    socket.timeout: timed out
  '''
  # TODO 对 meta 刷新的处理
  import re
  import socket
  from httpsession import Session

  defaultheaders = {}
  defaultheaders['User-Agent'] = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.6) Gecko/20100628 Ubuntu/10.04 (lucid) Firefox/3.6.6'
  defaultheaders['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.7'
  defaultheaders['Accept-Language'] = 'zh-cn,zh;q=0.5'
  defaultheaders['Accept-Charset'] = 'utf-8,gb18030;q=0.7,*;q=0.7'
  defaultheaders.update(headers)
  headers = defaultheaders

  s = Session()
  try:
    response = s.request(url, headers=headers)
  except socket.error:
    s = Session(proxy={
      'http':  'http://localhost:8087',
      'https': 'http://localhost:8087',
    })
    response = s.request(url, headers=headers)

  contentType = response.getheader('Content-Type', default='text/html')
  type = contentType.split(';', 1)[0]
  if type.find('html') == -1 and type.find('xml') == -1:
    return None

  try:
    charset = contentType.rsplit('=', 1)[1]
  except IndexError:
    charset = None

  title = b''
  content = b''
  for i in range(300):
    content += response.read(64)
    if len(content) < 64:
      break
    m = re.search(b'<title[^>]*>([^<]*)<', content, re.IGNORECASE)
    if m:
      title = m.group(1)
      break
  response.close()

  if charset is None:
    import chardet
    title = title.decode(chardet.detect(title)['encoding'])
  else:
    if charset.lower().find('big5') != -1:
      charset = 'big5'
    elif charset.lower() == 'windows-31j':
      charset = 'cp932'
    title = title.decode(charset)
  title = htmlutils.entityunescape(title.replace('\n', '')).strip()

  return title or None