Ejemplo n.º 1
0
def get_html_string(url):
    '''
    Downloads the webpage at the given url (a string), html-decodes its
    contents, and returns the result as a single string.

    Before connecting, the ServicePointManager security protocol is set to
    TLS 1.2.  The request is sent with a ComicVineScraper user agent, and the
    response body is read as UTF-8.

    Raises a WebException if the server's response code is anything other
    than 200 (OK).  Any exception raised by the underlying .NET request or
    stream calls propagates to the caller unchanged.  Whatever happens, every
    resource that was opened along the way is closed before this method exits.
    '''

    # start every handle as None so that the cleanup code below can tell
    # which resources were actually acquired before a failure occurred.
    response = None
    responseStream = None
    reader = None
    try:
        ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12
        request = WebRequest.Create(url)
        request.UserAgent = "[ComicVineScraper, version " + \
            Resources.SCRIPT_VERSION + "]"
        response = request.GetResponse()
        # if the response code is not "OK", throw a web exception immediately.
        # this stops red-herring errors later on as we try to parse bad results.
        # usually this only happens if the CV server is temporarily down.
        if response.StatusCode != HttpStatusCode.OK:
            raise WebException("server response code " +
                               sstr(int(response.StatusCode)) + " (" +
                               sstr(response.StatusCode) + ")")
        responseStream = response.GetResponseStream()
        reader = StreamReader(responseStream, Encoding.UTF8)
        page = reader.ReadToEnd()
        # decode html entities by writing the decoded text into a
        # StringWriter and reading the result back out of it.
        with StringWriter() as writer:
            HttpUtility.HtmlDecode(page, writer)
            page = writer.ToString()
        return page
    finally:
        # release in reverse order of acquisition.  the nesting guarantees
        # that a failure while closing one resource cannot prevent the
        # remaining ones from being closed (which would leak the connection).
        try:
            if reader is not None:
                reader.Close()
        finally:
            try:
                if responseStream is not None:
                    responseStream.Close()
            finally:
                if response is not None:
                    response.Close()
Ejemplo n.º 2
0
def get_html_string(url):
   '''
   Downloads the webpage at the given url (a string), html-decodes its
   contents, and returns the result as a single string.

   Before connecting, the ServicePointManager security protocol is set to
   TLS 1.2.  The request is sent with a standard-format ComicVineScraper user
   agent, and the response body is read as UTF-8.

   Raises a WebException if the server's response code is anything other
   than 200 (OK).  Any exception raised by the underlying .NET request or
   stream calls propagates to the caller unchanged.  Whatever happens, every
   resource that was opened along the way is closed before this method exits.
   '''

   # start every handle as None so that the cleanup code below can tell
   # which resources were actually acquired before a failure occurred.
   response = None
   responseStream = None
   reader = None
   try:
      ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12
      request = WebRequest.Create(url)
      # latest version of comicvine api insists on a user agent (see bug #484).
      # previously, it didn't like my non-standard user agent, I think (see bug #471).
      # now, I have a standard user agent, which comicvine api seems to like.
      request.UserAgent = "ComicVineScraper/" + \
         Resources.SCRIPT_VERSION + " (https://github.com/cbanack/comic-vine-scraper/)"
      response = request.GetResponse()
      # if the response code is not "OK", throw a web exception immediately.
      # this stops red-herring errors later on as we try to parse bad results.
      # usually this only happens if the CV server is temporarily down.
      if response.StatusCode != HttpStatusCode.OK:
         raise WebException("server response code " +
            sstr(int(response.StatusCode))+" ("+sstr(response.StatusCode)+")" )
      responseStream = response.GetResponseStream()
      reader = StreamReader(responseStream, Encoding.UTF8)
      page = reader.ReadToEnd()
      # decode html entities by writing the decoded text into a
      # StringWriter and reading the result back out of it.
      with StringWriter() as writer:
         HttpUtility.HtmlDecode(page, writer)
         page = writer.ToString()
      return page
   finally:
      # release in reverse order of acquisition.  the nesting guarantees
      # that a failure while closing one resource cannot prevent the
      # remaining ones from being closed (which would leak the connection).
      try:
         if reader is not None:
            reader.Close()
      finally:
         try:
            if responseStream is not None:
               responseStream.Close()
         finally:
            if response is not None:
               response.Close()