Example #1
    def downloadHisa(self):
        self.signals.status.emit("Downloading HiSA, please wait...")

        self.user_dir = CfdTools.runFoamCommand(
            "echo $WM_PROJECT_USER_DIR").rstrip().split('\n')[-1]
        self.user_dir = CfdTools.reverseTranslatePath(self.user_dir)

        try:
            # Workaround for certificate issues in python >= 2.7.9
            import ssl
            if hasattr(ssl, '_create_unverified_context'):
                urlrequest._urlopener = urlrequest.FancyURLopener(
                    context=ssl._create_unverified_context())
            else:
                urlrequest._urlopener = urlrequest.FancyURLopener()
            # Download
            (filename,
             header) = urlrequest.urlretrieve(self.hisa_url,
                                              reporthook=self.downloadStatus)
        except Exception as ex:
            raise Exception("Error downloading HiSA: {}".format(str(ex)))

        self.signals.status.emit("Extracting HiSA...")
        CfdTools.runFoamCommand(
            '{{ mkdir -p "$WM_PROJECT_USER_DIR" && cd "$WM_PROJECT_USER_DIR" && unzip -o "{}"; }}'
            .format(CfdTools.translatePath(filename)))
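
The reporthook passed to urlretrieve above (self.downloadStatus) is not shown in the snippet; a minimal sketch of such a hook, with hypothetical names, assuming the standard (block_count, block_size, total_size) callback signature:

import urllib.request as urlrequest

def downloadStatus(block_count, block_size, total_size):
    # urlretrieve calls this after each block; total_size is -1 when unknown
    if total_size > 0:
        percent = min(100, block_count * block_size * 100 // total_size)
        print("Downloaded {}%".format(percent))

# Hypothetical usage with a placeholder URL (downloads to a temp file):
# urlrequest.urlretrieve("https://example.com/hisa.zip", reporthook=downloadStatus)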
Example #2
 def run(self):
     try:
         request = rq.FancyURLopener({})
         with request.open(self.url) as url_opener:
             self.result = url_opener.read().decode('utf-8')
     except OSError:  # urllib.error.URLError is a subclass of OSError
         self.result = ''
     finally:
         self.Finished.emit(self.ref, self.result)
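
The run() method above assumes a Qt-style worker with a Finished signal; a minimal sketch of how such a worker could be declared, assuming PyQt5 (the class, signal, and attribute names mirror the snippet but are not from the source):

import urllib.request as rq
from PyQt5.QtCore import QThread, pyqtSignal

class FetchWorker(QThread):
    Finished = pyqtSignal(str, str)  # (ref, result)

    def __init__(self, ref, url):
        super().__init__()
        self.ref = ref
        self.url = url
        self.result = ''

    def run(self):
        try:
            opener = rq.FancyURLopener({})
            with opener.open(self.url) as response:
                self.result = response.read().decode('utf-8')
        except OSError:
            self.result = ''
        finally:
            self.Finished.emit(self.ref, self.result)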
Example #3
 def run(self):
     try:
         request = rq.FancyURLopener({})
         with request.open(self.url) as url_opener:
             result = json.loads(url_opener.read().decode('utf-8'))
             self.config = result['config']
     except OSError:  # urllib.error.URLError is a subclass of OSError
         self.config = {}
     finally:
         self.Finished.emit(self.prefix, json.dumps(self.config))
Example #4
def get_emoticons_dict(url="https://pc.net/emoticons/"):
    opener = ureq.FancyURLopener({})
    f = opener.open(url)
    content = f.read()
    soup = BeautifulSoup(content, "html")
    emoticons_html_tags = soup.find_all(class_="smiley")

    emoticons_dict = dict()
    for t in emoticons_html_tags:
        label = t.a.attrs["href"].split("/")[1]
        emoticons_dict[t.text] = label
    return emoticons_dict
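
The function above relies on urllib.request (presumably imported as ureq) and BeautifulSoup being available; a hypothetical usage, where the printed pairs depend on the live page:

emoticons = get_emoticons_dict()
for smiley, label in list(emoticons.items())[:5]:
    print(smiley, "->", label)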
Example #5
def downloadFile(url, dest):
    msg("downloading " + dest)
    try:
        if sys.version_info < (3, 0):
            opener = urllib.FancyURLopener()
            opener.retrieve(url, dest)
        else:
            from urllib import request
            opener = request.FancyURLopener()
            opener.retrieve(url, dest)
    except Exception as e:
        msg('could not download "' + dest + '" because of this: ' + str(e), "RED")
        sys.exit(1)
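
FancyURLopener is deprecated in Python 3; if only Python 3 needs to be supported, the same download can be sketched with urlretrieve (the function name downloadFile3 is hypothetical):

from urllib.request import urlretrieve

def downloadFile3(url, dest):
    # urlretrieve raises OSError subclasses (URLError, ContentTooShortError) on failure
    try:
        urlretrieve(url, dest)
    except OSError as e:
        print('could not download "{}": {}'.format(dest, e))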
Example #6
 def run(self):
     try:
         request = rq.FancyURLopener({})
         with request.open(self.searchUrl) as url_opener:
             result = json.loads(url_opener.read().decode('utf-8'))

             if 'response' not in result or result['response']['numFound'] == 0:
                 answer = ''
             else:
                 answer = json.dumps(result['response']['docs'])

     except Exception:
         answer = ''
     finally:
         self.Finished.emit(answer)
Example #7
 def run(self):
     opener = urllib.FancyURLopener()
     opener.http_error_default = self.http_error_default
     self.start_progress_bar()
     try:
         if self.progress:
             filename, info = opener.retrieve(self.url, self.destination,
                                              self.progress_bar)
         else:
             filename, info = opener.retrieve(self.url, self.destination)
     except IOError as err:
         self.error_progress_bar()
         log.error(err)
         if not self.ignore_errors:
             raise
     self.success_progress_bar()
Example #8
    def _handle_url_download(self, update_url, pack_basename, temp_packpath):
        if 'github.com/' in update_url:
            owner, repo = _get_github_owner_repo(update_url)
            update_url = '/'.join(
                ['https://github.com', owner, repo, 'archive'])
        update_url = _add_slash(update_url)
        update_url += pack_basename

        from urllib import request
        try:
            request.FancyURLopener().retrieve(update_url, temp_packpath,
                                              self._download_status)
        except Exception as error:
            log.error(traceback.format_exc().strip())
            self._error('Error retrieving package...\n' + str(error))
            return False
        return True
Example #9
    def download(self, url, headers, proxy, num_retries):
        print('downloading:', url)

        self.thethrottle.wait(url)
        opener = request.FancyURLopener(proxy)
        opener.addheaders = headers
        try:
            with opener.open(url) as f:
                html = f.read().decode()
        except urllib.error.URLError as e:
            print('downloading failed:', e.reason)
            html = None
            if num_retries > 0:
                if hasattr(e, 'code') and 500 <= e.code < 600:
                    # retry on 5xx server errors and keep the retried result
                    html = self.download(url, headers, proxy, num_retries - 1)

        return html
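
The download method above relies on a self.thethrottle helper that is not shown; a minimal sketch of a per-domain throttle under that assumption (the class name Throttle and the default delay are guesses):

import time
from urllib.parse import urlparse

class Throttle:
    """Delay successive requests to the same domain by `delay` seconds."""
    def __init__(self, delay=1.0):
        self.delay = delay
        self.last_seen = {}  # domain -> timestamp of the last request

    def wait(self, url):
        domain = urlparse(url).netloc
        last = self.last_seen.get(domain)
        if last is not None and self.delay > 0:
            sleep_secs = self.delay - (time.time() - last)
            if sleep_secs > 0:
                time.sleep(sleep_secs)
        self.last_seen[domain] = time.time()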
Example #10
def get_pdf_info(url):
    password = ''  # If any password set for pdf opening
    page_no = set()
    max_pages = 10  # Set the maximum pages to be extracted
    caching = True
    la_params = LAParams()  # Performing Layout Analysis
    output_fp1 = StringIO()
    resource_object = PDFResourceManager(
        caching=caching
    )  # Used to store shared resources such as fonts or images in pdf.
    opener = request.FancyURLopener({})

    # Adding headers for opening pdf online if needed

    opener.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0'
    ), ('Connection', 'keep-alive')]
    fp = opener.open(url)
    device = TextConverter(resource_object, output_fp1, laparams=la_params)

    f = process_pdf(
        resource_object,
        device,
        fp,
        page_no,
        maxpages=max_pages,
        password=password,
        caching=caching,
        check_extractable=True,
    )

    # Tasks done by process_pdf:
    # 1. Create a parser object for the corresponding pdf
    # 2. Bypass the password-protected document
    # 3. Check whether the given pdf is extractable
    # 4. If extractable, parse the pages up to the max_pages limit

    fp.close()
    device.close()
    ctx = get_paragraphs(output_fp1.getvalue().strip())
    return ctx
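
process_pdf comes from old pdfminer releases and was later removed; with pdfminer.six the same text extraction can be sketched through the high-level API (the URL below is a placeholder):

from io import BytesIO
from urllib import request
from pdfminer.high_level import extract_text

opener = request.FancyURLopener({})
with opener.open("https://example.com/sample.pdf") as resp:  # placeholder URL
    pdf_bytes = BytesIO(resp.read())
# Roughly equivalent to the process_pdf call above
text = extract_text(pdf_bytes, password="", maxpages=10, caching=True)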
Example #11
 def downloadAllAds(self, ad_index, NUMBER_OF_WEBPAGES, NUMBER_OF_ADS, startingwebpage, baseURL):
     # This function has been used with the following parameters:
     # ad_index: 495
     # NUMBER_OF_WEBPAGES = 1500
     # NUMBER_OF_ADS = 15000
     # startingwebpage = 495
     # baseURL = "https://www.immobiliare.it/vendita-case/roma/?criterio=rilevanza&pag="
     # instantiate the object needed to download the html file given a specific URL
     opener = request.FancyURLopener({})
     # counter of the ad webpages downloaded so far
     count_webpages = 0
     # for each webpage downloaded
     for webpageId in tqdm(range(startingwebpage, NUMBER_OF_WEBPAGES + 1)):
         # download the page at https://www.immobiliare.it/vendita-case/roma/?criterio=rilevanza&pag=[webpageId]
         webPageHtml = self._downloadURL(url=baseURL + str(webpageId), opener=opener)
         # create the html parser using the library BeautifulSoup
         htmlParser = self._createHtmlParser(webPageHtml=webPageHtml)
         # retrieve all the ads
         allAds = self._retrieveListOfAds(htmlParser=htmlParser)
         # Retrieve all the links pointing to the single house ad which are contained inside the html page
         allLinks = [buildLink(link=self._getAdLink(adTag=ad), websiteURL=self.websiteURL) for ad in allAds]
         # for each link which points to the house ad
         for linkAd in allLinks:
             # increase the index of the ad
             ad_index += 1
             # download the html file of the advertisement webpage
             htmlFile = self._downloadURL(url=linkAd, opener=opener)
             # create the html parser using the library BeautifulSoup
             soup = self._createHtmlParser(webPageHtml=htmlFile)
             # write the html
             self._writeHtml(html=soup.prettify(), ad_index=ad_index)
             # update the counter
             count_webpages += 1
             # if the number of webpages downloaded exceeds the number of ads
             # requested as input, then the procedure is stopped early
             if (count_webpages >= NUMBER_OF_ADS):
                 print("[LOG]:All the necessary webpages have been downloaded")
                 return
     print("[LOG]: " + str(count_webpages) + " webpages have been downloaded")
Example #12
# -*- coding: utf-8 -*-
from urllib import request

proxy_handler = request.ProxyHandler({'http': '10.144.1.10:8080'})
# proxy_auth_handler = request.ProxyBasicAuthHandler()
# proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
opener = request.build_opener(proxy_handler)
with opener.open('http://www.pythonchallenge.com/') as f:
    print('Status:', f.status, f.reason)

proxies = {'http': 'http://10.144.1.10:8080/'}
opener = request.FancyURLopener(proxies)  # override the environment proxy settings with the specified proxy
with opener.open("http://www.pythonchallenge.com/") as f:
    print('Status:', f.getcode())  # the addinfourl returned here has no .reason attribute

with request.urlopen('https://www.bing.com') as f:
    data = f.read().decode('utf-8')  # the object returned by urlopen supports close, read, readline, readlines, iteration, etc.
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('%s: %s' % (k, v))
    # print('Data:', data)  # data is already decoded above

# ### The standard-library urllib module
# - URL handling modules
# - https://docs.python.org/3/library/urllib.html
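
The commented-out lines above hint at authenticated proxies; a minimal sketch of how the auth handler would be chained in (the realm, host, and credentials are placeholders):

from urllib import request

proxy_handler = request.ProxyHandler({'http': 'http://10.144.1.10:8080'})
auth_handler = request.ProxyBasicAuthHandler()
auth_handler.add_password('realm', '10.144.1.10:8080', 'username', 'password')  # placeholders
opener = request.build_opener(proxy_handler, auth_handler)
# request.install_opener(opener)  # optionally make it the default for urlopen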
Example #13
 def __init__(self, api_key, api_secret, *, verbose=False):
     self.verbose = verbose
     self.api_key = api_key
     self.api_secret = api_secret
     self.opener = request.FancyURLopener()
Example #14
 def download_work(self, video_url, video_name):
     opener = request.FancyURLopener()
     opener.retrieve(video_url, video_name)