def Download(index):
    # Candidate image dimensions; a random width/height pair is picked per download.
    size_x = [640, 800, 800, 960, 1024, 1024, 1152, 1280, 1280, 1280, 1366, 1400,
              1440, 1440, 1600, 1600, 1680, 1856, 1920, 1920, 1920, 2048, 2560,
              2560, 3840]
    size_y = [480, 576, 600, 648, 720, 720, 768, 768, 800, 900, 900, 960, 1050,
              1050, 1080, 1080, 1200, 1200, 1392, 1440, 1440, 1536, 1600, 2160,
              4320]
    try:
        print('\r' + 'Loading...\n', end='', flush=True)
        for i in range(int(index)):
            url = r"https://picsum.photos/xxx/yyy"
            data = url.replace('xxx', str(random.choice(size_x)))
            data = data.replace('yyy', str(random.choice(size_y)))
            if not os.path.exists('images'):
                os.makedirs('images')
            imgfilename = 'images/' + datetime.datetime.today().strftime(
                '%d%m%Y-%H%M%S%f%p') + '.jpg'
            req.urlretrieve(data + '.jpg', imgfilename)
            req.urlcleanup()
        print('\r' + '\nCompleted\n', end='', flush=True)
    except KeyboardInterrupt:
        print("\n[0] Force Exit Command Executed\n")
        req.urlcleanup()
        exit()
def getPic(pageUrl, savingPath):
    """Scrape all images from a gallery page, following "next page" links."""
    html = urlopen(pageUrl)
    bsObj = BeautifulSoup(html, features='html.parser')
    img = bsObj.find("div", {'class': 'picsbox picsboxcenter'}).find(
        "img", {"src": re.compile(r"https://img\.lovebuy99\.com.*jpg$")})
    imgName = bsObj.find("h1").get_text()  # the image name
    imgUrl = img.attrs['src']
    # Download the image. The filename must be prefixed with the saving path,
    # otherwise the image lands in the current working directory instead of
    # the target folder.
    urlretrieve(imgUrl, filename=savingPath + imgName + ".jpg")
    urlcleanup()
    # Scrape the next page ("下一页" is the site's "next page" link text).
    next_page = bsObj.find('a', text=u"下一页")
    next_pageUrl = urljoin(pageUrl, next_page.attrs['href'])
    if next_page.attrs['href'] != "#":
        # Pause 1 second so rapid requests aren't flagged by anti-scraping measures.
        time.sleep(1)
        getPic(next_pageUrl, savingPath)
def download_file(self, url, filename, cwd=None):
    """Download `url` to `filename`, reporting progress on stdout."""
    if not url:
        return

    def report_hook(index, blksize, size):
        if size <= 0:
            progression = '{0} bytes'.format(index * blksize)
        else:
            progression = '{0:.2f}%'.format(
                index * blksize * 100. / float(size))
        stdout.write('- Download {}\r'.format(progression))
        stdout.flush()

    if cwd:
        filename = join(cwd, filename)
    if exists(filename):
        unlink(filename)

    # Clean up temporary files just in case before downloading.
    urlcleanup()
    print('Downloading {0}'.format(url))
    urlretrieve(url, filename, report_hook)
    return filename
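# Note: urlretrieve calls its reporthook as reporthook(block_count, block_size,
# total_size), where total_size is negative when the server sends no
# Content-Length. A minimal standalone sketch of the same progress pattern as
# download_file above; the URL and filename are placeholders, not taken from
# the snippets in this collection.
def _example_progress_download():
    from sys import stdout
    from urllib.request import urlretrieve, urlcleanup

    def report_hook(count, blksize, total):
        if total <= 0:
            stdout.write('- Download {0} bytes\r'.format(count * blksize))
        else:
            stdout.write('- Download {0:.2f}%\r'.format(
                count * blksize * 100. / float(total)))
        stdout.flush()

    urlcleanup()  # drop any stale urlretrieve temp files first
    urlretrieve('https://example.com/file.bin', 'file.bin', report_hook)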
def _get_genomes(self):
    """Parse genomes from assembly summary txt files."""
    genomes = []

    names = [
        "assembly_summary_refseq.txt",
        "assembly_summary_genbank.txt",
        "assembly_summary_refseq_historical.txt",
    ]

    sys.stderr.write(
        "Downloading assembly summaries from NCBI, "
        "this will take a while...\n"
    )
    seen = {}
    for fname in names:
        urlcleanup()
        # Join with "/" explicitly; os.path.join is not safe for URLs.
        with urlopen(self.assembly_url + "/" + fname) as response:
            lines = response.read().decode("utf-8").splitlines()
        header = lines[1].strip("# ").split("\t")
        for line in lines[2:]:
            vals = line.strip("# ").split("\t")
            # Don't repeat samples with the same asm_name
            if vals[15] not in seen:  # asm_name
                genomes.append(dict(zip(header, vals)))
                seen[vals[15]] = 1

    return genomes
def download_podcast(podcast):
    try:
        if 'part' in podcast:
            filename = LOCAL_PATTERN_MULTI.format(**podcast)
        else:
            filename = LOCAL_PATTERN.format(**podcast)
        if os.path.exists(filename):
            return
        print('+', filename, '==> ', end='')
        mp3_url = find_mp3_url(podcast['url'])
        print(mp3_url)
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError:
            pass  # directory already exists
        tempfile, _ = urlretrieve(mp3_url, reporthook=download_progress)
        print()  # Go past progress bar
        shutil.move(tempfile, filename)
    except Exception as e:
        print('FAILED:', e)
    finally:
        urlcleanup()
def getImg(self):
    try:
        reg = r'src="(.+?\.jpg)"'
        imglist = re.findall(reg, self.html)
        if imglist:
            for i in range(len(imglist)):
                imglist[i] = 'http://www.fzlu.com' + imglist[i]
        for imgurl in imglist:
            try:
                request.urlretrieve(
                    imgurl, 'E:/photo/%d.jpg' % (self.num + self.baseImgNum))
                print('Saved image #%d' % (self.num + self.baseImgNum))
                print(threading.current_thread())
                self.allImgList.append(imgurl)
                if self.num == 10:
                    break
            except Exception as e:
                print(e.args)
            finally:
                request.urlcleanup()
    except Exception as e:
        print("Regular expression error")
    return self.num, self.allImgList
def _post_process_download(self, name, localname, out_dir, mask="soft"):
    """
    Replace accessions with sequence names in fasta file.

    Parameters
    ----------
    name : str
        NCBI genome name

    localname : str
        Local name for the genome

    out_dir : str
        Output directory

    mask : str, optional
        Masking level: soft, hard or none
    """
    # Get the FTP url for this specific genome and download
    # the assembly report
    for genome in self.genomes:
        if name in [genome["asm_name"], genome["asm_name"].replace(" ", "_")]:
            url = genome["ftp_path"]
            url += "/" + url.split("/")[-1] + "_assembly_report.txt"
            url = url.replace("ftp://", "https://")
            break

    # Create mapping of accessions to names
    tr = {}
    urlcleanup()
    with urlopen(url) as response:
        for line in response.read().decode("utf-8").splitlines():
            if line.startswith("#"):
                continue
            vals = line.strip().split("\t")
            tr[vals[6]] = vals[0]

    localname = get_localname(name, localname)
    # Check if the original genome fasta exists
    fa = os.path.join(out_dir, "{}.fa".format(localname))
    if not os.path.exists(fa):
        raise Exception("Genome fasta file not found, {}".format(fa))

    # Use a tmp file and replace the names
    new_fa = os.path.join(out_dir, ".process.{}.fa".format(localname))
    if mask != "soft":
        sys.stderr.write(
            "NCBI genomes are softmasked by default. Changing mask...\n"
        )

    with open(fa) as old, open(new_fa, "w") as new:
        for line in old:
            if line.startswith(">"):
                desc = line.strip()[1:]
                name = desc.split(" ")[0]
                new.write(">{} {}\n".format(tr.get(name, name), desc))
            elif mask == "hard":
                new.write(re.sub("[actg]", "N", line))
            elif mask not in ["hard", "soft"]:
                # Any other mask value means unmasked: uppercase everything.
                new.write(line.upper())
            else:
                new.write(line)

    # Rename tmp file to real genome file
    shutil.move(new_fa, fa)
def print_pl2_label(self, macaddress):
    mac = macaddress
    data = {"message": "printing PL2 label"}
    self._signal_printer.emit(data)
    sleep(0.5)
    try:
        url = self.corelight.get_pl2_label_download_url(mac)
        filename = os.path.basename(url)
        filepath = os.path.join(os.getcwd(), filename)
        urlretrieve(url, filepath, self.download_callback)
        print("download pl2:", url, filename, filepath)
        if self.download_percent == 100:
            self.download_percent = 0
            if os.path.exists(filepath):
                self.printing(filepath)
                sleep(0.1)
                data = {
                    "message": "print PL2 label success",
                    "filepath": filepath
                }
                self._signal_printer.emit(data)
                sleep(0.5)
                os.unlink(filepath)
    except Exception as e:
        print(e)
        data = {"message": "print PL2 label fail"}
        self._signal_printer.emit(data)
    finally:
        urlcleanup()
def tmploop_get_remote_files(i, matching, user, pw,
                             server, remote_path, path_local):
    """
    Function to download files using ftp. Tries 10 times before failing.
    """
    print("File: ", matching[i])
    print("src path: ", remote_path)
    pw = quote(pw)  # escape special characters in the password
    dlstr = ('ftp://' + user + ':' + pw + '@'
             + server + remote_path + matching[i])
    for attempt in range(10):
        print("Attempt to download data: ")
        try:
            print("Downloading file")
            urlretrieve(dlstr, os.path.join(path_local, matching[i]))
            urlcleanup()
        except Exception as e:
            print(e.__doc__)
            print(str(e))  # e.message does not exist in Python 3
            print("Waiting for 10 sec and retry")
            time.sleep(10)
        else:
            break
    else:
        print('An error was raised and I failed to fix the problem myself :(')
        print('Exit program')
        sys.exit()
def main(url, args):
    ''' Convert given tar file to conda packages '''
    cwd = os.getcwd()

    args.output_folder = os.path.abspath(args.output_folder)
    os.makedirs(args.output_folder, exist_ok=True)

    args.recipe_dir = os.path.abspath(args.recipe_dir)
    if os.path.isfile(args.recipe_dir):
        args.recipe_dir = os.path.dirname(args.recipe_dir)

    download = False
    if url.startswith('http:') or url.startswith('https:'):
        print_err('> download %s' % url)
        download = True
        # Keep the temporary file until extraction is done; calling
        # urlcleanup() here would delete it before tarfile.open can read it.
        url, headers = urlretrieve(url)
    elif os.path.exists(url):
        url = os.path.abspath(url)

    with TemporaryDirectory() as temp:
        with tarfile.open(url, url.endswith('tar') and 'r:' or 'r:gz') as tar:
            tar.extractall(temp)

        # Clean up
        if download:
            urlcleanup()

        os.chdir(temp)
        url = os.path.join(temp, glob.glob('R-swat*')[0])

        # Create conda package for each R version
        for base, versions in get_supported_versions(args.platform).items():
            for ver in versions:
                update_recipe(args.recipe_dir, url=url, version=get_version(url),
                              r_base='{}-base'.format(base), r_version=ver)

                cmd = ['conda', 'build', '-q', '--no-test']
                cmd.extend(['--R', ver])
                if args.output_folder:
                    cmd.extend(['--output-folder', args.output_folder])
                if args.override_channels:
                    cmd.append('--override-channels')
                if args.channel:
                    for chan in args.channel:
                        cmd.extend(['--channel', chan])
                cmd.append(args.recipe_dir)

                print_err('>' + ' '.join(cmd))
                subprocess.check_output(cmd)

    os.chdir(cwd)
def fetch_from_PDB(self):
    """
    Connects to PDB FTP server, downloads a .gz file of interest,
    decompresses the .gz file into .ent and then dumps a copy of
    the pdb{code}.ent file into cwd.

    Parameters
    ----------
    None

    Examples
    --------
    >>> inst = PDBFile('1rcy')
    >>> path_to_file = inst.fetch_from_PDB()
    >>> print(path_to_file)
    """
    subdir = self.code[1:3]
    infile = 'pdb{}.ent.gz'.format(self.code)
    # str.strip('.gz') strips a *set* of characters, not the suffix;
    # slice the extension off instead.
    decompressed = infile[:-len('.gz')]
    fullpath = ROOT.format(subdir, infile)

    try:
        urlcleanup()
        urlretrieve(fullpath, infile)
    except Exception:
        return 'URLError'
    else:
        with gzip.open(infile, 'rb') as gz:
            with open(decompressed, 'wb') as out:
                out.writelines(gz)
        remove(infile)

    return path.join(getcwd(), decompressed)
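# A minimal, self-contained sketch of the download-then-decompress pattern used
# in fetch_from_PDB above: fetch a .gz file, then stream it into its
# decompressed counterpart with gzip + shutil. The URL and filenames are
# illustrative placeholders only.
def _example_fetch_gzip(url='https://example.org/pdb1rcy.ent.gz'):
    import gzip
    import shutil
    from urllib.request import urlretrieve, urlcleanup

    infile = url.rsplit('/', 1)[-1]      # e.g. 'pdb1rcy.ent.gz'
    decompressed = infile[:-len('.gz')]  # e.g. 'pdb1rcy.ent'
    urlcleanup()                         # clear stale temp files first
    urlretrieve(url, infile)             # download the gzipped file
    # Stream-decompress without loading the whole file into memory.
    with gzip.open(infile, 'rb') as gz, open(decompressed, 'wb') as out:
        shutil.copyfileobj(gz, out)
    return decompressed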
def checkForUpdate(self,):
    # Check on the web whether an update is available; returns booleans for
    # update_PMV, update_ePMV and update_pyubics.
    # Where to check and what type: dev/stable/host
    self.new_version = self.liste_plugin
    self.update_notes = ""
    self.result_json = None
    # need version
    URI = self.url
    tmpFileName = "update_notes_" + self.host + ".json"
    urllib.urlcleanup()
    if checkURL(URI):
        urllib.urlretrieve(URI, tmpFileName)  # ,reporthook=self.helper.reporthook)
    else:
        print("problem connecting to server")
        return None
    with open(tmpFileName, "r") as fp:  # doesn't work with symbolic links?
        self.result_json = json.load(fp)
    do_update = []
    for plug in self.liste_plugin:
        self.liste_plugin[plug]["update"] = False
        if self.liste_plugin[plug]["version_current"] != \
                self.result_json[plug]["version_" + self.typeUpdate]:
            if self.result_json[plug]["host"] == ["all"] or \
                    self.host in self.result_json[plug]["host"]:
                self.liste_plugin[plug]["update"] = True
        self.liste_plugin[plug]["host"] = self.result_json[plug]["host"]
        do_update.append(self.liste_plugin[plug]["update"])
    self.update_notes = self.result_json["notes"]
    print(self.update_notes)
    os.remove(tmpFileName)
    return do_update
def getdocument(url):
    try:
        return req.urlopen(url)
    except req.URLError as e:
        print(e.reason)
        req.urlcleanup()
        sys.exit(1)
def prepare_file(upload=False):  # fixed typo: parameter was "uplodad"
    if upload:
        for file in glob("out/**"):
            if os.path.isfile(file):
                try:
                    os.unlink(file)
                except Exception:
                    pass
    i = 0
    while i < 8:
        print("on loop", i + 1)
        submitter = "me"
        vid = sys.argv[1]
        duration = get_duration(vid)
        print(duration)
        start = 0
        if duration > DURATION:
            start = int(random.random() * (duration - DURATION))
            duration = DURATION
        filename, info_text, failed = v_glitch(
            vid, i, submitter, start, duration
        )
        urlcleanup()
        if failed:
            print("something went wrong")
            print("FAILED:", info_text)
        else:
            print("filename if'ed")
            print(info_text)
            print("new path:", filename)
            vid = filename
        i += 1
    return info_text, filename
def fetch():
    url = 'https://us.battle.net/forums/en/wow/1011639/'
    results = []
    req = Request(url)
    try:
        urlcleanup()
        response = urlopen(req)
    except URLError as e:
        if hasattr(e, 'reason'):
            print(' We failed to reach a server.')
            print(' Reason: ', e.reason)
        elif hasattr(e, 'code'):
            print(' The server couldn\'t fulfill the request.')
            print(' Error code: ', e.code)
        return None, None
    else:
        html = response.read()
        soup = BeautifulSoup(html, 'html.parser')
        # Collect forum topics as (href, ASCII-sanitized title) pairs.
        text = soup.findAll('a', attrs={'class': 'ForumTopic'})
        for topic in text:
            title = topic.find('span', attrs={'class': 'ForumTopic-title'})
            title = re.sub(r'[^\x00-\x7F]+', ' ', title.text.strip())
            if re.search(r'^[^<>]+$', title):
                results.append((topic['href'][21:], title))
        return results
def download_images(imgs: list, path: str, c=0):
    # Note: assumes `urllib` is imported as an alias for urllib.request (Python 3).
    for i in imgs:
        urllib.urlretrieve(i, os.path.join(path, str(c) + '.png'))
        c = c + 1
    urllib.urlcleanup()
def download_document(request, pk):
    requested_doc = Document.objects.get(id=pk)
    # Alternative: requested_doc = get_object_or_404(Document, pk=pk)
    template_name = 'editor/DownloadDocument3.html'
    try:
        print("-trying to render it into pdf-\n")
        pdf = render_to_pdf(template_name, {
            'data': requested_doc.content,
            'title': 'download page3',
        })
    finally:
        urlcleanup()
    print("-pdf rendering successful-")
    return HttpResponse(pdf, content_type='application/pdf', charset='utf-8')
def runner(source):
    "Download a file from a url into a directory"
    # Get the base file name
    url = source[0]
    directory = source[1]
    p = urlparse(url)
    file_name = path.basename(p.path)
    # Concatenate path and file name
    path_and_file_name = path.join(directory, file_name)
    # Download the file if it does not already exist.
    # If you are getting errors of incomplete data, try reducing the number
    # of max_workers.
    try:
        if not path.exists(path_and_file_name):
            if url.startswith("ftp"):
                urlcleanup()
                urlretrieve(url, path_and_file_name)
            else:
                retrieve(url, path_and_file_name)
            logger.info(path_and_file_name)  # was logging.info; use the module logger consistently
        if path_and_file_name.endswith(".tar.gz"):
            shutil.unpack_archive(path_and_file_name, directory)
            logger.info(f"Unpacked: {path_and_file_name}")
    except Exception as e:
        logger.error(f"Problem with {file_name}.. You may need to manually download from: {url}")
        logger.error(e)
def getNDFDlist(listof, area=None, timerange=None, ndfd_server=NDFD_SERVER):
    """
    Description: Returns a list of available NDFD parameters
    Parameters:
        listof (str): Either "areas", "timeranges", or "vars"
        area (str): Needed if listof="timeranges" or "vars"
        timerange (str): Needed if listof="vars"
        ndfd_server (str): URL to NDFD server with files
    Returns:
        lines (list): List of extracted parameters
    """
    if listof == "areas":
        regex = r"(?<=a href=\"AR\.).*(?=\/\">)"
    elif listof == "timeranges":
        ndfd_server += NDFD_AREA.format(area)
        regex = r"(?<=a href=\"VP\.)\d\d\d\-\d\d\d(?=\/)"
    elif listof == "vars":
        ndfd_server += NDFD_AREA.format(area) + NDFD_TRNG.format(timerange)
        regex = r"(?<=a href=\"ds\.).*(?=\.bin\")"
    with urlopen(ndfd_server) as file:
        lines = file.readlines()
    lines = [line.decode("utf-8") for line in lines]
    lines = "\n".join(lines)
    lines = re.findall(regex, lines)
    urlcleanup()
    return lines
def megach_update():
    try:
        url = "https://raw.githubusercontent.com/LinkkG/megach/master/megach.py"
        file_to_replace = "{}{}megach.py".format(paths.u_bot, os.sep)
        with urlreq.urlopen(url) as info_online:
            info_online = info_online.read().decode("utf-8").splitlines()
        megach_online_v = [x for x in info_online if "version =" in x][0].split("=")[1]
        megach_online_v = megach_online_v.replace("'", "").lstrip(" ")
        with open(file_to_replace, encoding="utf-8") as a_megach:
            a_megach = a_megach.read().splitlines()
        megach_local_v = [x for x in a_megach if "version =" in x][0].split("=")[1]
        megach_local_v = megach_local_v.replace("'", "").lstrip(" ")
        if a_megach == info_online:
            download = [False, megach_local_v, megach_online_v]
        else:
            urlreq.urlretrieve(url, file_to_replace)
            download = [True, megach_local_v, megach_online_v]
        urlreq.urlcleanup()
        # The close() calls that used to follow the return were unreachable,
        # and unnecessary: both handles are managed by `with` blocks.
        return download
    except Exception:
        return "Error: {}".format(str(tools.error_def()))
def ub_test(url):
    request.urlretrieve(url, filename="21.html")
    request.urlcleanup()
    file = request.urlopen(url).read()
    fileline = request.urlopen(url).readline()
    return file, fileline
def getDistrictInfo(i):
    num = i + 1
    file_name = base_file_name + str(num).rjust(num_padding, '0') + file_extension
    local_file_name, headers = request.urlretrieve(base_url + file_name)
    ZipFile(local_file_name, 'r').extractall()
    request.urlcleanup()
def urlretrieve(url):
    from urllib import request
    # `dir` is assumed to be a module-level output directory string
    # (note that it shadows the built-in of the same name).
    webname = "".join(("".join(url.split("//")[1:])).split("/")[:1])
    request.urlretrieve(url, dir + webname + '.html')
    # Clean up the cache files left behind by urlopen/urlretrieve
    request.urlcleanup()
    gevent.sleep(1)
    print("{} fetch success!".format(webname))
def download_to_local(self, url):
    try:
        name, _ = urlretrieve(url)
        f_name = os.path.basename(url)
        self.document.save(f_name, File(open(name, 'rb')))
    finally:
        # clear temp files after load
        urlcleanup()
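# The try/finally idiom above is the core pattern behind most snippets in this
# collection: urlretrieve with no explicit filename downloads to a temporary
# file, and urlcleanup() removes it afterwards. A minimal sketch with a
# placeholder URL and a hypothetical destination name:
def _example_retrieve_temp(url='https://example.com/report.pdf'):
    import shutil
    from urllib.request import urlretrieve, urlcleanup

    try:
        tmp_path, headers = urlretrieve(url)      # no filename -> temp file
        shutil.copyfile(tmp_path, 'report.pdf')   # persist before cleanup
    finally:
        urlcleanup()                              # delete urlretrieve temp files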
def dataFetch(self, charClass, charSpec, guide):
    print('>Fetching {1} {0} {2}'.format(charClass, charSpec, guide))
    charClass = '-'.join(charClass.lower().split(' '))
    charSpec = '-'.join(charSpec.lower().split(' '))
    guide = '-'.join(guide.lower().split(' '))
    url = 'https://www.wowhead.com/{1}-{0}-{2}'.format(charClass, charSpec, guide)
    req = Request(url)
    try:
        urlcleanup()
        response = urlopen(req)
    except URLError as e:
        if hasattr(e, 'reason'):
            print(' We failed to reach a server.')
            print(' Reason: ', e.reason)
        elif hasattr(e, 'code'):
            print(' The server couldn\'t fulfill the request.')
            print(' Error code: ', e.code)
        return None, None
    else:
        html = response.read()
        soup = BeautifulSoup(html, 'html.parser')
        # kill all script and style elements
        for script in soup(["script", "style"]):
            script.extract()  # rip it out
        # get text
        text = soup.get_text()
        # break into lines and remove leading and trailing space on each
        lines = (line.strip() for line in text.splitlines())
        # break multi-headlines into a line each (split on double spaces)
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        # drop blank lines
        text = '\n'.join(chunk for chunk in chunks if chunk)
        lines = text.split('\n')
        # Find the title in the text
        title = lines[0].replace(' - Guides - Wowhead', '')
        # Find the content: it is the line after "ReportLinks"
        content = ''
        nextIsContent = False
        for line in lines:
            if nextIsContent:
                content += line
            if 'ReportLinks' in line:
                nextIsContent = True
            if 'Share your comments about ' in line:
                break
        return title, content
def download(url, des_path='.'):
    try:
        print('downloading {}'.format(url))
        request.urlretrieve(url, os.path.join(des_path, os.path.basename(url)))
        request.urlcleanup()
    except request.HTTPError as e:
        print('HTTP Error: {} {}'.format(e.code, url))
    except request.URLError as e:
        print('URL Error: {} {}'.format(e.reason, url))
def download_images(imgs: list, path: str, volume=1, name=1):
    # Note: assumes `urllib` is imported as an alias for urllib.request (Python 3).
    for i in imgs:
        urllib.urlretrieve(
            i, os.path.join(path, str(volume) + '-' + str(name) + '.png'))
        name = name + 1
    urllib.urlcleanup()
def _post_process_download(self, name, genome_dir, mask="soft"):
    """
    Replace accessions with sequence names in fasta file.

    Parameters
    ----------
    name : str
        NCBI genome name

    genome_dir : str
        Genome directory
    """
    # Get the FTP url for this specific genome and download
    # the assembly report
    for genome in self.genomes:
        if genome["asm_name"] == name:
            url = genome["ftp_path"]
            url += "/" + url.split("/")[-1] + "_assembly_report.txt"
            url = url.replace("ftp://", "https://")
            break

    # Create mapping of accessions to names
    tr = {}
    urlcleanup()
    with urlopen(url) as response:
        for line in response.read().decode('utf-8').splitlines():
            if line.startswith("#"):
                continue
            vals = line.strip().split("\t")
            tr[vals[6]] = vals[0]

    name = name.replace(" ", "_")
    # Check if the original genome fasta exists
    fa = os.path.join(genome_dir, name, "{}.fa".format(name))
    if not os.path.exists(fa):
        raise Exception("Genome fasta file not found, {}".format(fa))

    # Use a tmp file and replace the names
    new_fa = os.path.join(genome_dir, name, ".process.{}.fa".format(name))
    if mask == "hard":
        sys.stderr.write("masking lower-case.\n")
    with open(fa) as old, open(new_fa, "w") as new:
        for line in old:
            if line.startswith(">"):
                desc = line.strip()[1:]
                name = desc.split(" ")[0]
                new.write(">{} {}\n".format(tr.get(name, name), desc))
            elif mask == "hard":
                new.write(re.sub('[actg]', 'N', line))
            else:
                new.write(line)

    # Rename tmp file to real genome file
    shutil.move(new_fa, fa)
def retrieve_social_profile_photo(user_profile, url):
    """ Retrieve a user's profile photo from a url and save it """
    try:
        filename, _ = urlretrieve(url)
        user_profile.profile_photo.save(filename, File(open(filename, 'rb')))
        user_profile.generate_nonce()
        user_profile.save()
    finally:
        urlcleanup()
def terminate_download_callback(event):
    # Clean up after urllib urlretrieve which is used internally
    # in grass.utils.
    from urllib import request  # pylint: disable=import-outside-toplevel

    self._download_in_progress = False
    request.urlcleanup()
    sys.stdout.write("Download aborted\n")
    self.thread = gThread()
    self._change_download_btn_label()
def main():
    data = urllib.request.urlopen('http://www.baidu.com')
    print(data.getcode())
    print(data.geturl())
    data_read = data.read()
    with open("E:/1.html", "wb") as fhandle:
        fhandle.write(data_read)
    # Another approach: urlretrieve downloads straight to a file.
    req.urlcleanup()
    filename = req.urlretrieve("http://edu.51cto.com", "E:/2.html")
def get_tcc(cleanup=True):
    BUILD_DIR.mkdir(parents=True, exist_ok=True)
    try:
        if WINDOWS:
            _get_tcc_windows()
        else:
            _get_tcc_good_os(cleanup)
    finally:
        _info("Cleaning up downloaded temporary files.")
        urlcleanup()
def do_local(self):
    # Download every remote "src" resource referenced in the tag list and
    # rewrite the attribute to point at the local copy.
    for dic in self.lista_etiq:
        llave = list(dic.keys())[0]
        for elemento in dic[llave]:
            value = dic[llave][elemento]
            if value:
                if (elemento == "src") and (value != "cancion.ogg"):
                    URL = value
                    filename = URL[URL.rfind("/") + 1:]
                    data = urlretrieve(URL, filename)
                    urlcleanup()
                    dic[llave][elemento] = data[0]
def readUpdateNote(self,):
    URI = self.url
    tmpFileName = "update_notes_" + self.host + ".json"
    urllib.urlcleanup()
    if checkURL(URI):
        urllib.urlretrieve(URI, tmpFileName)  # ,reporthook=self.helper.reporthook)
    else:
        print("problem connecting to server")
        return None
    with open(tmpFileName, "r") as fp:  # doesn't work with symbolic links?
        self.result_json = json.load(fp)
def _get_genomes(self):
    """Parse genomes from assembly summary txt files."""
    genomes = []
    names = [
        "assembly_summary_refseq.txt",
        "assembly_summary_refseq_historical.txt",
    ]
    for fname in names:
        urlcleanup()
        with urlopen(self.assembly_url + "/" + fname) as response:
            lines = response.read().decode('utf-8').splitlines()
        header = lines[1].strip("# ").split("\t")
        for line in lines[2:]:
            vals = line.strip("# ").split("\t")
            genomes.append(dict(zip(header, vals)))
    return genomes
def getRemoteData(url):
    # Download from URL
    try:
        local_filename, headers = UrlRequest.urlretrieve(url)
    except IOError as e:
        print('IO Error! Abort!\n')
        print(e)
        return -1
    except Exception:
        print('Unknown error occurred! Abort!')
        return -1
    # Open and read the local temporary file
    html = open(local_filename)
    data = html.read()
    html.close()
    # Delete the temporary internet file
    UrlRequest.urlcleanup()
    return data
def update_plug(self, plug, path=None, typeUpdate="std", backup=False):
    import zipfile

    p = path
    if p is None:
        p = self.liste_plugin[plug]["path"]  # path of the plugin
    if self.host in self.result_json[plug]["host"]:
        URI = self.server + "/" + plug + "_" + typeUpdate + "_" + self.host + ".zip"
    else:
        URI = self.server + "/" + plug + "_" + typeUpdate + "_all.zip"
    os.chdir(p)
    os.chdir("../")
    patchpath = os.path.abspath(os.curdir)
    tmpFileName = patchpath + os.sep + plug + "_" + typeUpdate + ".zip"
    urllib.urlcleanup()
    if checkURL(URI):
        urllib.urlretrieve(URI, tmpFileName, reporthook=self.helper.reporthook)
    else:
        return False
    zfile = zipfile.ZipFile(tmpFileName)
    dirname1 = p
    import shutil
    if backup:
        # Back up the current version: copy the plugin dir to a
        # version-suffixed sibling before replacing it.
        dirname2 = dirname1 + self.liste_plugin[plug]["version_current"]
        print(dirname1, dirname2)
        if os.path.exists(dirname2):
            shutil.rmtree(dirname2, True)
        shutil.copytree(dirname1, dirname2)
    if os.path.exists(dirname1):
        shutil.rmtree(dirname1, True)
    zfile.extractall(patchpath)
    zfile.close()
    os.remove(tmpFileName)
    return True
def download(self):
    furlo = FBURLopener({})
    try:
        tmpfile, msg = furlo.retrieve(self.url, reporthook=self.rhook)
        print()
    except HTTPError as ex:
        urlcleanup()
        sys.exit(ex)
    except URLError as ex:
        urlcleanup()
        sys.exit(ex)
    if os.path.exists(self.dlpath) and filecmp.cmp(self.dlpath, tmpfile):
        print('You already have the newest version of ' + self.plugin)
        done = True
    else:
        shutil.copyfile(tmpfile, self.dlpath)
        print(self.plugin + ' downloaded.')
        done = False
    urlcleanup()
    if done or self.format == 'jar':
        return
    try:
        shutil.unpack_archive(self.dlpath, self.dest_dir, self.format)
    except ValueError as ex:
        sys.exit('Error: ' + str(ex))
def download(self):
    if os.path.exists(self.filename):
        print('File %r exists locally.' % self.filename)
        return
    try:
        print('Downloading: %r' % self.url)
        self.urlo.retrieve(self.url, self.filename, reporthook=self.rhook)
        print()
    except HTTPError as ex:
        request.urlcleanup()
        sys.exit(ex)
    except URLError as ex:
        request.urlcleanup()
        sys.exit(ex)
    request.urlcleanup()
def cleanUpTemporaryFiles():
    UrlRequest.urlcleanup()
def download_genome(self, name, genome_dir, localname=None, mask="soft",
                    regex=None, invert_match=False, version=None):
    """
    Download a (gzipped) genome file to a specific directory

    Parameters
    ----------
    name : str
        Genome / species name

    genome_dir : str
        Directory to install genome

    mask : str, optional
        Masking, soft, hard or none (all other strings)
    """
    genome_dir = os.path.expanduser(genome_dir)
    if not os.path.exists(genome_dir):
        os.makedirs(genome_dir)

    dbname, link = self.get_genome_download_link(name, mask=mask, version=version)

    myname = dbname
    if localname:
        myname = localname
    myname = myname.replace(" ", "_")

    gzipped = False
    if link.endswith(".gz"):
        gzipped = True

    if not os.path.exists(os.path.join(genome_dir, myname)):
        os.makedirs(os.path.join(genome_dir, myname))

    urlcleanup()
    response = urlopen(link)
    sys.stderr.write("downloading from {}...\n".format(link))
    down_dir = genome_dir
    fname = os.path.join(genome_dir, myname, myname + ".fa")
    if regex:
        down_dir = mkdtemp()
        fname = os.path.join(down_dir, myname + ".fa")
    with open(fname, "wb") as f_out:
        if gzipped:
            # Works on both Python 2.7 and 3
            with gzip.GzipFile(fileobj=io.BytesIO(response.read())) as f_in:
                shutil.copyfileobj(f_in, f_out)
        else:
            f_out.write(response.read())
    sys.stderr.write("done...\n")

    if link.endswith("tar.gz"):
        self.tar_to_bigfile(fname, fname)

    if hasattr(self, '_post_process_download'):
        self._post_process_download(name, down_dir, mask)

    if regex:
        infa = fname
        outfa = os.path.join(genome_dir, myname, myname + ".fa")
        filter_fasta(infa, outfa, regex=regex, v=invert_match, force=True)

        not_included = [k for k in Fasta(infa).keys()
                        if k not in Fasta(outfa).keys()]
        shutil.rmtree(down_dir)
        fname = outfa

    sys.stderr.write("name: {}\n".format(dbname))
    sys.stderr.write("local name: {}\n".format(myname))
    sys.stderr.write("fasta: {}\n".format(fname))

    # Create readme with information
    readme = os.path.join(genome_dir, myname, "README.txt")
    with open(readme, "w") as f:
        f.write("name: {}\n".format(myname))
        f.write("original name: {}\n".format(dbname))
        f.write("original filename: {}\n".format(os.path.split(link)[-1]))
        f.write("url: {}\n".format(link))
        f.write("mask: {}\n".format(mask))
        f.write("date: {}\n".format(time.strftime("%Y-%m-%d %H:%M:%S")))
        if regex:
            if invert_match:
                f.write("regex: {} (inverted match)\n".format(regex))
            else:
                f.write("regex: {}\n".format(regex))
            f.write("sequences that were excluded:\n")
            for seq in not_included:
                f.write("\t{}\n".format(seq))
def _post_process_download(self, name, genome_dir, mask="soft"):
    """
    Replace accessions with sequence names in fasta file.

    Parameters
    ----------
    name : str
        NCBI genome name

    genome_dir : str
        Genome directory
    """
    # Get the FTP url for this specific genome and download
    # the assembly report
    for genome in self.genomes:
        if genome["asm_name"] == name:
            url = genome["ftp_path"]
            url += "/" + url.split("/")[-1] + "_assembly_report.txt"
            break

    # Create mapping of accessions to names
    tr = {}
    urlcleanup()
    with urlopen(url) as response:
        for line in response.read().decode('utf-8').splitlines():
            if line.startswith("#"):
                continue
            vals = line.strip().split("\t")
            tr[vals[6]] = vals[0]

    name = name.replace(" ", "_")
    # Check if the original genome fasta exists
    fa = os.path.join(genome_dir, name, "{}.fa".format(name))
    if not os.path.exists(fa):
        raise Exception("Genome fasta file not found, {}".format(fa))

    # Use a tmp file and replace the names
    new_fa = os.path.join(genome_dir, name, ".process.{}.fa".format(name))
    if mask == "hard":
        sys.stderr.write("masking lower-case.\n")
    with open(fa) as old, open(new_fa, "w") as new:
        for line in old:
            if line.startswith(">"):
                desc = line.strip()[1:]
                name = desc.split(" ")[0]
                new.write(">{} {}\n".format(tr.get(name, name), desc))
            elif mask == "hard":
                new.write(re.sub('[actg]', 'N', line))
            else:
                new.write(line)

    # Rename tmp file to real genome file
    shutil.move(new_fa, fa)
def download():
    count = 0
    # Set the limits of the range according to the "id" keys
    for i in range(1, 2000):
        count += 1
        try:
            if retain_original_names:
                urr.urlretrieve(link + str(i))
            else:
                urr.urlretrieve(link + str(i), "Lecture " + str(count))
        except AttributeError:
            with urr.urlopen(link + str(i)) as in_data, \
                    open("Lecture " + str(count), 'wb') as out_video:
                copyfileobj(in_data, out_video)
        # Note: rebinding the loop variable has no effect here; `for` resets
        # `i` on the next iteration, so this line does not skip ahead.
        i += ticker
        time.sleep(buffer_time)
    print(str(count) + " videos have been downloaded.")
    # Removing any temporary files left behind
    urr.urlcleanup()

##############################################################################
## Known Issues
## If the downloaded files are named "Lecture 1, 2, 3 ..." even though
## retain_original_names is set to True:
## 1. Python.org has finally deprecated the urlretrieve attribute
## 2. I still haven't figured out how to scrape Coursera's website
##############################################################################