def Download(index):
    # Candidate image dimensions; a random width/height pair is picked per download.
    size_x = [640, 800, 800, 960, 1024, 1024, 1152, 1280, 1280, 1280, 1366, 1400,
              1440, 1440, 1600, 1600, 1680, 1856, 1920, 1920, 1920, 2048, 2560,
              2560, 3840]
    size_y = [480, 576, 600, 648, 720, 720, 768, 768, 800, 900, 900, 960, 1050,
              1050, 1080, 1080, 1200, 1200, 1392, 1440, 1440, 1536, 1600, 2160,
              4320]
    try:
        print('\r' + 'Loading...\n', end='', flush=True)
        for i in range(int(index)):
            url = r"https://picsum.photos/xxx/yyy"
            data = url.replace('xxx', str(random.choice(size_x)))
            data = data.replace('yyy', str(random.choice(size_y)))
            if not os.path.exists('images'):
                os.makedirs('images')
            imgfilename = 'images/' + datetime.datetime.today().strftime(
                '%d%m%Y-%H%M%S%f%p') + '.jpg'
            req.urlretrieve(data + '.jpg', imgfilename)
            req.urlcleanup()
        print('\r' + '\nCompleted\n', end='', flush=True)
    except KeyboardInterrupt:
        print("\n[0] Force Exit Command Executed\n")
        req.urlcleanup()
        exit()
def getPic(pageUrl, savingPath):
    """Scrape all images from a gallery page, following "next page" links."""
    html = urlopen(pageUrl)
    bsObj = BeautifulSoup(html, features='html.parser')
    img = bsObj.find("div", {'class': 'picsbox picsboxcenter'}).find(
        "img", {"src": re.compile(r"https://img\.lovebuy99\.com.*jpg$")})
    imgName = bsObj.find("h1").get_text()  # the image name
    imgUrl = img.attrs['src']
    # Download the image. The filename must be prefixed with the saving path,
    # otherwise the image lands in the current working directory instead of
    # the target folder.
    urlretrieve(imgUrl, filename=savingPath + imgName + ".jpg")
    urlcleanup()
    # Scrape the next page ("下一页" is the site's "next page" link text).
    next_page = bsObj.find('a', text=u"下一页")
    next_pageUrl = urljoin(pageUrl, next_page.attrs['href'])
    if next_page.attrs['href'] != "#":
        # Pause 1 second so rapid requests aren't flagged by anti-scraping measures.
        time.sleep(1)
        getPic(next_pageUrl, savingPath)
def download_file(self, url, filename, cwd=None):
    """Download `url` to `filename`, reporting progress on stdout."""
    if not url:
        return

    def report_hook(index, blksize, size):
        if size <= 0:
            progression = '{0} bytes'.format(index * blksize)
        else:
            progression = '{0:.2f}%'.format(
                index * blksize * 100. / float(size))
        stdout.write('- Download {}\r'.format(progression))
        stdout.flush()

    if cwd:
        filename = join(cwd, filename)
    if exists(filename):
        unlink(filename)

    # Clean up temporary files just in case before downloading.
    urlcleanup()
    print('Downloading {0}'.format(url))
    urlretrieve(url, filename, report_hook)
    return filename
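# Note: urlretrieve calls its reporthook as reporthook(block_count, block_size,
# total_size), where total_size is negative when the server sends no
# Content-Length. A minimal standalone sketch of the same progress pattern as
# download_file above; the URL and filename are placeholders, not taken from
# the snippets in this collection.
def _example_progress_download():
    from sys import stdout
    from urllib.request import urlretrieve, urlcleanup

    def report_hook(count, blksize, total):
        if total <= 0:
            stdout.write('- Download {0} bytes\r'.format(count * blksize))
        else:
            stdout.write('- Download {0:.2f}%\r'.format(
                count * blksize * 100. / float(total)))
        stdout.flush()

    urlcleanup()  # drop any stale urlretrieve temp files first
    urlretrieve('https://example.com/file.bin', 'file.bin', report_hook)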
def _get_genomes(self):
    """Parse genomes from assembly summary txt files."""
    genomes = []

    names = [
        "assembly_summary_refseq.txt",
        "assembly_summary_genbank.txt",
        "assembly_summary_refseq_historical.txt",
    ]

    sys.stderr.write(
        "Downloading assembly summaries from NCBI, "
        "this will take a while...\n"
    )
    seen = {}
    for fname in names:
        urlcleanup()
        # Join with "/" explicitly; os.path.join is not safe for URLs.
        with urlopen(self.assembly_url + "/" + fname) as response:
            lines = response.read().decode("utf-8").splitlines()
        header = lines[1].strip("# ").split("\t")
        for line in lines[2:]:
            vals = line.strip("# ").split("\t")
            # Don't repeat samples with the same asm_name
            if vals[15] not in seen:  # asm_name
                genomes.append(dict(zip(header, vals)))
                seen[vals[15]] = 1

    return genomes
def download_podcast(podcast):
    try:
        if 'part' in podcast:
            filename = LOCAL_PATTERN_MULTI.format(**podcast)
        else:
            filename = LOCAL_PATTERN.format(**podcast)
        if os.path.exists(filename):
            return
        print('+', filename, '==> ', end='')
        mp3_url = find_mp3_url(podcast['url'])
        print(mp3_url)
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError:
            pass  # directory already exists
        tempfile, _ = urlretrieve(mp3_url, reporthook=download_progress)
        print()  # Go past progress bar
        shutil.move(tempfile, filename)
    except Exception as e:
        print('FAILED:', e)
    finally:
        urlcleanup()
def getImg(self):
    try:
        reg = r'src="(.+?\.jpg)"'
        imglist = re.findall(reg, self.html)
        if imglist:
            for i in range(len(imglist)):
                imglist[i] = 'http://www.fzlu.com' + imglist[i]
        for imgurl in imglist:
            try:
                request.urlretrieve(
                    imgurl, 'E:/photo/%d.jpg' % (self.num + self.baseImgNum))
                print('Saved image #%d' % (self.num + self.baseImgNum))
                print(threading.current_thread())
                self.allImgList.append(imgurl)
                if self.num == 10:
                    break
            except Exception as e:
                print(e.args)
            finally:
                request.urlcleanup()
    except Exception as e:
        print("Regular expression error")
    return self.num, self.allImgList
def _post_process_download(self, name, localname, out_dir, mask="soft"):
    """
    Replace accessions with sequence names in fasta file.

    Parameters
    ----------
    name : str
        NCBI genome name

    localname : str
        Local name for the genome

    out_dir : str
        Output directory

    mask : str, optional
        Masking level: soft, hard or none
    """
    # Get the FTP url for this specific genome and download
    # the assembly report
    for genome in self.genomes:
        if name in [genome["asm_name"], genome["asm_name"].replace(" ", "_")]:
            url = genome["ftp_path"]
            url += "/" + url.split("/")[-1] + "_assembly_report.txt"
            url = url.replace("ftp://", "https://")
            break

    # Create mapping of accessions to names
    tr = {}
    urlcleanup()
    with urlopen(url) as response:
        for line in response.read().decode("utf-8").splitlines():
            if line.startswith("#"):
                continue
            vals = line.strip().split("\t")
            tr[vals[6]] = vals[0]

    localname = get_localname(name, localname)
    # Check if the original genome fasta exists
    fa = os.path.join(out_dir, "{}.fa".format(localname))
    if not os.path.exists(fa):
        raise Exception("Genome fasta file not found, {}".format(fa))

    # Use a tmp file and replace the names
    new_fa = os.path.join(out_dir, ".process.{}.fa".format(localname))
    if mask != "soft":
        sys.stderr.write(
            "NCBI genomes are softmasked by default. Changing mask...\n"
        )

    with open(fa) as old, open(new_fa, "w") as new:
        for line in old:
            if line.startswith(">"):
                desc = line.strip()[1:]
                name = desc.split(" ")[0]
                new.write(">{} {}\n".format(tr.get(name, name), desc))
            elif mask == "hard":
                new.write(re.sub("[actg]", "N", line))
            elif mask not in ["hard", "soft"]:
                # Any other mask value means unmasked: uppercase everything.
                new.write(line.upper())
            else:
                new.write(line)

    # Rename tmp file to real genome file
    shutil.move(new_fa, fa)
def print_pl2_label(self, macaddress):
    mac = macaddress
    data = {"message": "printing PL2 label"}
    self._signal_printer.emit(data)
    sleep(0.5)
    try:
        url = self.corelight.get_pl2_label_download_url(mac)
        filename = os.path.basename(url)
        filepath = os.path.join(os.getcwd(), filename)
        urlretrieve(url, filepath, self.download_callback)
        print("download pl2:", url, filename, filepath)
        if self.download_percent == 100:
            self.download_percent = 0
            if os.path.exists(filepath):
                self.printing(filepath)
                sleep(0.1)
                data = {
                    "message": "print PL2 label success",
                    "filepath": filepath
                }
                self._signal_printer.emit(data)
                sleep(0.5)
                os.unlink(filepath)
    except Exception as e:
        print(e)
        data = {"message": "print PL2 label fail"}
        self._signal_printer.emit(data)
    finally:
        urlcleanup()
def tmploop_get_remote_files(i, matching, user, pw,
                             server, remote_path, path_local):
    """
    Function to download files using ftp. Tries 10 times before failing.
    """
    print("File: ", matching[i])
    print("src path: ", remote_path)
    pw = quote(pw)  # escape special characters in the password
    dlstr = ('ftp://' + user + ':' + pw + '@'
             + server + remote_path + matching[i])
    for attempt in range(10):
        print("Attempt to download data: ")
        try:
            print("Downloading file")
            urlretrieve(dlstr, os.path.join(path_local, matching[i]))
            urlcleanup()
        except Exception as e:
            print(e.__doc__)
            print(str(e))  # e.message does not exist in Python 3
            print("Waiting for 10 sec and retry")
            time.sleep(10)
        else:
            break
    else:
        print('An error was raised and I failed to fix the problem myself :(')
        print('Exit program')
        sys.exit()
def main(url, args):
    ''' Convert given tar file to conda packages '''
    cwd = os.getcwd()

    args.output_folder = os.path.abspath(args.output_folder)
    os.makedirs(args.output_folder, exist_ok=True)

    args.recipe_dir = os.path.abspath(args.recipe_dir)
    if os.path.isfile(args.recipe_dir):
        args.recipe_dir = os.path.dirname(args.recipe_dir)

    download = False
    if url.startswith('http:') or url.startswith('https:'):
        print_err('> download %s' % url)
        download = True
        # Keep the temporary file until extraction is done; calling
        # urlcleanup() here would delete it before tarfile.open can read it.
        url, headers = urlretrieve(url)
    elif os.path.exists(url):
        url = os.path.abspath(url)

    with TemporaryDirectory() as temp:
        with tarfile.open(url, url.endswith('tar') and 'r:' or 'r:gz') as tar:
            tar.extractall(temp)

        # Clean up
        if download:
            urlcleanup()

        os.chdir(temp)
        url = os.path.join(temp, glob.glob('R-swat*')[0])

        # Create conda package for each R version
        for base, versions in get_supported_versions(args.platform).items():
            for ver in versions:
                update_recipe(args.recipe_dir, url=url, version=get_version(url),
                              r_base='{}-base'.format(base), r_version=ver)

                cmd = ['conda', 'build', '-q', '--no-test']
                cmd.extend(['--R', ver])
                if args.output_folder:
                    cmd.extend(['--output-folder', args.output_folder])
                if args.override_channels:
                    cmd.append('--override-channels')
                if args.channel:
                    for chan in args.channel:
                        cmd.extend(['--channel', chan])
                cmd.append(args.recipe_dir)

                print_err('>' + ' '.join(cmd))
                subprocess.check_output(cmd)

    os.chdir(cwd)
def fetch_from_PDB(self):
    """
    Connects to PDB FTP server, downloads a .gz file of interest,
    decompresses the .gz file into .ent and then dumps a copy of
    the pdb{code}.ent file into cwd.

    Parameters
    ----------
    None

    Examples
    --------
    >>> inst = PDBFile('1rcy')
    >>> path_to_file = inst.fetch_from_PDB()
    >>> print(path_to_file)
    """
    subdir = self.code[1:3]
    infile = 'pdb{}.ent.gz'.format(self.code)
    # str.strip('.gz') strips a *set* of characters, not the suffix;
    # slice the extension off instead.
    decompressed = infile[:-len('.gz')]
    fullpath = ROOT.format(subdir, infile)

    try:
        urlcleanup()
        urlretrieve(fullpath, infile)
    except Exception:
        return 'URLError'
    else:
        with gzip.open(infile, 'rb') as gz:
            with open(decompressed, 'wb') as out:
                out.writelines(gz)
        remove(infile)

    return path.join(getcwd(), decompressed)
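# A minimal, self-contained sketch of the download-then-decompress pattern used
# in fetch_from_PDB above: fetch a .gz file, then stream it into its
# decompressed counterpart with gzip + shutil. The URL and filenames are
# illustrative placeholders only.
def _example_fetch_gzip(url='https://example.org/pdb1rcy.ent.gz'):
    import gzip
    import shutil
    from urllib.request import urlretrieve, urlcleanup

    infile = url.rsplit('/', 1)[-1]      # e.g. 'pdb1rcy.ent.gz'
    decompressed = infile[:-len('.gz')]  # e.g. 'pdb1rcy.ent'
    urlcleanup()                         # clear stale temp files first
    urlretrieve(url, infile)             # download the gzipped file
    # Stream-decompress without loading the whole file into memory.
    with gzip.open(infile, 'rb') as gz, open(decompressed, 'wb') as out:
        shutil.copyfileobj(gz, out)
    return decompressed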
def checkForUpdate(self,):
    # Check on the web whether an update is available; returns booleans for
    # update_PMV, update_ePMV and update_pyubics.
    # Where to check and what type: dev/stable/host
    self.new_version = self.liste_plugin
    self.update_notes = ""
    self.result_json = None
    # need version
    URI = self.url
    tmpFileName = "update_notes_" + self.host + ".json"
    urllib.urlcleanup()
    if checkURL(URI):
        urllib.urlretrieve(URI, tmpFileName)  # ,reporthook=self.helper.reporthook)
    else:
        print("problem connecting to server")
        return None
    with open(tmpFileName, "r") as fp:  # doesn't work with symbolic links?
        self.result_json = json.load(fp)
    do_update = []
    for plug in self.liste_plugin:
        self.liste_plugin[plug]["update"] = False
        if self.liste_plugin[plug]["version_current"] != \
                self.result_json[plug]["version_" + self.typeUpdate]:
            if self.result_json[plug]["host"] == ["all"] or \
                    self.host in self.result_json[plug]["host"]:
                self.liste_plugin[plug]["update"] = True
        self.liste_plugin[plug]["host"] = self.result_json[plug]["host"]
        do_update.append(self.liste_plugin[plug]["update"])
    self.update_notes = self.result_json["notes"]
    print(self.update_notes)
    os.remove(tmpFileName)
    return do_update
def getdocument(url):
    try:
        return req.urlopen(url)
    except req.URLError as e:
        print(e.reason)
        req.urlcleanup()
        sys.exit(1)
def prepare_file(upload=False):  # fixed typo: parameter was "uplodad"
    if upload:
        for file in glob("out/**"):
            if os.path.isfile(file):
                try:
                    os.unlink(file)
                except Exception:
                    pass
    i = 0
    while i < 8:
        print("on loop", i + 1)
        submitter = "me"
        vid = sys.argv[1]
        duration = get_duration(vid)
        print(duration)
        start = 0
        if duration > DURATION:
            start = int(random.random() * (duration - DURATION))
            duration = DURATION
        filename, info_text, failed = v_glitch(
            vid, i, submitter, start, duration
        )
        urlcleanup()
        if failed:
            print("something went wrong")
            print("FAILED:", info_text)
        else:
            print("filename if'ed")
            print(info_text)
            print("new path:", filename)
            vid = filename
        i += 1
    return info_text, filename
def fetch():
    url = 'https://us.battle.net/forums/en/wow/1011639/'
    results = []
    req = Request(url)
    try:
        urlcleanup()
        response = urlopen(req)
    except URLError as e:
        if hasattr(e, 'reason'):
            print(' We failed to reach a server.')
            print(' Reason: ', e.reason)
        elif hasattr(e, 'code'):
            print(' The server couldn\'t fulfill the request.')
            print(' Error code: ', e.code)
        return None, None
    else:
        html = response.read()
        soup = BeautifulSoup(html, 'html.parser')
        # Collect forum topics as (href, ASCII-sanitized title) pairs.
        text = soup.findAll('a', attrs={'class': 'ForumTopic'})
        for topic in text:
            title = topic.find('span', attrs={'class': 'ForumTopic-title'})
            title = re.sub(r'[^\x00-\x7F]+', ' ', title.text.strip())
            if re.search(r'^[^<>]+$', title):
                results.append((topic['href'][21:], title))
        return results
def download_images(imgs: list, path: str, c=0):
    # Note: assumes `urllib` is imported as an alias for urllib.request (Python 3).
    for i in imgs:
        urllib.urlretrieve(i, os.path.join(path, str(c) + '.png'))
        c = c + 1
    urllib.urlcleanup()
def download_document(request, pk):
    requested_doc = Document.objects.get(id=pk)
    # Alternative: requested_doc = get_object_or_404(Document, pk=pk)
    template_name = 'editor/DownloadDocument3.html'
    try:
        print("-trying to render it into pdf-\n")
        pdf = render_to_pdf(template_name, {
            'data': requested_doc.content,
            'title': 'download page3',
        })
    finally:
        urlcleanup()
    print("-pdf rendering successful-")
    return HttpResponse(pdf, content_type='application/pdf', charset='utf-8')
def runner(source):
    "Download a file from a url into a directory"
    # Get the base file name
    url = source[0]
    directory = source[1]
    p = urlparse(url)
    file_name = path.basename(p.path)
    # Concatenate path and file name
    path_and_file_name = path.join(directory, file_name)
    # Download the file if it does not already exist.
    # If you are getting errors of incomplete data, try reducing the number
    # of max_workers.
    try:
        if not path.exists(path_and_file_name):
            if url.startswith("ftp"):
                urlcleanup()
                urlretrieve(url, path_and_file_name)
            else:
                retrieve(url, path_and_file_name)
            logger.info(path_and_file_name)  # was logging.info; use the module logger consistently
        if path_and_file_name.endswith(".tar.gz"):
            shutil.unpack_archive(path_and_file_name, directory)
            logger.info(f"Unpacked: {path_and_file_name}")
    except Exception as e:
        logger.error(f"Problem with {file_name}.. You may need to manually download from: {url}")
        logger.error(e)
def getNDFDlist(listof, area=None, timerange=None, ndfd_server=NDFD_SERVER):
    """
    Description: Returns a list of available NDFD parameters
    Parameters:
        listof (str): Either "areas", "timeranges", or "vars"
        area (str): Needed if listof="timeranges" or "vars"
        timerange (str): Needed if listof="vars"
        ndfd_server (str): URL to NDFD server with files
    Returns:
        lines (list): List of extracted parameters
    """
    if listof == "areas":
        regex = r"(?<=a href=\"AR\.).*(?=\/\">)"
    elif listof == "timeranges":
        ndfd_server += NDFD_AREA.format(area)
        regex = r"(?<=a href=\"VP\.)\d\d\d\-\d\d\d(?=\/)"
    elif listof == "vars":
        ndfd_server += NDFD_AREA.format(area) + NDFD_TRNG.format(timerange)
        regex = r"(?<=a href=\"ds\.).*(?=\.bin\")"
    with urlopen(ndfd_server) as file:
        lines = file.readlines()
    lines = [line.decode("utf-8") for line in lines]
    lines = "\n".join(lines)
    lines = re.findall(regex, lines)
    urlcleanup()
    return lines
def megach_update():
    try:
        url = "https://raw.githubusercontent.com/LinkkG/megach/master/megach.py"
        file_to_replace = "{}{}megach.py".format(paths.u_bot, os.sep)
        with urlreq.urlopen(url) as info_online:
            info_online = info_online.read().decode("utf-8").splitlines()
        megach_online_v = [x for x in info_online if "version =" in x][0].split("=")[1]
        megach_online_v = megach_online_v.replace("'", "").lstrip(" ")
        with open(file_to_replace, encoding="utf-8") as a_megach:
            a_megach = a_megach.read().splitlines()
        megach_local_v = [x for x in a_megach if "version =" in x][0].split("=")[1]
        megach_local_v = megach_local_v.replace("'", "").lstrip(" ")
        if a_megach == info_online:
            download = [False, megach_local_v, megach_online_v]
        else:
            urlreq.urlretrieve(url, file_to_replace)
            download = [True, megach_local_v, megach_online_v]
        urlreq.urlcleanup()
        # The close() calls that used to follow the return were unreachable,
        # and unnecessary: both handles are managed by `with` blocks.
        return download
    except Exception:
        return "Error: {}".format(str(tools.error_def()))
def ub_test(url):
    request.urlretrieve(url, filename="21.html")
    request.urlcleanup()
    file = request.urlopen(url).read()
    fileline = request.urlopen(url).readline()
    return file, fileline
def getDistrictInfo(i):
    num = i + 1
    file_name = base_file_name + str(num).rjust(num_padding, '0') + file_extension
    local_file_name, headers = request.urlretrieve(base_url + file_name)
    ZipFile(local_file_name, 'r').extractall()
    request.urlcleanup()
def urlretrieve(url):
    from urllib import request
    # `dir` is assumed to be a module-level output directory string
    # (note that it shadows the built-in of the same name).
    webname = "".join(("".join(url.split("//")[1:])).split("/")[:1])
    request.urlretrieve(url, dir + webname + '.html')
    # Clean up the cache files left behind by urlopen/urlretrieve
    request.urlcleanup()
    gevent.sleep(1)
    print("{} fetch success!".format(webname))
def download_to_local(self, url):
    try:
        name, _ = urlretrieve(url)
        f_name = os.path.basename(url)
        self.document.save(f_name, File(open(name, 'rb')))
    finally:
        # clear temp files after load
        urlcleanup()
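# The try/finally idiom above is the core pattern behind most snippets in this
# collection: urlretrieve with no explicit filename downloads to a temporary
# file, and urlcleanup() removes it afterwards. A minimal sketch with a
# placeholder URL and a hypothetical destination name:
def _example_retrieve_temp(url='https://example.com/report.pdf'):
    import shutil
    from urllib.request import urlretrieve, urlcleanup

    try:
        tmp_path, headers = urlretrieve(url)      # no filename -> temp file
        shutil.copyfile(tmp_path, 'report.pdf')   # persist before cleanup
    finally:
        urlcleanup()                              # delete urlretrieve temp files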
def dataFetch(self, charClass, charSpec, guide):
    print('>Fetching {1} {0} {2}'.format(charClass, charSpec, guide))
    charClass = '-'.join(charClass.lower().split(' '))
    charSpec = '-'.join(charSpec.lower().split(' '))
    guide = '-'.join(guide.lower().split(' '))
    url = 'https://www.wowhead.com/{1}-{0}-{2}'.format(charClass, charSpec, guide)
    req = Request(url)
    try:
        urlcleanup()
        response = urlopen(req)
    except URLError as e:
        if hasattr(e, 'reason'):
            print(' We failed to reach a server.')
            print(' Reason: ', e.reason)
        elif hasattr(e, 'code'):
            print(' The server couldn\'t fulfill the request.')
            print(' Error code: ', e.code)
        return None, None
    else:
        html = response.read()
        soup = BeautifulSoup(html, 'html.parser')
        # kill all script and style elements
        for script in soup(["script", "style"]):
            script.extract()  # rip it out
        # get text
        text = soup.get_text()
        # break into lines and remove leading and trailing space on each
        lines = (line.strip() for line in text.splitlines())
        # break multi-headlines into a line each (split on double spaces)
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        # drop blank lines
        text = '\n'.join(chunk for chunk in chunks if chunk)
        lines = text.split('\n')
        # Find the title in the text
        title = lines[0].replace(' - Guides - Wowhead', '')
        # Find the content: it is the line after "ReportLinks"
        content = ''
        nextIsContent = False
        for line in lines:
            if nextIsContent:
                content += line
            if 'ReportLinks' in line:
                nextIsContent = True
            if 'Share your comments about ' in line:
                break
        return title, content
def download(url, des_path='.'):
    try:
        print('downloading {}'.format(url))
        request.urlretrieve(url, os.path.join(des_path, os.path.basename(url)))
        request.urlcleanup()
    except request.HTTPError as e:
        print('HTTP Error: {} {}'.format(e.code, url))
    except request.URLError as e:
        print('URL Error: {} {}'.format(e.reason, url))
def download_images(imgs: list, path: str, volume=1, name=1):
    # Note: assumes `urllib` is imported as an alias for urllib.request (Python 3).
    for i in imgs:
        urllib.urlretrieve(
            i, os.path.join(path, str(volume) + '-' + str(name) + '.png'))
        name = name + 1
    urllib.urlcleanup()
def _post_process_download(self, name, genome_dir, mask="soft"):
    """
    Replace accessions with sequence names in fasta file.

    Parameters
    ----------
    name : str
        NCBI genome name

    genome_dir : str
        Genome directory
    """
    # Get the FTP url for this specific genome and download
    # the assembly report
    for genome in self.genomes:
        if genome["asm_name"] == name:
            url = genome["ftp_path"]
            url += "/" + url.split("/")[-1] + "_assembly_report.txt"
            url = url.replace("ftp://", "https://")
            break

    # Create mapping of accessions to names
    tr = {}
    urlcleanup()
    with urlopen(url) as response:
        for line in response.read().decode('utf-8').splitlines():
            if line.startswith("#"):
                continue
            vals = line.strip().split("\t")
            tr[vals[6]] = vals[0]

    name = name.replace(" ", "_")
    # Check if the original genome fasta exists
    fa = os.path.join(genome_dir, name, "{}.fa".format(name))
    if not os.path.exists(fa):
        raise Exception("Genome fasta file not found, {}".format(fa))

    # Use a tmp file and replace the names
    new_fa = os.path.join(genome_dir, name, ".process.{}.fa".format(name))
    if mask == "hard":
        sys.stderr.write("masking lower-case.\n")
    with open(fa) as old, open(new_fa, "w") as new:
        for line in old:
            if line.startswith(">"):
                desc = line.strip()[1:]
                name = desc.split(" ")[0]
                new.write(">{} {}\n".format(tr.get(name, name), desc))
            elif mask == "hard":
                new.write(re.sub('[actg]', 'N', line))
            else:
                new.write(line)

    # Rename tmp file to real genome file
    shutil.move(new_fa, fa)
def retrieve_social_profile_photo(user_profile, url):
    """ Retrieve a user's profile photo from a url and save it """
    try:
        filename, _ = urlretrieve(url)
        user_profile.profile_photo.save(filename, File(open(filename, 'rb')))
        user_profile.generate_nonce()
        user_profile.save()
    finally:
        urlcleanup()
def terminate_download_callback(event):
    # Clean up after urllib urlretrieve which is used internally
    # in grass.utils.
    from urllib import request  # pylint: disable=import-outside-toplevel

    self._download_in_progress = False
    request.urlcleanup()
    sys.stdout.write("Download aborted\n")
    self.thread = gThread()
    self._change_download_btn_label()
def main():
    data = urllib.request.urlopen('http://www.baidu.com')
    print(data.getcode())
    print(data.geturl())
    data_read = data.read()
    with open("E:/1.html", "wb") as fhandle:
        fhandle.write(data_read)
    # Another approach: urlretrieve downloads straight to a file.
    req.urlcleanup()
    filename = req.urlretrieve("http://edu.51cto.com", "E:/2.html")
def get_tcc(cleanup=True):
    BUILD_DIR.mkdir(parents=True, exist_ok=True)
    try:
        if WINDOWS:
            _get_tcc_windows()
        else:
            _get_tcc_good_os(cleanup)
    finally:
        _info("Cleaning up downloaded temporary files.")
        urlcleanup()
def do_local(self):
    # Download every remote "src" resource referenced in the tag list and
    # rewrite the attribute to point at the local copy.
    for dic in self.lista_etiq:
        llave = list(dic.keys())[0]
        for elemento in dic[llave]:
            value = dic[llave][elemento]
            if value:
                if (elemento == "src") and (value != "cancion.ogg"):
                    URL = value
                    filename = URL[URL.rfind("/") + 1:]
                    data = urlretrieve(URL, filename)
                    urlcleanup()
                    dic[llave][elemento] = data[0]
def readUpdateNote(self,):
    URI = self.url
    tmpFileName = "update_notes_" + self.host + ".json"
    urllib.urlcleanup()
    if checkURL(URI):
        urllib.urlretrieve(URI, tmpFileName)  # ,reporthook=self.helper.reporthook)
    else:
        print("problem connecting to server")
        return None
    with open(tmpFileName, "r") as fp:  # doesn't work with symbolic links?
        self.result_json = json.load(fp)
def _get_genomes(self):
    """Parse genomes from assembly summary txt files."""
    genomes = []
    names = [
        "assembly_summary_refseq.txt",
        "assembly_summary_refseq_historical.txt",
    ]
    for fname in names:
        urlcleanup()
        with urlopen(self.assembly_url + "/" + fname) as response:
            lines = response.read().decode('utf-8').splitlines()
        header = lines[1].strip("# ").split("\t")
        for line in lines[2:]:
            vals = line.strip("# ").split("\t")
            genomes.append(dict(zip(header, vals)))
    return genomes
def getRemoteData(url):
    # Download from URL
    try:
        local_filename, headers = UrlRequest.urlretrieve(url)
    except IOError as e:
        print('IO Error! Abort!\n')
        print(e)
        return -1
    except Exception:
        print('Unknown error occurred! Abort!')
        return -1
    # Open and read the local temporary file
    html = open(local_filename)
    data = html.read()
    html.close()
    # Delete the temporary internet file
    UrlRequest.urlcleanup()
    return data
def update_plug(self, plug, path=None, typeUpdate="std", backup=False):
    import zipfile

    p = path
    if p is None:
        p = self.liste_plugin[plug]["path"]  # path of the plugin
    if self.host in self.result_json[plug]["host"]:
        URI = self.server + "/" + plug + "_" + typeUpdate + "_" + self.host + ".zip"
    else:
        URI = self.server + "/" + plug + "_" + typeUpdate + "_all.zip"
    os.chdir(p)
    os.chdir("../")
    patchpath = os.path.abspath(os.curdir)
    tmpFileName = patchpath + os.sep + plug + "_" + typeUpdate + ".zip"
    urllib.urlcleanup()
    if checkURL(URI):
        urllib.urlretrieve(URI, tmpFileName, reporthook=self.helper.reporthook)
    else:
        return False
    zfile = zipfile.ZipFile(tmpFileName)
    dirname1 = p
    import shutil
    if backup:
        # Back up the current version: copy the plugin dir to a
        # version-suffixed sibling before replacing it.
        dirname2 = dirname1 + self.liste_plugin[plug]["version_current"]
        print(dirname1, dirname2)
        if os.path.exists(dirname2):
            shutil.rmtree(dirname2, True)
        shutil.copytree(dirname1, dirname2)
    if os.path.exists(dirname1):
        shutil.rmtree(dirname1, True)
    zfile.extractall(patchpath)
    zfile.close()
    os.remove(tmpFileName)
    return True
def download(self):
    furlo = FBURLopener({})
    try:
        tmpfile, msg = furlo.retrieve(self.url, reporthook=self.rhook)
        print()
    except HTTPError as ex:
        urlcleanup()
        sys.exit(ex)
    except URLError as ex:
        urlcleanup()
        sys.exit(ex)
    if os.path.exists(self.dlpath) and filecmp.cmp(self.dlpath, tmpfile):
        print('You already have the newest version of ' + self.plugin)
        done = True
    else:
        shutil.copyfile(tmpfile, self.dlpath)
        print(self.plugin + ' downloaded.')
        done = False
    urlcleanup()
    if done or self.format == 'jar':
        return
    try:
        shutil.unpack_archive(self.dlpath, self.dest_dir, self.format)
    except ValueError as ex:
        sys.exit('Error: ' + str(ex))
def download(self):
    if os.path.exists(self.filename):
        print('File %r exists locally.' % self.filename)
        return
    try:
        print('Downloading: %r' % self.url)
        self.urlo.retrieve(self.url, self.filename, reporthook=self.rhook)
        print()
    except HTTPError as ex:
        request.urlcleanup()
        sys.exit(ex)
    except URLError as ex:
        request.urlcleanup()
        sys.exit(ex)
    request.urlcleanup()
def cleanUpTemporaryFiles():
    UrlRequest.urlcleanup()
def download_genome(self, name, genome_dir, localname=None, mask="soft",
                    regex=None, invert_match=False, version=None):
    """
    Download a (gzipped) genome file to a specific directory

    Parameters
    ----------
    name : str
        Genome / species name

    genome_dir : str
        Directory to install genome

    mask : str, optional
        Masking, soft, hard or none (all other strings)
    """
    genome_dir = os.path.expanduser(genome_dir)
    if not os.path.exists(genome_dir):
        os.makedirs(genome_dir)

    dbname, link = self.get_genome_download_link(name, mask=mask, version=version)

    myname = dbname
    if localname:
        myname = localname
    myname = myname.replace(" ", "_")

    gzipped = False
    if link.endswith(".gz"):
        gzipped = True

    if not os.path.exists(os.path.join(genome_dir, myname)):
        os.makedirs(os.path.join(genome_dir, myname))

    urlcleanup()
    response = urlopen(link)
    sys.stderr.write("downloading from {}...\n".format(link))
    down_dir = genome_dir
    fname = os.path.join(genome_dir, myname, myname + ".fa")
    if regex:
        down_dir = mkdtemp()
        fname = os.path.join(down_dir, myname + ".fa")
    with open(fname, "wb") as f_out:
        if gzipped:
            # Works on both Python 2.7 and 3
            with gzip.GzipFile(fileobj=io.BytesIO(response.read())) as f_in:
                shutil.copyfileobj(f_in, f_out)
        else:
            f_out.write(response.read())
    sys.stderr.write("done...\n")

    if link.endswith("tar.gz"):
        self.tar_to_bigfile(fname, fname)

    if hasattr(self, '_post_process_download'):
        self._post_process_download(name, down_dir, mask)

    if regex:
        infa = fname
        outfa = os.path.join(genome_dir, myname, myname + ".fa")
        filter_fasta(infa, outfa, regex=regex, v=invert_match, force=True)

        not_included = [k for k in Fasta(infa).keys()
                        if k not in Fasta(outfa).keys()]
        shutil.rmtree(down_dir)
        fname = outfa

    sys.stderr.write("name: {}\n".format(dbname))
    sys.stderr.write("local name: {}\n".format(myname))
    sys.stderr.write("fasta: {}\n".format(fname))

    # Create readme with information
    readme = os.path.join(genome_dir, myname, "README.txt")
    with open(readme, "w") as f:
        f.write("name: {}\n".format(myname))
        f.write("original name: {}\n".format(dbname))
        f.write("original filename: {}\n".format(os.path.split(link)[-1]))
        f.write("url: {}\n".format(link))
        f.write("mask: {}\n".format(mask))
        f.write("date: {}\n".format(time.strftime("%Y-%m-%d %H:%M:%S")))
        if regex:
            if invert_match:
                f.write("regex: {} (inverted match)\n".format(regex))
            else:
                f.write("regex: {}\n".format(regex))
            f.write("sequences that were excluded:\n")
            for seq in not_included:
                f.write("\t{}\n".format(seq))
def _post_process_download(self, name, genome_dir, mask="soft"):
    """
    Replace accessions with sequence names in fasta file.

    Parameters
    ----------
    name : str
        NCBI genome name

    genome_dir : str
        Genome directory
    """
    # Get the FTP url for this specific genome and download
    # the assembly report
    for genome in self.genomes:
        if genome["asm_name"] == name:
            url = genome["ftp_path"]
            url += "/" + url.split("/")[-1] + "_assembly_report.txt"
            break

    # Create mapping of accessions to names
    tr = {}
    urlcleanup()
    with urlopen(url) as response:
        for line in response.read().decode('utf-8').splitlines():
            if line.startswith("#"):
                continue
            vals = line.strip().split("\t")
            tr[vals[6]] = vals[0]

    name = name.replace(" ", "_")
    # Check if the original genome fasta exists
    fa = os.path.join(genome_dir, name, "{}.fa".format(name))
    if not os.path.exists(fa):
        raise Exception("Genome fasta file not found, {}".format(fa))

    # Use a tmp file and replace the names
    new_fa = os.path.join(genome_dir, name, ".process.{}.fa".format(name))
    if mask == "hard":
        sys.stderr.write("masking lower-case.\n")
    with open(fa) as old, open(new_fa, "w") as new:
        for line in old:
            if line.startswith(">"):
                desc = line.strip()[1:]
                name = desc.split(" ")[0]
                new.write(">{} {}\n".format(tr.get(name, name), desc))
            elif mask == "hard":
                new.write(re.sub('[actg]', 'N', line))
            else:
                new.write(line)

    # Rename tmp file to real genome file
    shutil.move(new_fa, fa)
def download():
    count = 0
    # Set the limits of the range according to the "id" keys
    for i in range(1, 2000):
        count += 1
        try:
            if retain_original_names:
                urr.urlretrieve(link + str(i))
            else:
                urr.urlretrieve(link + str(i), "Lecture " + str(count))
        except AttributeError:
            with urr.urlopen(link + str(i)) as in_data, \
                    open("Lecture " + str(count), 'wb') as out_video:
                copyfileobj(in_data, out_video)
        # Note: rebinding the loop variable has no effect here; `for` resets
        # `i` on the next iteration, so this line does not skip ahead.
        i += ticker
        time.sleep(buffer_time)
    print(str(count) + " videos have been downloaded.")
    # Removing any temporary files left behind
    urr.urlcleanup()

##############################################################################
## Known Issues
## If the downloaded files are named "Lecture 1, 2, 3 ..." even though
## retain_original_names is set to True:
## 1. Python.org has finally deprecated the urlretrieve attribute
## 2. I still haven't figured out how to scrape Coursera's website
##############################################################################