def manage_download(path, url):
    """Download *url* to *path*, reporting success as a boolean.

    NOTE: report_error mangles return values, so failures are printed
    directly here instead of going through it.

    Args:
        path: Destination file path.
        url: Source URL.

    Returns:
        True on success, False if the download raised any exception.
    """
    try:
        download_file(url, path)
        # BUG FIX: modernized Python-2-only syntax (`print` statement,
        # `except Exception, e`) so the function parses under Python 3.
        print(" Downloaded, processing...")
        return True
    except Exception as e:  # broad by design: any failure means "not fetched"
        print(" Couldn't fetch URL", e)
        return False
def dump_video(video_url, video_counter, root, session):
    """Fetch a video page, archive its HTML, and download the FLV stream.

    Args:
        video_url: URL of the video page.
        video_counter: Sequence number used in the output filenames.
        root: Directory prefix (expected to end with a path separator).
        session: requests-like session used for the fetches.
    """
    vreq = session.get(video_url)
    # BUG FIX: use a context manager so the archive file is closed even if
    # the write fails (the original leaked the handle on error).
    with open(root + "video%d.html" % video_counter, "wb") as vfp:
        vfp.write(vreq.content)
    dom = fromstring(vreq.content)
    player_div = dom.find(".//div[@id='UserVideoPlayerObjectDiv']")
    # The sibling element holds a JS object literal; slice off the fixed
    # wrapper (58-char prefix, 11-char suffix) before decoding.
    obj = demjson.decode(player_div.getnext().text[58:-11])
    file_url = obj['modes'][1]['config']['file']
    download_file(session, file_url, root + "video%d.flv" % video_counter)
def fetch_food_menu(date=None):
    """Download, convert and parse the food menu for the week of *date*.

    Args:
        date: Any day of the desired week; defaults to today.

    Returns:
        The parsed food-menu list (one entry per weekday, each stamped with
        its date), or None on any failure.
    """
    # BUG FIX: the original signature used `date=datetime.today()`, which is
    # evaluated ONCE at import time, so long-running processes always fetched
    # the menu for the day the module was loaded. Resolve the default at
    # call time instead.
    if date is None:
        date = datetime.today()
    # calculate the first day of the week
    monday = (date - timedelta(date.weekday())).date()
    # filename format: 'menusitisis_YYYYMMDD'
    filename = 'menusitisis_%d%02d%02d' % (monday.year, monday.month, monday.day)
    doc_path = dir_name + filename + '.doc'
    html_path = dir_name + filename + '.html'
    # create the folder path, if necessary
    if not path.exists(dir_name):
        makedirs(dir_name)
    # download the doc file
    logger.debug('Trying to fetch "%s"' % doc_path)
    download_file(link + filename + '.doc', doc_path)
    if not path.exists(doc_path):
        return None
    if not _convert_to_html(doc_path, html_path):
        return None
    # read the html code from disk (context manager closes on all paths)
    with open(html_path, 'r') as file_html:
        html = file_html.read()
    # Parse the html code #
    logger.debug('Trying to parse...')
    try:
        food_menu = _parse_html(html)
        for i in range(7):  # range, not the Python-2-only xrange
            date_ = monday + timedelta(days=i)
            food_menu[i]['date'] = _date_to_datetime(date_)
    except Exception as ex:
        logger.error(ex)
        return None
    # Update the database #
    try:
        _update_database(food_menu, monday)
    except OperationFailure as ex:
        logger.error('DB Error: %s' % ex)
        return None
    except Exception as ex:
        logger.error(ex)
        return None
    return food_menu
def download_model(model_name, dst_dir='./', meta_info=None):
    """Download a model's symbol and parameter files into *dst_dir*.

    Args:
        model_name: Key into *meta_info* identifying the model.
        dst_dir: Destination directory; created if missing.
        meta_info: Mapping of model name -> {'symbol': url, 'params': url}.
            Defaults to the module-level _default_model_info.

    Returns:
        (path_prefix, 0) on success, or (None, 0) if *model_name* is unknown.
    """
    if meta_info is None:
        meta_info = _default_model_info
    meta_info = dict(meta_info)  # defensive copy; never mutate the caller's dict
    if model_name not in meta_info:
        return (None, 0)
    if not os.path.isdir(dst_dir):
        os.mkdir(dst_dir)
    meta = dict(meta_info[model_name])
    assert 'symbol' in meta, "missing symbol url"
    model_name = os.path.join(dst_dir, model_name)
    download_file(meta['symbol'], model_name + '-symbol.json')
    # BUG FIX: assert message typo corrected ("mssing" -> "missing").
    assert 'params' in meta, "missing parameter file url"
    download_file(meta['params'], model_name + '-0000.params')
    return (model_name, 0)
def mac_install_cmake(self):
    """Check for and install cmake.

    Assumes that if cmake is already installed, then the user has correctly
    set their path variable such that the command "cmake --version" will
    work.

    Raises:
      FileDownloadError: If the cmake tar fails to download, or is
          incorrectly downloaded.
      ExtractionError: If the cmake tar cannot be properly extracted.
    """
    if find_executable("cmake"):
      logging.info("CMake already installed.")
      return
    # NOTE(review): the version lookup below keys on self.version, while the
    # download lookup later keys on self.os_version -- confirm both are
    # intended, as every sibling installer uses a single key.
    cmake_version = util.get_file_name(
        CMAKE_VERSIONS.get(self.version)[0], False)
    # A previously unpacked copy under self.cmake_path also counts.
    location = util.check_dir(self.cmake_path, cmake_version, "bin/cmake")
    if location:
      self.cmake_path = location
      logging.info("CMake found at " + self.cmake_path)
      return
    logging.info("CMake not installed. Downloading now.")
    url, file_hash = CMAKE_VERSIONS.get(self.os_version, (None, None))
    url = urlparse.urljoin(CMAKE_DOWNLOAD_PREFIX, url)
    location = os.path.join(common.BASE_DIR, "cmake.tar.gz")
    # download_file returns the verified path, or a falsy value on failure.
    location = util.download_file(url, location, "cmake", file_hash)
    if not location:
      raise common.FileDownloadError("https://cmake.org/download/", "Please "
                                     "rerun this script afterwards with the "
                                     "flag\n\t--cmake=/path/to/cmake")
    if not util.extract_tarfile(location, "r:gz", self.cmake_path, "cmake"):
      raise common.ExtractionError(location)
    logging.info("CMake successfully installed.")
def mac_install_cwebp(self):
    """Check for and install cwebp.

    Assumes that if cwebp is already installed, then the user has correctly
    set their path variable such that the command "cwebp -h" will work.

    Raises:
      FileDownloadError: If the cwebp tar fails to download, or is
          incorrectly downloaded.
      ExtractionError: If the cwebp tar cannot be properly extracted.
    """
    if find_executable("cwebp"):
      logging.info("cwebp already installed.")
      return
    # A previously unpacked copy under self.cwebp_path also counts.
    location = util.check_dir(self.cwebp_path, CWEBP_VERSION, "cwebp")
    if location:
      self.cwebp_path = location
      logging.info("cwebp found at " + self.cwebp_path)
      return
    logging.info("cwebp not installed. Downloading now.")
    location = os.path.join(common.BASE_DIR, "cwebp.tar.gz")
    # download_file returns the verified path, or a falsy value on failure.
    location = util.download_file(CWEBP_URL, location, "cwebp", CWEBP_HASH)
    if not location:
      raise common.FileDownloadError("https://developers.google.com/speed/webp/"
                                     "docs/precompiled", "Please rerun this "
                                     "script afterwards with the flag\n"
                                     "\t--cwebp=/path/to/cwebp")
    if not util.extract_tarfile(location, "r:gz", self.cwebp_path, "cwebp"):
      raise common.ExtractionError(location)
    logging.info("cwebp successfully installed.")
def windows_install_python(self):
    """Checks for and installs at least Python 2.7.8.

    Raises:
      FileDownloadError: If the Python installer fails to download, or is
          downloaded incorrectly.
      InstallInterruptError: If the user cancels the wait for installation
          of ImageMagick.
      InstallFailedError: If msiexec fails, or Python cannot be installed.
    """
    if find_executable("python"):
      if check_python_version():
        logging.info("Python already installed.")
        return
      else:
        logging.info("Python version not sufficient. Updating now.")
    else:
      logging.info("Python not installed. Downloading now.")
    url, file_hash = PYTHON_VERSIONS.get(self.version)
    url = PYTHON_BASE_URL + url
    location = os.path.join(common.BASE_DIR, "python.msi")
    # download_file verifies the hash and returns the path, or falsy on failure.
    location = util.download_file(url, location, "python", file_hash)
    if not location:
      raise common.FileDownloadError("https://www.python.org/downloads/release/"
                                     "python-278/", "Please rerun this script "
                                     "after completing manual installation.\n")
    logging.info("Opening Python installer. For convenience, please select the "
                 "'Add python.exe to Path' option.")
    # NOTE(review): subprocess.call never raises CalledProcessError (only
    # check_call does), so the except branch below looks unreachable --
    # confirm before relying on InstallFailedError being raised here.
    try:
      subprocess.call("msiexec /i " + location, shell=True)
    except subprocess.CalledProcessError:
      raise common.InstallFailedError("Python", "https://www.python.org/"
                                      "downloads/release/python-278/", "Please "
                                      "rerun this script after installating "
                                      "Python manually.")
def download_binaries():
    "Parse config and download dse binaries (local)"
    # TODO since this is done locally on the cperf tool server, is there any possible concurrency
    # issue .. Or maybe we should simply keep a cache on each host? (Comment to remove)
    filename = os.path.join(dse_cache, dse_tarball)
    dse_url = config['dse_url']
    username = config['dse_username'] if 'dse_username' in config else None
    password = config['dse_password'] if 'dse_password' in config else None
    url = urljoin(dse_url, dse_tarball)
    # Fetch the SHA of the tarball:
    correct_sha = download_file_contents(url + '.sha', username, password).split(" ")[0]
    # BUG FIX: `assert(expr, msg)` asserts a non-empty TUPLE, which is always
    # truthy, so the length check never fired. The message goes after a comma,
    # outside the parentheses.
    assert len(correct_sha) == 64, 'Failed to download sha file: {}'.format(correct_sha)
    if os.path.exists(filename):
        print("Already in cache: {}".format(filename))
        real_sha = digest_file(filename)
        if real_sha != correct_sha:
            print("Invalid SHA for '{}'. It will be removed".format(filename))
            os.remove(filename)
        else:
            return
    # Fetch the tarball (return value was never used, so it isn't bound):
    download_file(url, filename, username, password)
    real_sha = digest_file(filename)
    # Verify the SHA of the tarball:
    if real_sha != correct_sha:
        raise AssertionError(
            ('SHA of DSE tarball was not verified. should have been: '
             '{correct_sha} but saw {real_sha}').format(correct_sha=correct_sha,
                                                        real_sha=real_sha))
def windows_install_cwebp(self):
    """Check for and install cwebp in given directory.

    Raises:
      FileDownloadError: If the cwebp zip fails to download, or is
          downloaded incorrectly.
    """
    if find_executable("cwebp"):
      if check_cwebp_version():
        logging.info("cwebp already installed.")
        return
      else:
        logging.info("cwebp version not sufficient. Updating now.")
    else:
      # BUG FIX: this branch was copy-pasted from the CMake installer -- it
      # checked for "\\bin\\cmake.exe", logged "CMake already installed." and
      # assigned self.cmake_path. It now checks for an unpacked cwebp copy.
      location = util.check_dir(self.cwebp_path,
                                CWEBP_VERSIONS.get(self.version)[0],
                                "\\bin\\cwebp.exe")
      if location:
        logging.info("cwebp already installed.")
        self.cwebp_path = location
        return
    version, file_hash = CWEBP_VERSIONS.get(self.version)
    logging.info("cwebp not installed. Downloading now...")
    url = CWEBP_BASE_URL + version + ".zip"
    location = os.path.join(common.BASE_DIR, "cwebp.zip")
    location = util.download_file(url, location, "cwebp", file_hash)
    if not location:
      # BUG FIX: the recovery flag in the message now names --cwebp, not --cmake.
      raise common.FileDownloadError("https://developers.google.com/speed/webp/"
                                     "docs/precompiled", "Please rerun this "
                                     "script afterwards with the flag\n\t"
                                     "--cwebp=\\path\\to\\cwebp")
    util.extract_zipfile(location, "r", self.cwebp_path, "cwebp")
    logging.info("cwebp successfully installed.")
def windows_install_cmake(self):
    """Check for and install cmake.

    Raises:
      FileDownloadError: If the CMake zip fails to download, or is
          downloaded incorrectly.
    """
    if find_executable("cmake"):
      if check_cmake_version():
        logging.info("CMake already installed.")
        return
      else:
        logging.info("CMake version not sufficient. Updating now.")
    else:
      # Not on PATH; fall back to a previously unpacked copy.
      location = util.check_dir(self.cmake_path, CMAKE_VERSION,
                                os.path.join("bin", "cmake.exe"))
      if location:
        logging.info("CMake already installed.")
        self.cmake_path = location
        return
      else:
        logging.info("CMake not installed. Downloading now...")
    location = os.path.join(common.BASE_DIR, "cmake.zip")
    # download_file returns the verified path, or a falsy value on failure.
    location = util.download_file(CMAKE_URL, location, "cmake", CMAKE_HASH)
    if not location:
      raise common.FileDownloadError("https://cmake.org/download/", "Please "
                                     "rerun this script afterwards with the "
                                     "flag\n\t--cmake=\\path\\to\\cmake")
    util.extract_zipfile(location, "r", self.cmake_path, "cmake")
    logging.info("cmake successfully installed.")
def windows_fix_directx(self):
    """Attempt to fix problems DirectX may be having with Visual Studio.

    DirectX comes pre-installed on Windows 7 and up, but having Visual C++
    2010 or higher may give an "S1023" error due to it being newer than the
    latest version of DirectX, June 2010 DirectX SDK. This can be fixed by
    reinstalling DirectX once Visual C++ has been established.

    Raises:
      FileDownloadError: If the Visual Studio installer fails to download,
          or is downloaded incorrectly.
    """
    logging.info("Attempting to fix problems with DirectX...")
    # Passively uninstall two MSI products by GUID -- presumably the Visual
    # C++ components that conflict with the DirectX SDK; confirm the GUIDs.
    try:
      subprocess.call("MsiExec.exe /passive /X{F0C3E5D1-1ADE-321E-8167-"
                      "68EF0DE699A5}", shell=True)
      subprocess.call("MsiExec.exe /passive /X{1D8E6291-B0D5-35EC-8441-"
                      "6616F567A0F7}", shell=True)
    except subprocess.CalledProcessError:
      logging.warning("MsiExec.exe failed. Could not resolve conflicts with "
                      "DirectX and Visual Studio.")
      return
    location = os.path.join(common.BASE_DIR, "directx.exe")
    # download_file returns the verified path, or a falsy value on failure.
    location = util.download_file(DIRECTX_URL, location, "DirectX",
                                  DIRECTX_HASH)
    if not location:
      raise common.FileDownloadError("http://www.microsoft.com/en-us/download/"
                                     "details.aspx?id=6812", "Please rerun "
                                     "this script after completing manual "
                                     "installation.")
    # Launch the DirectX installer in its own console window.
    subprocess.call("start cmd /c " + location, shell=True)
    logging.info("DirectX successfully reinstalled.")
def mac_install_ant(self):
    """Check for and install Apache Ant.

    Raises:
      FileDownloadError: If the ant tar fails to download, or is
          incorrectly downloaded.
      ExtractionError: If the ant tar cannot be properly extracted.
    """
    if find_executable("ant"):
      logging.info("Apache Ant already installed.")
      return
    # A previously unpacked copy under self.ant_path also counts.
    location = util.check_dir(self.ant_path, ANT_VERSION, "bin/ant")
    if location:
      self.ant_path = location
      logging.info("Apache Ant already installed.")
      return
    logging.info("Apache Ant not installed. Installing now.")
    location = os.path.join(common.BASE_DIR, "ant.tar.gz")
    # download_file returns the verified path, or a falsy value on failure.
    location = util.download_file(ANT_URL, location, "Ant", ANT_HASH)
    if not location:
      raise common.FileDownloadError("https://www.apache.org/dist/ant/"
                                     "binaries/", "Please rerun this script "
                                     "again afterwards.")
    if not util.extract_tarfile(location, "r:gz", self.ant_path, "Ant"):
      raise common.ExtractionError(location)
    logging.info("Apache Ant successfully installed.")
def download_binaries():
    "Parse config and download dse binaries (local)"
    # TODO since this is done locally on the cperf tool server, is there any possible concurrency
    # issue .. Or maybe we should simply keep a cache on each host? (Comment to remove)
    filename = os.path.join(dse_cache, dse_tarball)
    dse_url = config['dse_url']
    username = config['dse_username'] if 'dse_username' in config else None
    password = config['dse_password'] if 'dse_password' in config else None
    url = urljoin(dse_url, dse_tarball)
    # Fetch the SHA of the tarball: download_file_contents returns the request.text of the url.
    # the sha file has the format '874c11f7634974fb41006d30199b55b59fd124db ?./dse-5.0.0-bin.tar.gz'
    # so we split on the space and then check that the sha hexidecimal is 40 characters
    correct_sha = download_file_contents(url + '.sha', username, password).split(" ")[0]
    assert (len(correct_sha) == 40), 'Failed to download sha file: {}'.format(correct_sha)
    if os.path.exists(filename):
        logger.info("Already in cache: {}".format(filename))
        real_sha = digest_file(filename)
        if real_sha != correct_sha:
            logger.info("Invalid SHA for '{}'. It will be removed".format(filename))
            os.remove(filename)
        else:
            return
    # Fetch the tarball (the unused `request` binding was dropped):
    download_file(url, filename, username, password)
    real_sha = digest_file(filename)
    # Verify the SHA of the tarball:
    if real_sha != correct_sha:
        raise AssertionError(
            ('SHA of DSE tarball was not verified. should have been: '
             '{correct_sha} but saw {real_sha}').format(correct_sha=correct_sha,
                                                        real_sha=real_sha))
def windows_install_imagemagick(self):
    """Check for and install ImageMagick.

    Raises:
      FileDownloadError: If the ImageMagick installer fails to download, or
          is downloaded incorrectly.
      InstallInterruptError: If the user cancels the wait for installation
          of ImageMagick.
    """
    # ImageMagick ships a "convert" executable; its presence means installed.
    if find_executable("convert"):
      logging.info("ImageMagick is already installed.")
      return
    logging.info("ImageMagick not installed. Downloading now...")
    url, file_hash = IMAGEMAGICK_VERSIONS.get(self.version)
    url = IMAGEMAGICK_BASE_URL + url
    location = os.path.join(common.BASE_DIR, "imagemagick.exe")
    # download_file returns the verified path, or a falsy value on failure.
    location = util.download_file(url, location, "imagemagick", file_hash)
    if not location:
      raise common.FileDownloadError("http://www.imagemagick.org/script/binary-"
                                     "releases.php", "Please rerun this script "
                                     "after completing manual installation.\n")
    # Launch the installer in its own console and block until "convert"
    # appears on the PATH (or the user cancels the wait).
    subprocess.call("start cmd /c " + location, shell=True)
    if not util.wait_for_installation("convert"):
      raise common.InstallInterruptError("ImageMagick")
    logging.info("ImageMagick successfully installed.")
def android_download_ndk(self, directory):
    """Checks OS version and downloads the appropriate Android NDK.

    Args:
      directory: String indication of location to unpack NDK

    Raises:
      FileDownloadError: NDK bin or exe fails to download
      InstallInterruptError: If the wait for the NDK install is cancelled
      UnknownFileTypeError: If the downloaded file is neither bin nor exe
    """
    if self.system == common.LINUX:
      os_version = subprocess.check_output("uname -m", shell=True)
      # NOTE(review): check_output returns bytes on Python 3; this comparison
      # assumes Python 2 (str) -- confirm before upgrading the interpreter.
      if os_version.strip() == "x86_64":
        url, file_hash = NDK_VERSIONS.get(common.LINUX_64)
      else:
        url, file_hash = NDK_VERSIONS.get(common.LINUX_32)
    elif self.system == common.WINDOWS:
      os_version = platform.architecture()[0]
      if os_version == "64bit":
        url, file_hash = NDK_VERSIONS.get(common.WINDOWS_64)
      else:
        url, file_hash = NDK_VERSIONS.get(common.WINDOWS_32)
    else:  # self.system = common.MAC
      url, file_hash = NDK_VERSIONS.get(self.system)
    filetype = util.get_file_type(url)
    url = NDK_DOWNLOAD_PREFIX + url
    ndk_location = os.path.join(directory, "ndk." + filetype)
    ndk_location = util.download_file(url, ndk_location, "Android NDK",
                                     file_hash)
    if not ndk_location:
      raise common.FileDownloadError("http://developer.android.com/ndk/"
                                     "downloads/index.html", "Please rerun "
                                     "this script afterwards with the flag\n"
                                     "\t--android_ndk=/path/to/android_ndk")
    # BUG FIX: current_dir was assigned only inside the "bin" branch, so the
    # "exe" branch hit a NameError at os.chdir(current_dir). Hoist it.
    current_dir = os.getcwd()
    if filetype == "bin":
      # Allow execution by all parties. (0o755: Python-3-compatible octal;
      # the original 0755 is a SyntaxError on Python 3.)
      os.chmod(ndk_location, 0o755)
      os.chdir(common.BASE_DIR)
      os.system(ndk_location)
      os.chdir(current_dir)
      os.remove(ndk_location)
    elif filetype == "exe":
      os.chdir(self.ndk_path)
      subprocess.call("start cmd /c " + ndk_location, shell=True)
      # toolchain-licenses\COPYING is one of the last things to be extracted.
      if not util.wait_for_installation("COPYING", search=True,
                                        basedir=self.ndk_path):
        raise common.InstallInterruptError("Android NDK")
      os.chdir(current_dir)
    else:
      raise common.UnknownFileTypeError(filetype, "Please manually extract "
                                        "Android NDK and rerun this script "
                                        "afterwards with the flag\n\t"
                                        "--android_ndk=/path/to/android_ndk")
def download_lab(course):
    """Download every lab PDF for *course* from WebCMS3 into data_path.

    Records each downloaded file's URL in dict[course]["lab"][week][name].
    (The module-level name `dict` shadows the builtin; left as-is since it
    is defined outside this function.)
    """
    print(" ------------- Start downloading " + course + "'s Lab ------------- ")
    url = "https://webcms3.cse.unsw.edu.au/" + course + "/18s1"
    r = client.get(url, verify=False)
    soup = BeautifulSoup(r.text, "lxml")
    # Lab Activities, Labs
    sider_bar = soup.find('div', id='sidebar')
    lab = sider_bar.find('a', string=re.compile('Lab'))
    if not lab:
        print(course + " may not have Lab")
        return
    location = lab['href']
    url_lec = root + location
    r = client.get(url_lec, verify=False)
    soup = BeautifulSoup(r.text, "lxml")
    blocks = soup.find_all('div', 'panel panel-primary')
    dict[course]["lab"] = {}
    for block in blocks:
        # Normalize the week heading: strip the sub-heading, "active"
        # marker, newlines and redundant whitespace.
        week_str = block.h4.text.strip()
        small = block.h4.small.text.strip()
        week_str = week_str.replace(small, "")
        week_str = week_str.replace("active", "")
        week_str = re.sub(r'\n', "", week_str)
        week_str = " ".join(week_str.split())
        week_str = week_str.strip()
        dict[course]["lab"][week_str] = {}
        path = os.path.join(data_path, course, week_str, "lab")
        if not os.path.exists(path):
            os.makedirs(path)
        items = block.find_all('li', 'list-group-item')
        for item in items:
            name = item.find('a').text.strip()
            if len(name) <= 0:
                continue
            name = " ".join(name.split())
            pdf = item.div.find('a', title="Download")
            if pdf:
                pdf_url = root + pdf.get('href')
                path = os.path.join(data_path, course, week_str, "lab", name)
                # BUG FIX: str.replace returns a NEW string; the original
                # discarded the result, leaving quote characters in the path.
                path = path.replace("\"", "ยง")
                util.download_file(pdf_url, path)
                name = name.replace(".", "&")
                dict[course]["lab"][week_str][name] = pdf_url
    print(" ------------- Lab download complete. :^ ) ------------- ")
def resolve(self, URL, id, context): logger.debug("Fetching %s ..." % URL) #determine cache path url = urlparse.urlparse(URL) # Handle relative paths for network locations if url.netloc: self.last_url = url else: if not self.last_url: raise ValueError("Invalid URL provided for DTD: %s" % URL) url = urlparse.urlparse(urlparse.urljoin(self.last_url.geturl(), URL)) local_base_directory = os.path.join(self.cache, url.netloc) local_file = local_base_directory + url.path #cache if necessary if not os.path.exists(local_file): if not os.path.exists(os.path.split(local_file)[0]): os.makedirs(os.path.split(local_file)[0]) download_file(url.geturl(), local_file) #resolve the cached file return self.resolve_file(open(local_file), context, base_url=URL)
def scrape_videos(user_id, root, session):
    """Scrape every video (thumbnail, page HTML and FLV) for a user.

    Walks the paginated gallery until no next-page link remains, archiving
    each gallery page and delegating per-video work to dump_video.

    Args:
        user_id: Numeric user id interpolated into VIDEO_OVERVIEW_URL.
        root: Output directory prefix (expected to end with a separator).
        session: requests-like session used for all fetches.
    """
    print(" - videos")
    page_counter = 1
    video_counter = 1
    should_continue = True
    while should_continue:
        print(" - page %d" % page_counter)
        req = session.get(VIDEO_OVERVIEW_URL % (user_id, page_counter))
        dom = fromstring(req.content)
        folder = root + "videos/"
        os.makedirs(folder, exist_ok=True)
        # BUG FIX: archive file now closed via context manager (the original
        # open/write/close sequence leaked the handle on a write error).
        with open(folder + "gallery-page%d.html" % page_counter, 'wb') as fp:
            fp.write(req.content)
        # we're doing a depth first search
        thumbs = dom.xpath(".//div[@class='galleryThumb']")
        for thumb in thumbs:
            print(" - video %d" % video_counter)
            video_url = "http://www.arto.com" + thumb.getchildren()[0].get('href')
            video_thumb = thumb.getchildren()[0][0].get('src')
            download_file(session, video_thumb, folder + "video%d.jpg" % video_counter)
            dump_video(video_url, video_counter, folder, session)
            video_counter += 1
        # Next-page link text preserved byte-for-byte from the site markup.
        e = dom.xpath('.//a[text()="Nรฆste"]')
        page_counter += 1
        if not e:
            should_continue = False
def prepare_data():
    """Fetch the friendster-300K dataset files and load them from disk.

    Returns:
        Tuple of (adjacency sparse matrix, transposed sparse matrix,
        feature array x, bias array b).
    """
    base_url = ('https://mxnet-experiment.s3.amazonaws.com/'
                'friendster-dataset/')
    file_names = [
        'friendster-300K.npz',
        'friendster-300K-T.npz',
        'friendster-300K-x.npy',
        'friendster-300K-b.npy',
    ]
    # Download each file into data/ (same URLs and order as before).
    local_paths = []
    for fname in file_names:
        target = os.path.join('data', fname)
        util.download_file(base_url + fname, target)
        local_paths.append(target)
    data_name, t_name, x_name, b_name = local_paths
    return (sp.sparse.load_npz(data_name), sp.sparse.load_npz(t_name),
            np.load(x_name), np.load(b_name))
def __download_model():
    """Fetch the YOLOv3 weights, config and class names if absent.

    The weights file is used as the sentinel: when it already exists,
    nothing is downloaded.
    """
    url_weight = "https://pjreddie.com/media/files/yolov3.weights"
    url_cfg = "https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg?raw=true"
    url_names = "https://github.com/pjreddie/darknet/blob/master/data/coco.names?raw=true"
    if os.path.isfile(PATH_TO_WEIGHTS):
        return
    # Create the model directory on first use.
    model_dir = os.path.join(PATH_TO_MODELS, MODEL_NAME)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    print('Downloading frozen inference graph (.weight, .cfg, .classes): ')
    for src, dst in ((url_weight, PATH_TO_WEIGHTS),
                     (url_cfg, PATH_TO_CFG),
                     (url_names, PATH_TO_CLASSES)):
        download_file(src, dst)
def main():
    """Build data/miyagi_data.json from Miyagi prefecture's official Excel file."""
    # Official Excel file published by Miyagi prefecture.
    url = "https://www.pref.miyagi.jp/uploaded/attachment/826977.xlsx"
    file_name = download_file(url)
    wb = load_workbook(file_name, data_only=True)
    # Sheet name copied byte-for-byte from the workbook.
    ws = wb["ๆฅๅฅ้่จ๏ผHPๆฒ่ผ๏ผ"]
    # Walk the rows and collect per-day counts.
    today = datetime.date.today()
    d = []
    max_row = ws.max_row
    # BUG FIX: ws.max_row is the index of the LAST row (inclusive), so the
    # loop must run to max_row + 1; range(2, max_row) silently skipped the
    # final data row when the sheet was full.
    for i in range(2, max_row + 1):
        dt = ws.cell(i, 1).value  # date column
        # Stop at the first row with an empty date cell.
        if dt is None:
            break
        # Stop once the row's date is in the future.
        date = dt.date()
        if date > today:
            break
        date_isoformat = date.isoformat()
        count = ws.cell(i, 7).value  # total column
        # A zero count on today's row means "not entered yet"; stop there.
        if date == today and count == 0:
            break
        print(date, count)
        d.append({
            "date": date_isoformat,
            "count": count
        })
    # Write the collected data out as JSON.
    data = {
        "data": d
    }
    print(data)
    os.makedirs("data", exist_ok=True)
    with open("data/miyagi_data.json", "w") as f:
        json.dump(data, f, indent=4)
def mac_install_macports(self):
    """Check for and install MacPorts.

    Raises:
      FileDownloadError: If the MacPorts package fails to download, or is
          incorrectly downloaded.
      UnknownFileTypeError: If the type of the downloaded package does not
          match any of the supported types.
    """
    if os.path.isfile(MACPORTS_LOCATION):
        logging.info("MacPorts already installed.")
        return
    else:
        logging.info("MacPorts not installed. Downloading now.")
    url, file_hash = MACPORTS_VERSIONS.get(self.os_version)
    url = MACPORTS_DOWNLOAD_PREFIX + url
    # Package type (pkg vs dmg) is derived from the download URL.
    suffix = util.get_file_type(url)
    location = os.path.join(common.BASE_DIR, "macports." + suffix)
    # download_file returns the verified path, or a falsy value on failure.
    location = util.download_file(url, location, "macports", file_hash)
    if not location:
        raise common.FileDownloadError(
            "https://guide.macports.org/chunked/"
            "installing.macports.html", "Please rerun "
            "this script again afterwards.")
    logging.info(
        "Installing Mac Ports. Sudo may prompt you for your password.")
    if suffix == "pkg":
        try:
            subprocess.call("sudo installer -pkg " + location + " -target /",
                            shell=True)
        except subprocess.CalledProcessError:
            raise common.PermissionDeniedError(
                "installer", "Please enter your "
                "password to install MacPorts")
    elif suffix == "dmg":
        # A dmg is only attached; installation continues manually.
        subprocess.call("hdiutil attach " + location, shell=True)
    else:
        raise common.UnknownFileTypeError(
            suffix, "Please manually install "
            "MacPorts, or run this script again "
            "with the flag\n\t--no_macports")
    self.bash_profile_changed = True  # Mac ports installation will probably
                                      # modify the user's bash profile.
def run(self):
    """Worker entry point: take one RDS backup job off the shared queue,
    download each backup file, then upload it to OSS under
    <newdate>/<instance>/<backup-name>."""
    # Lock guards only the shared print; released immediately after.
    queueLock.acquire()
    print '-' * 10, 'Start threadID: %d ' % self.threadID, '-' * 10, '\n'
    queueLock.release()
    # NOTE(review): checking workQueue.empty() and then self.q.get() under
    # separate lock acquisitions is racy with multiple consumers -- confirm
    # the producer/consumer setup tolerates this.
    if not workQueue.empty():
        queueLock.acquire()
        data = self.q.get()
        downloadpath = os.path.join(self.dirpath, data['RDSInstanse'])
        queueLock.release()
        # Each entry maps backup-name -> download URL.
        for download in data['bakdownloadurldict'].items():
            objectpath = download_file(downloadpath, str(download[1]), str(download[0]))
            putobjectpath = os.path.join(
                str(self.newdate),
                os.path.join(str(data['RDSInstanse']), str(download[0])))
            print putobjectpath
            PutObjectFromFile(self.bucket, putobjectpath, objectpath)
            print 'put %s to oss successful' % putobjectpath
    else:
        print '-' * 10, 'Start threadID: %d ' % self.threadID, '-' * 10, '\n'
def load_cifar100(data_path, label_mode):
    """
    Download cifar100 if necessary and load the images and labels for training
    and test sets.
    Args:
        data_path: (str) path to the directory where CIFAR100 is, or where to
            download it to.
        label_mode: (str) type of label; one of `fine` or `coarse`.
    Returns:
        train_imgs, train_labels, test_imgs, test_labels.
    """
    cifar_url = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
    cifar_local_folder = 'cifar-100-python'
    # Check if dataset exists and download it if does not
    name = cifar_url.split('/')[-1]
    file_path = util.download_file(data_path, name, cifar_url)
    # BUG FIX: close the archive deterministically; the original
    # `tarfile.open(...).extractall(...)` leaked the file handle.
    with tarfile.open(file_path, 'r:gz') as tar:
        tar.extractall(data_path)
    input_dir = os.path.join(data_path, cifar_local_folder)
    # Extract train and valid ####################################################
    d = unpickle(os.path.join(input_dir, 'train'))
    train_imgs = d['data']
    train_labels = np.array(d[f'{label_mode}_labels'], dtype=np.uint8)
    train_imgs = train_imgs.reshape(TRAIN_EX, CIFAR_SHAPE[0], CIFAR_SHAPE[1],
                                    CIFAR_SHAPE[2])
    # Transpose images to shape = [TRAIN_EX, height, width, channels]
    train_imgs = np.transpose(train_imgs, (0, 2, 3, 1))
    # Extract test ###############################################################
    d = unpickle(os.path.join(input_dir, 'test'))
    test_imgs = d['data']
    test_labels = np.array(d[f'{label_mode}_labels'], dtype=np.uint8)
    test_imgs = test_imgs.reshape(TEST_EX, CIFAR_SHAPE[0], CIFAR_SHAPE[1],
                                  CIFAR_SHAPE[2])
    # Transpose images to shape = [TEST_EX, height, width, channels]
    test_imgs = np.transpose(test_imgs, (0, 2, 3, 1))
    return train_imgs, train_labels, test_imgs, test_labels
def android_download_sdk(self, directory):
    """Download Android SDK and unpack into specified directory.

    Args:
      directory: String indication of location to unpack SDK to

    Raises:
      FileDownloadError: SDK tar or zip fails to download
      UnknownFileTypeError: If the file downloaded is neither a tar or a
          zip, and cannot be extracted.
    """
    url, file_hash = SDK_VERSIONS.get(self.system)
    suffix = util.get_file_type(url)
    sdk_location = os.path.join(directory, "sdk." + suffix)
    # download_file returns the verified path, or a falsy value on failure.
    sdk_location = util.download_file(url, sdk_location, "Android SDK",
                                      file_hash)
    if not sdk_location:
        raise common.FileDownloadError(
            "http://developer.android.com/sdk/index."
            "html#", "Please rerun this script "
            "afterwards with the flag\n"
            "\t--android_sdk=/path/to/android_sdk")
    # Extraction strategy follows the URL's file extension.
    if suffix == "tgz":
        util.extract_tarfile(sdk_location, "r", directory, "Android SDK")
    elif suffix == "zip":
        util.extract_zipfile(sdk_location, "r", directory, "Android SDK")
    else:
        raise common.UnknownFileTypeError(
            suffix, "Please manually extract "
            "Android SDK and rerun this script "
            "afterwards with the flag\n"
            "\t--android_sdk=/path/to/android_sdk")
    if self.system == common.MAC:
        # Sometimes, permissions aren't set correctly on tools/android on OSX.
        # Change permissions to allow execution by user
        android = os.path.join(directory, SDK_NAMES.get(self.system), "tools",
                               "android")
        curr_permissions = os.stat(android)
        os.chmod(android, curr_permissions.st_mode | stat.S_IXUSR)
    # Update self.sdk_path to now include the SDK name
    self.sdk_path = os.path.join(self.sdk_path, SDK_NAMES.get(self.system))
def download_binaries():
    "Parse config and download dse binaries (local)"
    # TODO since this is done locally on the cperf tool server, is there any possible concurrency
    # issue .. Or maybe we should simply keep a cache on each host? (Comment to remove)
    filename = os.path.join(dse_cache_local, dse_tarball)
    dse_url = config['dse_url']
    username = config['dse_username'] if 'dse_username' in config else None
    password = config['dse_password'] if 'dse_password' in config else None
    url = urljoin(dse_url, dse_tarball)
    # Fetch the SHA of the tarball: download_file_contents returns the request.text of the url.
    # the sha file has the format '874c11f7634974fb41006d30199b55b59fd124db ?./dse-5.0.0-bin.tar.gz'
    # so we split on the space and then check that the sha hexidecimal is 40 characters
    correct_sha = download_file_contents(url + '.sha', username,
                                         password).split(" ")[0]
    assert (len(correct_sha) == 40
            ), 'Failed to download sha file: {}'.format(correct_sha)
    if os.path.exists(filename):
        logger.info("Already in cache: {}".format(filename))
        real_sha = digest_file(filename)
        if real_sha != correct_sha:
            logger.info(
                "Invalid SHA for '{}'. It will be removed".format(filename))
            os.remove(filename)
        else:
            return
    # Fetch the tarball (the unused `request` binding was dropped):
    download_file(url, filename, username, password)
    real_sha = digest_file(filename)
    # Verify the SHA of the tarball:
    if real_sha != correct_sha:
        raise AssertionError(
            ('SHA of DSE tarball was not verified. should have been: '
             '{correct_sha} but saw {real_sha}').format(
                 correct_sha=correct_sha, real_sha=real_sha))
def android_download_sdk(self, directory):
    """Download Android SDK and unpack into specified directory.

    Args:
      directory: String indication of location to unpack SDK to

    Raises:
      FileDownloadError: SDK tar or zip fails to download
      UnknownFileTypeError: If the file downloaded is neither a tar or a
          zip, and cannot be extracted.
    """
    url, file_hash = SDK_VERSIONS.get(self.system)
    suffix = util.get_file_type(url)
    sdk_location = os.path.join(directory, "sdk." + suffix)
    # download_file returns the verified path, or a falsy value on failure.
    sdk_location = util.download_file(url, sdk_location, "Android SDK",
                                      file_hash)
    if not sdk_location:
      raise common.FileDownloadError("http://developer.android.com/sdk/index."
                                     "html#", "Please rerun this script "
                                     "afterwards with the flag\n"
                                     "\t--android_sdk=/path/to/android_sdk")
    # Choose the extraction strategy from the URL's file extension.
    if suffix == "tgz":
      util.extract_tarfile(sdk_location, "r", directory, "Android SDK")
    elif suffix == "zip":
      util.extract_zipfile(sdk_location, "r", directory, "Android SDK")
    else:
      raise common.UnknownFileTypeError(suffix, "Please manually extract "
                                        "Android SDK and rerun this script "
                                        "afterwards with the flag\n"
                                        "\t--android_sdk=/path/to/android_sdk")
    if self.system == common.MAC:
      # Sometimes, permissions aren't set correctly on tools/android on OSX.
      # Change permissions to allow execution by user
      android = os.path.join(directory, SDK_NAMES.get(self.system), "tools",
                             "android")
      curr_permissions = os.stat(android)
      os.chmod(android, curr_permissions.st_mode | stat.S_IXUSR)
    # Update self.sdk_path to now include the SDK name
    self.sdk_path = os.path.join(self.sdk_path, SDK_NAMES.get(self.system))
def mac_install_macports(self):
    """Check for and install MacPorts.

    Raises:
      FileDownloadError: If the MacPorts package fails to download, or is
          incorrectly downloaded.
      UnknownFileTypeError: If the type of the downloaded package does not
          match any of the supported types.
    """
    if os.path.isfile(MACPORTS_LOCATION):
      logging.info("MacPorts already installed.")
      return
    else:
      logging.info("MacPorts not installed. Downloading now.")
    url, file_hash = MACPORTS_VERSIONS.get(self.os_version)
    url = MACPORTS_DOWNLOAD_PREFIX + url
    # Package type (pkg vs dmg) is derived from the download URL.
    suffix = util.get_file_type(url)
    location = os.path.join(common.BASE_DIR, "macports." + suffix)
    # download_file returns the verified path, or a falsy value on failure.
    location = util.download_file(url, location, "macports", file_hash)
    if not location:
      raise common.FileDownloadError("https://guide.macports.org/chunked/"
                                     "installing.macports.html", "Please rerun "
                                     "this script again afterwards.")
    logging.info("Installing Mac Ports. Sudo may prompt you for your password.")
    if suffix == "pkg":
      try:
        subprocess.call("sudo installer -pkg " + location + " -target /",
                        shell=True)
      except subprocess.CalledProcessError:
        raise common.PermissionDeniedError("installer", "Please enter your "
                                           "password to install MacPorts")
    elif suffix == "dmg":
      # A dmg is only attached; installation continues manually.
      subprocess.call("hdiutil attach " + location, shell=True)
    else:
      raise common.UnknownFileTypeError(suffix, "Please manually install "
                                        "MacPorts, or run this script again "
                                        "with the flag\n\t--no_macports")
    self.bash_profile_changed = True  # Mac ports installation will probably
                                      # modify the user's bash profile.
def windows_setup_visual_studio(self):
    """Check for compatible versions of Visual Studio and Visual C++.

    If no compatible version of Visual Studio is detected, download default
    version. If a compatible version is detected, check if a compatible
    version of the C++ compiler has been installed.

    Raises:
      FileDownloadError: If the Visual Studio installer fails to download,
          or is downloaded incorrectly.
    """
    # Scan the installed-programs listing for a known Visual Studio entry.
    for line in self.programs.splitlines():
      if VS_NAME_PREFIX in line:
        for name in get_all_vs():
          if line.strip() == name:
            self.vs_version = VS_COMPATIBLE_VERSIONS.get(name.split(" ")[-1])
            logging.info("Visual Studio already installed.")
            self.windows_check_compiler()
            return
    logging.info("Visual Studio not installed. Installing " + VS_DEFAULT_NAME +
                 " now...")
    location = os.path.join(common.BASE_DIR, "vs_community.exe")
    location = util.download_file(VS_DEFAULT_URL, location,
                                  "Visual Studio Installer", VS_DEFAULT_HASH)
    if not location:
      raise common.FileDownloadError("https://www.visualstudio.com/en-us/"
                                     "downloads/download-visual-studio-vs.aspx",
                                     "Please rerun this script after "
                                     "completing manual installation.")
    # BUG FIX: corrected typos in the user-facing message
    # ("lauching" -> "launching", "Stusio" -> "Studio").
    logging.info("Now launching Visual Studio Installer.\n*** Please ensure "
                 "you select \"Visual C++\" ***\nYour computer will "
                 "likely need to be restarted. If so, click 'Restart Now' when "
                 "prompted and rerun this script after reboot.\nIf no restart "
                 "is required, click 'Finish' and rerun script.")
    subprocess.call("cmd /k " + location, shell=True)
    # cmd /k will stop the script, but just in case, exit
    sys.exit()
def update_java(self):
    """Update Java Runtime Environment.

    There's a bug in the Java installer that sees Yosemite and El Capitan
    (10.10 and 10.11) as '10.1', and hence the android won't run. The official
    Apple package, which is installed in this function, doesn't have that bug.

    Raises:
        InstallInterruptError: If the wait for installing Java update was
            cancelled.
    """
    # Only Yosemite and later need the Apple-packaged Java.
    if self.os_version < OSX_10_10_YOSEMITE:
        return
    logging.info("Java update required by Android.")
    location = os.path.join(common.BASE_DIR, "java.dmg")
    location = util.download_file(JAVA_UPDATE_URL, location, "java",
                                  JAVA_UPDATE_HASH)
    if not location:
        logging.warn("Please visit https://support.apple.com/kb/DL1572 for "
                     "download link and extraction instructions.\nPlease rerun "
                     "this script afterwards to complete setup.")
        # BUG FIX: the original fell through and ran "hdiutil attach None".
        # Abort here; the user was just told to install manually and rerun.
        return
    logging.info("Finder will open. Double click on \"JavaForOSX.pgk\" to "
                 "continue installation")
    # Mount the dmg; the user finishes the install from Finder.
    subprocess.call("hdiutil attach " + location, shell=True)
def download_binaries():
    "Parse config and download dse binaries (local)"
    # TODO since this is done locally on the cperf tool server, is there any possible concurrency
    # issue .. Or maybe we should simply keep a cache on each host? (Comment to remove)
    filename = os.path.join(dse_cache, dse_tarball)
    dse_url = config['dse_url']
    username = config['dse_username'] if 'dse_username' in config else None
    password = config['dse_password'] if 'dse_password' in config else None
    url = urljoin(dse_url, dse_tarball)

    # Fetch the SHA of the tarball:
    correct_sha = download_file_contents(url + '.sha', username, password).split(" ")[0]
    # BUG FIX: the original asserted a (condition, message) tuple, which is
    # always truthy, so the check could never fail. Assert the condition.
    assert len(correct_sha) == 64, \
        'Failed to download sha file: {}'.format(correct_sha)

    # If a cached copy exists, keep it only when its digest matches.
    if os.path.exists(filename):
        print("Already in cache: {}".format(filename))
        real_sha = digest_file(filename)
        if real_sha != correct_sha:
            print("Invalid SHA for '{}'. It will be removed".format(filename))
            os.remove(filename)
        else:
            return

    # Fetch the tarball (the return value was unused; dead assignment dropped):
    download_file(url, filename, username, password)
    real_sha = digest_file(filename)

    # Verify the SHA of the tarball:
    if real_sha != correct_sha:
        raise AssertionError(
            ('SHA of DSE tarball was not verified. should have been: '
             '{correct_sha} but saw {real_sha}').format(
                correct_sha=correct_sha, real_sha=real_sha))
def mac_install_cmake(self):
    """Check for and install cmake.

    Assumes that if cmake is already installed, then the user has correctly
    set their path variable such that the command "cmake --version" will work.

    Raises:
        FileDownloadError: If the cmake tar fails to download, or is
            incorrectly downloaded.
        ExtractionError: If the cmake tar cannot be properly extracted.
    """
    if find_executable("cmake"):
        logging.info("CMake already installed.")
        return
    # BUG FIX: the rest of this method keys CMAKE_VERSIONS by self.os_version;
    # the original used self.version here, an inconsistent (likely wrong) key.
    cmake_version = util.get_file_name(
        CMAKE_VERSIONS.get(self.os_version)[0], False)
    # Maybe a previously extracted copy exists under the configured path.
    location = util.check_dir(self.cmake_path, cmake_version, "bin/cmake")
    if location:
        self.cmake_path = location
        logging.info("CMake found at " + self.cmake_path)
        return
    logging.info("CMake not installed. Downloading now.")
    url, file_hash = CMAKE_VERSIONS.get(self.os_version, (None, None))
    url = urlparse.urljoin(CMAKE_DOWNLOAD_PREFIX, url)
    location = os.path.join(common.BASE_DIR, "cmake.tar.gz")
    location = util.download_file(url, location, "cmake", file_hash)
    if not location:
        raise common.FileDownloadError(
            "https://cmake.org/download/", "Please "
            "rerun this script afterwards with the "
            "flag\n\t--cmake=/path/to/cmake")
    if not util.extract_tarfile(location, "r:gz", self.cmake_path, "cmake"):
        raise common.ExtractionError(location)
    logging.info("CMake successfully installed.")
def process_tweebo():
    """Read, convert and save the Tweebo corpus.

    Downloads the two Tweebo CoNLL files, converts every (word, tag) pair to
    a Tag object using the universal-tag mapping, and pickles the results.
    """
    download_file(TWEEBO_DAILY547_DL, "Data/TweeboDaily547.conll")
    download_file(TWEEBO_OCT27_DL, "Data/TweeboOct27.conll")

    def interpret_conll(path):
        """
        Read a CONLL file line-by-line and export the tags
        :param path: The path of the file
        :return: A list of Tag objects.
        """
        logging.info("Reading %s...", path)
        ret = []
        # Tweebo/ARK tag -> universal POS tag. (The unused 'tweebo' and 'ref'
        # scratch lists from the original were dead code and are removed.)
        tagmap = {
            'N': u'NOUN', 'O': u'PRON', '^': u'NOUN', 'S': u'X', 'Z': u'NOUN',
            'V': u'VERB', 'L': u'PRON', 'M': u'NOUN', 'A': u'ADJ', 'R': u'ADV',
            '!': u'.', 'D': u'DET', 'P': u'CONJ', '&': u'CONJ', 'T': u'PRT',
            'X': u'DET', 'Y': u'DET', '#': u'X', '@': u'NOUN', '~': u'X',
            'U': u'X', 'E': u'.', '$': u'NUM', ',': u'.', 'G': u'X'
        }
        with open(path, 'r') as fp:
            for line in fp:
                line = line.strip()
                if len(line) == 0:
                    continue
                word, raw = line.split()
                ret.append(Tag(word, tagmap[raw]))
        return ret

    d547 = interpret_conll("Data/TweeboDaily547.conll")
    o24 = interpret_conll("Data/TweeboOct27.conll")
    # BUG FIX: pickle with a binary protocol needs a binary-mode file; the
    # original opened with 'w', which breaks on Python 3 (and Windows Py2).
    with open('Data/TweeboDaily547.pkl', 'wb') as fout:
        logging.info("Saving daily...")
        pickle.dump(d547, fout, pickle.HIGHEST_PROTOCOL)
    with open('Data/TweeboOct27.pkl', 'wb') as fout:
        logging.info("Saving Oct...")
        pickle.dump(o24, fout, pickle.HIGHEST_PROTOCOL)
fname = parts[-1] f, e = os.path.splitext(fname) lasfile = None if e.lower() == '.laz': lasfile = f + '.las' if os.path.exists(lasfile): already_in_stock += 1 print("Already have %s" % lasfile) continue # Fetch the file count += 1 print("Downloading %d/%d %s.." % (count, total, fname), end="") try: if download_file(url, fname): success += 1 except Exception as e: print("..failed, %s" % e) fail += 1 print() if lasfile: # Uncompress the file print("Unpacking %s.." % fname, end="") try: args = [laszip, fname] p = subprocess.check_output(args) unpacked = True except Exception as e: print("..failed, %s" % e)
#!/usr/bin/env python from util import download_file, parse_vulnerable_softwares, send_email from xmlExtractor import XMLReader if __name__ == "__main__": download_file() xmlReader = XMLReader() xmlReader.xml_parser() vulnearble = parse_vulnerable_softwares(xmlReader.get_cves()) if len(vulnearble) > 0: msg = "{}".format("\n".join(vulnearble[::-1])) print msg send_email(msg)
def reply_pic(self, pic_url, ext):
    """Download the picture, reply with it, then remove the temp copy."""
    log.info("ๆฅๆถๅฐๅพ็๏ผๅฐๅ๏ผ" + pic_url)
    # Timestamp + conversation id keeps concurrent replies from colliding.
    local_name = "{}_{}.{}".format(time.time(), self.conversation_id, ext)
    util.download_file(pic_url, local_name)
    self.replier.pic(local_name)
    os.remove(local_name)
class Club(Best11):
    """
    Contains all info pertaining to an individual club
    (e.g. Noworry About MJ), given its id.
    """

    def __init__(self, club_id=None, club=None, manager=None):
        """
        Parameters:
            club_id (int > str)
            club (str): club name, used to look up the id if given.
            manager (str): manager name ('user' means the session user).
        """
        super().__init__()
        if not any((club_id, club, manager)):
            raise Exception("You must provide either club_id, club or manager")
        elif club_id:
            # The club_id has been given. Verify it is num...
            if not isinstance(club_id, int):
                # BUG FIX: the original tested `if not (r"^\d{1,4}$", club_id)`
                # — a non-empty tuple, always truthy — so the validation never
                # ran. Actually match the pattern against the value.
                import re
                if not re.match(r"^\d{1,4}$", str(club_id)):
                    raise ValueError(
                        """Please enter the club_id in number form,\
or use alternative constructors to initialise using club name or manager."""
                    )
                club_id = int(club_id)
        elif club:
            # elif passed the club name, get the corresponding club_id for that club
            club_id = self.club_id_from_club(club)
        elif manager:
            # elif passed the club manager, get the corresponding club_id for that manager
            # if set to user, get username from session
            if manager == 'user':
                manager = self.session.username
            club_id = self.club_id_from_manager(manager)

        self.club_id = club_id
        # Query params reused by every request this class makes.
        self.params = {'id': self.club_id}
        self.soup_dict = self.__get_soup_dict()

    def __repr__(self):
        # TODO
        pass

    def __str__(self):
        # TODO
        pass

    # --- Soup ---
    def __get_soup_dict(self):
        """
        Returns a dict of multi-use soup elements.
        e.g. the club_info_table is called by multiple properties.
        """
        request = self.session.request("GET", "vizualizare_club.php?",
                                       params=self.params)
        soup = make_soup(request)

        # Set instance var for full soup
        self.soup = soup

        # Common soup elements
        club_info = soup.find_all('table')[3].find_all('tr')[1]
        return {
            'club_info': club_info,
            'club_info_links': club_info.find_all('a'),
            'equipment': soup.find_all('table')[5]
        }

    # --- Avatar ---
    @property
    def avatar(self):
        """
        Returns the link to a club's avatar.
        r-type: str
        """
        request = self.session.request("GET", "vizualizare_club.php?",
                                       params=self.params)
        soup = make_soup(request)

        # Grab avatar link from soup and replace spaces to make working link
        avatar = soup.find_all('table')[1].find_all('tr')[2].find('img').get(
            'src').replace(' ', '%20')

        # If avatar is the default img, report "no custom avatar"
        if '/standard.jpg' in avatar:
            return False
        full_link = f"{self.session.MAIN_URL}{avatar}"
        return full_link

    def download_avatar(self):
        """
        Downloads club's avatar to current directory.
        If club's avatar is default, returns False.
        r-type: str
        """
        if not (avatar := self.avatar):
            return False
        try:
            util.download_file(avatar)
        except Exception as exc:
            # BUG FIX: a bare `except:` also swallowed KeyboardInterrupt /
            # SystemExit and discarded the traceback; narrow and chain it.
            raise Exception("Could not download avatar!") from exc
json.dump(collection, f) # collect card images for cards in collection image_folder = os.path.join("public", "images") try: os.mkdir(image_folder) except: pass image_extension = ".jpg" # determine missing images image_filelist = set(glob.glob(os.path.join(image_folder, "*"))) missing_images = {x for x in collection if image_filename(x, image_folder, image_extension) not in image_filelist} # download missing card images with rate limiter no_uri = 0 failed = set() for c_id in tqdm.tqdm(missing_images, desc = "downloading missing images"): card = card_index[c_id] if 'image_uris' in card: try: uri = card['image_uris']['normal'] download_file(uri, headers = None, filename = image_filename(c_id, image_folder, image_extension)) time.sleep(0.1) # rate limiter except: failed.add(c_id) else: no_uri += 1 pprint(f"no image uri available for {no_uri} cards") pprint(f"download failed for cards: {failed}") make_prices_overview(collection, card_index)
print(u"Matched jobs found") else: print(u"No package jobs found, aborting") sys.exit(1) if dryrun: print(u"Dry Run only, no files will be downloaded") for job in jobnames: file_paths = u.fetch_job_file_paths(job, jobnames) match = jobnames[job] for filename, url in file_paths: print(u"Downloading file {0}".format(filename)) path = u.get_final_path(job, match, filename) u.download_file(url, path) # Hash file, if it has not changed sha1 hash for this job, we unlink it so it won't be updated by the rest of the script hash = u.hash_file(path) if u.same_hash(filename, hash, conn): print(u"==> File not changed since last run, skipping") os.remove(path) continue elif not dryrun: # as long as we are not a dry run, update the hash if not u.update_hash(filename, hash, conn): print(u"==> Could not save hash, deleting file") os.remove(path) continue else: # always remove file in dryrun os.remove(path)
# Download every (folder, url) pair into base/Lidar/<folder>, skipping files
# already on disk and tallying successes/failures.
already_in_stock = 0
print("Files to download: %d" % total)
for (folder, url) in everything:
    outputfolder = os.path.join(base, "Lidar", folder)
    parts = url.split('/')
    fname = parts[-1]
    if not os.path.exists(outputfolder):
        os.makedirs(outputfolder)
    os.chdir(outputfolder)
    count += 1
    if os.path.exists(os.path.join(outputfolder, fname)):
        # BUG FIX: the original was `% folder, fname` — the format operator
        # received only one argument for two %s placeholders, raising
        # "not enough arguments for format string" at runtime.
        print("Already have \"%s\" / \"%s\"" % (folder, fname))
        already_in_stock += 1
        continue
    print("Downloading %d/%d %s.." % (count, total, fname), end="")
    try:
        # download_file's truthy/1 result counts as a success
        success += download_file(url, fname)
    except Exception as e:
        print(".. failed, %s" % e)
        fail += 1
    print()
print("Downloaded %d, failed to download %d, already had %d"
      % (success, fail, already_in_stock))
exit(0)
# Model / training hyperparameters.
batch_size = 32
num_classes = 10
epochs = 20
hidden_units = 100
learning_rate = 1e-6
clip_norm = 1.0  # gradient clipping threshold

# the data, split between train and test sets
from util import download_file
# Versioned S3 object holding the pickled notMNIST dataset.
s3_url = 'https://s3.amazonaws.com/neural-networking-book/ch02/notMNIST_3.5.pickle?versionId=j53VUhZj_FXe9iFSN0O.KLedt08.DGy4'
# NOTE(review): the local filename spells "notMNSIT" (typo). It is a runtime
# string, so it is left as-is — confirm nothing expects the corrected name.
pickle_file = download_file(s3_url, 'notMNSIT_3.5.pickle')
#pickle_file = './data/notMNIST.pickle'
image_size = 28      # side length in pixels (presumably 28x28 images — confirm)
num_of_labels = 10   # number of label classes

# Unpickle the dataset and unpack the train/valid/test splits.
with open('./' + pickle_file, 'rb') as f:
    save = pickle.load(f)
    training_dataset = save['train_dataset']
    training_labels = save['train_labels']
    validation_dataset = save['valid_dataset']
    validation_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
print('Training set', training_dataset.shape, training_labels.shape)
print('Validation set', validation_dataset.shape, validation_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
def file_report_download(self, report, url_params=None):
    """Download a file report from the /file/report/download endpoint.

    Args:
        report: Identifier of the report to download.
        url_params: Optional dict of extra query parameters merged into the
            instance defaults.

    Returns:
        Whatever util.download_file returns for the endpoint.
    """
    # BUG FIX: url_params defaulted to a shared mutable dict, and `token` was
    # left unbound (NameError on use) whenever self.token was falsy.
    if url_params is None:
        url_params = {}
    token = self.token if self.token else None
    params = util.merge_params(self.params, url_params)
    url = "%s/file/report/download" % (self.connection_string)
    return util.download_file(url, params, {'token': token, 'report': report})
# Output directories for the raw dataset and trained artefacts.
trained_path = './trained'
# Oxford 102 Category Flower Dataset mirror.
base_url = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/'

# Create the folders if missing.
# NOTE(review): data_path is defined earlier in the file (not visible here).
if not os.path.exists(data_path):
    os.mkdir(data_path)
if not os.path.isdir(trained_path):
    os.mkdir(trained_path)

# Expected on-disk locations of the three dataset files.
flowers_archive_path = os.path.join(data_path, '102flowers.tgz')
img_label_path = os.path.join(data_path, 'imagelabels.mat')
setid_path = os.path.join(data_path, 'setid.mat')

# NOTE(review): util.download_file is called without a destination path, so it
# presumably saves into the current directory — confirm that matches the
# *_path locations checked here, otherwise these checks never become true.
if not os.path.isfile(flowers_archive_path):
    print ('Downloading images...')
    util.download_file(base_url + '102flowers.tgz')
else:
    print("Images data already existed\n")

# Extract the image archive once.
if not os.path.isdir('./data/jpg'):
    print("Unzip the images files...")
    tarfile.open(flowers_archive_path).extractall(path=data_path)

if not os.path.isfile(img_label_path):
    print("Downloading image labels...")
    util.download_file(base_url + 'imagelabels.mat')
else:
    print("Image labels already existed\n")

if not os.path.isfile(setid_path):
    print("Downloading train/test/valid splits...")
    util.download_file(base_url + 'setid.mat')
def download_lecture_notes(course):
    """Scrape and download all lecture PDFs for `course` from WebCMS3.

    Also records every (name -> url) pair in the module-level `dict` global.
    NOTE(review): `dict` here is a project global shadowing the builtin —
    confirm against the rest of the file.
    """
    print(" ------------- Start downloading " + course + "'s Lecture ------------- ")
    url = "https://webcms3.cse.unsw.edu.au/" + course + "/19T1"
    r = client.get(url, verify=False)
    if not r.status_code == 200:
        # course page unavailable; print a blank line and bail out
        return print("")
    soup = BeautifulSoup(r.text, "lxml")
    sider_bar = soup.find('div', id='sidebar')
    # The sidebar label sometimes has a trailing space; try both.
    lec = sider_bar.find('a', string="Lectures")
    if not lec:
        lec = sider_bar.find('a', string="Lectures ")
    location = lec['href']
    url_lec = root + location
    r = client.get(url_lec, verify=False)
    soup = BeautifulSoup(r.text, "lxml")
    blocks = soup.find_all('div', 'panel panel-primary')
    dict[course]["lec"] = {}
    # print(sider_bar)
    for block in blocks:
        # Normalise the week heading: strip the sub-label, stray markers
        # and whitespace runs.
        week_str = block.h4.text.strip()
        small = block.h4.small.text.strip()
        week_str = week_str.replace(small, "")
        week_str = week_str.replace("active", "")
        week_str = re.sub(r'\n', "", week_str)
        week_str = " ".join(week_str.split())
        week_str = week_str.strip()
        dict[course]["lec"][week_str] = {}
        path = os.path.join(course, week_str)
        if not os.path.exists(path):
            os.makedirs(path)
        items = block.find_all('li', 'list-group-item')
        for item in items:
            name = item.find('a').text.strip()
            if len(name) <= 0:
                continue
            name = " ".join(name.split())
            # Prefer the explicit "Download" link; fall back to the first
            # anchor in the item's div.
            pdf = item.div.find('a', title="Download")
            pdf_url = root
            if pdf:
                pdf_url = root + pdf.get('href')
            if pdf_url == root:
                pdf_url = item.div.a.get('href')
            if pdf_url != root:
                # Slashes would create bogus sub-directories in the filename.
                name = name.replace("/", " ")
                path = os.path.join(course, week_str, name)
                # path = path.replace("\"", "ยง")
                succ = util.download_file(pdf_url, path)
                # Dots are replaced so the name is a safe dict key downstream.
                name = name.replace(".", "&")
                dict[course]["lec"][week_str][name] = pdf_url
            else:
                print("Cannot find lecture pdf")
    print(" ------------- Lecture download complete. :^ ) ------------- ")
class Club(Spider):
    """
    Contains all info pertaining to an indviudal club
    (e.g Noworry About MJ), given its id.
    """

    def __init__(self, sesh, club_id):
        super().__init__(sesh)

        ## Verify that club_id is num
        if not isinstance(club_id, int):
            # BUG FIX: the original tested `if not (r"^\d{1,4}$", club_id)` —
            # a non-empty tuple, always truthy — so the ValueError could never
            # fire. Actually match the pattern against the value.
            import re
            if not re.match(r"^\d{1,4}$", str(club_id)):
                raise ValueError("""Please enter the club_id in number form,\
or use alternative constructors to initialise using club name or manager.""")
            club_id = int(club_id)
        self.club_id = club_id

        # The same params are used so frequently across the class
        # that I just made it an instance var
        self.params = {'id': self.club_id}
        self.soup_dict = self.__get_soup_dict()

    def __repr__(self):
        class_name = self.get_class_name()
        return f'''{class_name} (\
\n\tClub_id: {self.club_id}\
\n\tClub_name: {self.club_name}\
\n\tStatus: {self.status}\
\n\t)'''

    def __str__(self):
        class_name = self.get_class_name()
        return f"{class_name} object ({self.club_name} [{self.club_id}])"

    @classmethod
    def fromName(cls, session, club_name):
        """
        -Alternative Constructor-
        :param club_name :type str
        Returns an instance that corresponds to that club_name.
        If cannot be found, Exception is raised.
        """
        club_id = cls.get_club_id_from_name(session, club_name)
        if not club_id:
            raise Exception(f"Could not get id for manager: {club_name}")
        return cls(session, club_id)

    @classmethod
    def fromManager(cls, session, manager):
        """
        -Alternative Constructor-
        :param manager :type str
        Returns an instance that corresponds to that manager.
        If cannot be found, Exception is raised.
        """
        club_id = cls.get_club_id_from_manager(session, manager)
        if not club_id:
            raise Exception(f"Could not get id from manager: {manager}")
        return cls(session, club_id)

    # --- Soup ---
    def __get_soup_dict(self):
        """
        Returns a dict of multi-use soup elements.
        e.g. the club_info_table is called by multiple properties.
        """
        request = req_get(self.sesh, "vizualizare_club.php?", params=self.params)
        soup = make_soup(request)

        # Set instance var for full soup
        self.soup = soup

        # Common soup elements
        club_info = soup.find_all('table')[3].find_all('tr')[1]
        return {
            'club_info': club_info,
            'club_info_links': club_info.find_all('a'),
            'equipment': soup.find_all('table')[5]
        }

    # --- Avatar ---
    @property
    def avatar(self):
        """ Returns the link to a club's avatar. """
        request = req_get(self.sesh, "vizualizare_club.php?", params=self.params)
        soup = make_soup(request)

        # Grab avatar link from soup and replace spaces to make working link
        avatar = soup.find_all('table')[1].find_all('tr')[2].find('img').get('src').replace(' ', '%20')

        # If avatar is the default img, report "no custom avatar"
        if '/standard.jpg' in avatar:
            return False
        full_link = MAIN_URL + avatar
        return full_link

    def download_avatar(self):
        """
        Downloads club's avatar to current directory.
        If club's avatar is default, returns False.
        """
        if not (avatar := self.avatar):
            return False
        try:
            util.download_file(avatar)
        except Exception as exc:
            # BUG FIX: a bare `except:` also swallowed KeyboardInterrupt /
            # SystemExit and discarded the traceback; narrow and chain it.
            raise Exception("Could not download avatar!") from exc
# Grab the Forge server installer unless it is already on disk.
from util import download_file, exists

FILE_NAME = 'forge-1.7.10-10.13.4.1558-1.7.10-installer.jar'

if not exists(FILE_NAME):
    installer_url = ('https://files.minecraftforge.net/maven/net/minecraftforge/'
                     'forge/1.7.10-10.13.4.1558-1.7.10/' + FILE_NAME)
    print('downloading server...')
    download_file(installer_url, FILE_NAME)
    print('downloading server finished')
def get_artifact(self, artifact, local_path=None, only_newer=True,
                 background=False):
    """Fetch an artifact into the local cache and return its local path.

    Args:
        artifact: dict describing the artifact; keys used here include
            'key', 'mutable', 'url', 'qualified' and optionally 'local'.
        local_path: optional explicit destination; derived from the cache
            layout when None.
        only_newer: skip the download when the local copy is at least as
            new as the stored one.
        background: when True, run the download/untar in a Thread and
            return (local_path, thread) instead of just local_path.

    Returns:
        The local path (str), or (local_path, Thread) when background=True,
        or the remote path for dockerhub/shub-qualified artifacts.
    """
    key = artifact.get('key')
    bucket = artifact.get('bucket')  # NOTE(review): unused in this method
    if key is None:
        # Keyless artifacts must be immutable and addressed by URL or
        # qualified name; they are cached under a hash of that address.
        assert not artifact['mutable']
        assert artifact.get('url') is not None or \
            artifact.get('qualified') is not None
        remote_path = artifact.get('url')
        if remote_path is None:
            remote_path = artifact.get('qualified')
        key = hashlib.sha256(remote_path.encode()).hexdigest()
        local_path = fs_tracker.get_blob_cache(key)
        if os.path.exists(local_path):
            self.logger.info(
                ('Immutable artifact exists at local_path {},' +
                 ' skipping the download').format(local_path))
            return local_path
        if artifact.get('url') is not None:
            download_file(remote_path, local_path, self.logger)
        else:
            if remote_path.startswith('dockerhub://') or \
                    remote_path.startswith('shub://'):
                # Container references are not files; hand back the path.
                # NOTE(review): this log call never formats in remote_path.
                self.logger.info(
                    ('Qualified {} points to a shub or dockerhub,' +
                     ' skipping the download'))
                return remote_path
            download_file_from_qualified(remote_path, local_path, self.logger)
        self.logger.debug(
            'Downloaded file {} from external source {}'.format(
                local_path, remote_path))
        return local_path

    # Keyed artifact: work out where it should live locally.
    if local_path is None:
        if 'local' in artifact.keys() and \
                os.path.exists(artifact['local']):
            local_path = artifact['local']
        else:
            if artifact['mutable']:
                local_path = fs_tracker.get_artifact_cache(key)
            else:
                local_path = fs_tracker.get_blob_cache(key)
                if os.path.exists(local_path):
                    self.logger.info(
                        ('Immutable artifact exists at local_path {},' +
                         ' skipping the download').format(local_path))
                    return local_path

    # Strip a trailing slash so dirname/rename behave predictably.
    local_path = re.sub('\/\Z', '', local_path)
    local_basepath = os.path.dirname(local_path)

    self.logger.info(
        "Downloading dir {} to local path {} from storage...".format(
            key, local_path))

    if only_newer and os.path.exists(local_path):
        self.logger.debug(
            'Comparing date of the artifact in storage with local')
        storage_time = self._get_file_timestamp(key)
        local_time = os.path.getmtime(local_path)
        if storage_time is None:
            self.logger.info(
                "Unable to get storage timestamp, storage is either " +
                "corrupted or has not finished uploading")
            return local_path
        # timestamp_shift gives some slack for clock skew between hosts.
        if local_time > storage_time - self.timestamp_shift:
            self.logger.info(
                "Local path is younger than stored, skipping the download")
            return local_path

    # Download into a unique temp tarball, then extract in place.
    tar_filename = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    self.logger.debug("tar_filename = {} ".format(tar_filename))

    def finish_download():
        # Runs inline or on a worker thread depending on `background`.
        try:
            self._download_file(key, tar_filename)
        except BaseException as e:
            self.logger.debug(e)
        if os.path.exists(tar_filename):
            # first, figure out if the tar file has a base path of .
            # or not
            self.logger.info("Untarring {}".format(tar_filename))
            listtar, _ = subprocess.Popen(['tar', '-tf', tar_filename],
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          close_fds=True).communicate()
            listtar = listtar.strip().split(b'\n')
            listtar = [s.decode('utf-8') for s in listtar]
            self.logger.info('List of files in the tar: ' + str(listtar))
            if listtar[0].startswith('./'):
                # Files are archived into tar from .; adjust path
                # accordingly
                basepath = local_path
            else:
                basepath = local_basepath
            tarcmd = ('mkdir -p {} && ' +
                      'tar -xf {} -C {} --keep-newer-files') \
                .format(basepath, tar_filename, basepath)
            self.logger.debug('Tar cmd = {}'.format(tarcmd))
            tarp = subprocess.Popen(['/bin/bash', '-c', tarcmd],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT,
                                    close_fds=True)
            tarout, tarerr = tarp.communicate()
            if tarp.returncode != 0:
                self.logger.info('tar had a non-zero return code!')
                self.logger.info('tar cmd = ' + tarcmd)
                self.logger.info('tar output: \n ' + str(tarout))
            if len(listtar) == 1:
                # Single-entry archives are renamed to the expected path;
                # retried because the extract may still be flushing.
                actual_path = os.path.join(basepath, listtar[0])
                self.logger.info('Renaming {} into {}'.format(
                    actual_path, local_path))
                retry(lambda: os.rename(actual_path, local_path),
                      no_retries=5,
                      sleep_time=1,
                      exception_class=OSError,
                      logger=self.logger)
            os.remove(tar_filename)
        else:
            self.logger.warning(
                'file {} download failed'.format(tar_filename))

    if background:
        t = Thread(target=finish_download)
        t.start()
        return (local_path, t)
    else:
        finish_download()
        return local_path
def report_format_download(self, file, url_params=None):
    """Download a formatted report from the /report/format/download endpoint.

    Args:
        file: Identifier of the report file to download.
        url_params: Optional dict of extra query parameters merged into the
            instance defaults.

    Returns:
        Whatever util.download_file returns for the endpoint.
    """
    # BUG FIX (same as file_report_download): url_params defaulted to a shared
    # mutable dict, and `token` was left unbound (NameError on use) whenever
    # self.token was falsy.
    if url_params is None:
        url_params = {}
    token = self.token if self.token else None
    params = util.merge_params(self.params, url_params)
    url = "%s/report/format/download" % (self.connection_string)
    return util.download_file(url, params, {'token': token, 'file': file})
def prepare_data():
    """Fetch the enwiki8 dataset (if needed) and load it as a numpy array."""
    target = os.path.join('data', 'enwiki8.npy')
    source_url = ('https://mxnet-experiment.s3.amazonaws.com/'
                  'enwiki-dataset/enwiki8.npy')
    util.download_file(source_url, target)
    with open(target, 'rb') as handle:
        return np.load(handle)
def process_tweebo():
    """Read, convert and save the Tweebo corpus.

    Downloads the two Tweebo CoNLL files, converts every (word, tag) pair to
    a Tag object using the universal-tag mapping, and pickles the results.
    """
    download_file(TWEEBO_DAILY547_DL, "Data/TweeboDaily547.conll")
    download_file(TWEEBO_OCT27_DL, "Data/TweeboOct27.conll")

    def interpret_conll(path):
        """
        Read a CONLL file line-by-line and export the tags
        :param path: The path of the file
        :return: A list of Tag objects.
        """
        logging.info("Reading %s...", path)
        ret = []
        # Tweebo/ARK tag -> universal POS tag. (The unused 'tweebo' and 'ref'
        # scratch lists from the original were dead code and are removed.)
        tagmap = {
            'N': u'NOUN', 'O': u'PRON', '^': u'NOUN', 'S': u'X', 'Z': u'NOUN',
            'V': u'VERB', 'L': u'PRON', 'M': u'NOUN', 'A': u'ADJ', 'R': u'ADV',
            '!': u'.', 'D': u'DET', 'P': u'CONJ', '&': u'CONJ', 'T': u'PRT',
            'X': u'DET', 'Y': u'DET', '#': u'X', '@': u'NOUN', '~': u'X',
            'U': u'X', 'E': u'.', '$': u'NUM', ',': u'.', 'G': u'X'
        }
        with open(path, 'r') as fp:
            for line in fp:
                line = line.strip()
                if len(line) == 0:
                    continue
                word, raw = line.split()
                ret.append(Tag(word, tagmap[raw]))
        return ret

    d547 = interpret_conll("Data/TweeboDaily547.conll")
    o24 = interpret_conll("Data/TweeboOct27.conll")
    # BUG FIX: pickle with a binary protocol needs a binary-mode file; the
    # original opened with 'w', which breaks on Python 3 (and Windows Py2).
    with open('Data/TweeboDaily547.pkl', 'wb') as fout:
        logging.info("Saving daily...")
        pickle.dump(d547, fout, pickle.HIGHEST_PROTOCOL)
    with open('Data/TweeboOct27.pkl', 'wb') as fout:
        logging.info("Saving Oct...")
        pickle.dump(o24, fout, pickle.HIGHEST_PROTOCOL)