def get(archive, provider=None, zipfile=None):
    """
    Get a cached copy of the archive's zipfile if it's been cached. Otherwise create
    a new zipfile and cache it.

    archive:  name of the archive (full path, may or may not be a file)
    provider: a function that can provide a zipfile associated with 'archive' if we
              need to construct one.
    zipfile:  the zipfile we're currently using. If it differs from what is cached,
              close and return the cached copy instead.
    """
    if archive in _zipfile_cache:
        # TODO(wickman) This is problematic sometimes as the zipfile library has a bug where
        # it assumes the underlying zipfile does not change between reads, so its info cache
        # gets out of date and throws BadZipfile exceptions (e.g. with EGG-INFO/requires.txt)
        if zipfile is not None and _zipfile_cache[archive] != zipfile:
            zipfile.close()
        zf = _zipfile_cache[archive]
    else:
        if zipfile is not None:
            zf = _zipfile_cache[archive] = zipfile
        else:
            assert provider is not None
            zf = _zipfile_cache[archive] = provider()
        _zipfile_namecache[archive] = set(zf.namelist())
        # PyPy doesn't allow us to share its builtin_zipimport _zip_directory_cache.
        # TODO(wickman) Go back to the delegating proxy for zip directory cache.
        # TODO(wickman) Do not leave handles open, as this could cause ulimits to
        # be exceeded.
        _zip_directory_cache[archive] = ZipFileCache(archive)
    return zf
def download_files(item):
    updates = []
    paths = []
    content = requests.get(item['link']).content
    soup = BeautifulSoup(content, "html.parser")
    links = [l for l in soup.find_all('a') if l.has_attr('href')]
    download_url = "/".join(item['link'].split('/')[:-1]) + "/"
    link_texts = list(set([link.string for link in links]))
    page_text = soup.text
    title_dir = item['title']
    for filename in link_texts:
        try:
            if ".zip" in filename:  # Only find zip files
                if "pcap" in filename:
                    # We don't need pcaps. Those will be recreated in the sandbox
                    continue
                else:
                    # Create the download link for each zip
                    download_link = download_url + filename
                    # Display what's happening
                    print "Downloading: {}".format(download_link)
                    # Create the download object
                    res = requests.get(download_link)
                    # Download the files into the 'malware' directory
                    zipfile = open(os.path.join('malware', os.path.basename(filename)), 'wb')
                    for chunk in res.iter_content(100000):
                        zipfile.write(chunk)
                    zipfile.close()
                    write_append(downloaded_files, 'malware/' + filename + '\n')
        except:
            pass
def get_zip_logs(request):
    """Make an archive of logs available to download"""
    from django.core.servers.basehttp import FileWrapper
    import zipfile

    try:
        compression = zipfile.ZIP_DEFLATED
    except:
        compression = zipfile.ZIP_STORED

    zipPath = "/tmp/logs.zip"
    zipfile = zipfile.ZipFile(zipPath, mode="w", allowZip64=True)
    files = ["tsconfig_gui.log", "django.log", "celery_w1.log"]
    for afile in files:
        fullpath = os.path.join("/var/log/ion", afile)
        if os.path.exists(fullpath):
            zipfile.write(fullpath, arcname=afile, compress_type=compression)
    zipfile.close()

    # TODO: Include the server serial number in the archive filename.
    # One possible source is /etc/torrentserver/tsconf.conf, serialnumber:XXXXXX
    archive_filename = "ts_update_logs.zip"
    response = http.HttpResponse(FileWrapper(open(zipPath)), mimetype="application/zip")
    response["Content-Disposition"] = "attachment; filename=%s" % archive_filename
    return response
def get_zip_logs(request):
    """Make an archive of logs available to download"""
    from django.core.servers.basehttp import FileWrapper
    import zipfile

    try:
        compression = zipfile.ZIP_DEFLATED
    except Exception:
        compression = zipfile.ZIP_STORED

    zipPath = "/tmp/logs.zip"
    zipfile = zipfile.ZipFile(zipPath, mode="w", allowZip64=True)
    for afile in [
        "tsconfig_gui.log",
        "django.log",
        "celery_w1.log",
        "tsconfig_debug.log",
        "product_integration.log",
    ]:
        fullpath = os.path.join("/var/log/ion", afile)
        if os.path.exists(fullpath):
            zipfile.write(fullpath, arcname=afile, compress_type=compression)
    zipfile.close()

    # TODO: Include the server serial number in the archive filename.
    # One possible source is /etc/torrentserver/tsconf.conf, serialnumber:XXXXXX
    archive_filename = "ts_update_logs.zip"
    response = http.HttpResponse(FileWrapper(open(zipPath)), mimetype="application/zip")
    response["Content-Disposition"] = "attachment; filename=%s" % archive_filename
    return response
def _download_and_extract_model_zip(self, url, file_name, force=False):
    if file_name:
        if os.path.exists(file_name):
            if force:
                os.remove(file_name)
            return
    temp_folder = tempfile.mkdtemp()
    try:
        if file_name != None:
            # online zip file
            # Download and extract zip archive.
            zip_file_name = os.path.join(temp_folder, "tmp.zip")
            self._download_with_progress_bar(url, zip_file_name)
            sys.stdout.write("\rDownload complete, decompressing files ... ")
            sys.stdout.flush()
        else:
            zip_file_name = url
        zipfile = ZipFile(zip_file_name, "r")
        zipfile.extractall(self.disk_path)
        zipfile.close()
        sys.stdout.write("\nModel extracted successfully.")
        sys.stdout.flush()
    except Exception as e:
        print("Error encountered, cleaning up and exiting ...")
        rmtree(temp_folder, ignore_errors=True)
        raise e
    # delete temporary folder
    rmtree(temp_folder, ignore_errors=True)
def deletable_file(fname, typ, name):
    """Serves and then deletes a zip file"""
    cherrypy.response.headers["Content-Type"] = typ
    cherrypy.response.headers["Content-Disposition"] = 'attachment; filename="%s"' % (name)
    zipfile = open(fname, "r+b")
    for line in zipfile:
        yield line
    zipfile.close()
    os.unlink(fname)
def exclude():
    # Assumes `zf` is an already-open zipfile.ZipFile defined at module level.
    for dirname, subdirs, files in os.walk("./"):
        if not subdirs:
            print(subdirs)
        else:
            print("There is a directory")
        if 'exclude directory' in subdirs:
            subdirs.remove('exclude directory')
        zf.write(dirname)
        for filename in files:
            zf.write(os.path.join(dirname, filename))
    zf.close()
def EpubToTxt(file):
    import os
    import zipfile
    import re
    from bs4 import BeautifulSoup

    def PrettifyTxt(text):
        lines = text.split('\n')
        text = ''
        for line in lines:
            if line.split():
                text = text + ' ' + line.strip() + '\n'
        return text

    filename = os.path.basename(file)
    filebasename = os.path.splitext(filename)[0]
    zipfile = zipfile.ZipFile(file)
    namelist = zipfile.namelist()
    opflist = []
    text = ''
    for subfile in namelist:
        if subfile.endswith('.opf'):
            opflist.append(subfile)
    opffile = min(opflist, key=len)
    folder = opffile.rstrip(os.path.basename(opffile))
    opfs = zipfile.open(opffile)
    opf = ''
    for line in opfs:
        opf = opf + str(line, 'utf-8')
    ncx = re.search('(?s)<spine.*toc.*=.*"ncx".*>(.*?)</spine>', opf, re.M).group()
    manifest = re.search('(?s)<manifest.*>(.*?)</manifest>', opf, re.M).group()
    ids = re.findall(' id="(.*?)"', manifest)
    hrefs = re.findall('href="(.*?)"', manifest)
    idrefs = re.findall('<itemref.*idref="(.*?)"', ncx)
    key = dict(zip(ids, hrefs))
    for idref in idrefs:
        htmpath = folder + key[idref]
        htmopen = zipfile.open(htmpath)
        soup = BeautifulSoup(htmopen, 'lxml')
        text = text + soup.get_text()
    zipfile.close()
    return PrettifyTxt(text)
def rezip_one(target_zip, source_zip, compression):
    try:
        with ZipFile(source_zip).open('data') as f:
            data = f.read()
    except Exception as e:
        print('skipping zip file', source_zip, 'for error', e)
        return
    # write to intermediate file in case source_dir == target_dir
    zf = ZipFile(target_zip + '.new', 'w', compression=compression)
    zf.writestr('data', data)
    zf.close()
    copystat(source_zip, target_zip + '.new')
    move(target_zip + '.new', target_zip)
def handle_docs(email, files):
    # Assume zipper always succeeds
    docfiles = list_files_by_extension(files, "docx")
    print "List of document files", docfiles
    zipper(ZipFileName, docfiles)
    zipfile = open(ZipFileName, "rb")
    zip = MIMEBase("application", "zip", name=ZipFileName)
    zip.set_payload(zipfile.read())
    zipfile.close()
    encoders.encode_base64(zip)
    email.attach(zip)
    delete_files(docfiles)
def uncompress(srcfile, destdir):
    import gzip
    import tarfile, zipfile

    file = os.path.basename(srcfile)
    if os.path.isfile(file):
        shortname, fmt = os.path.splitext(file)
        fmt = fmt[1:]
        if fmt in ('tgz', 'tar'):
            try:
                tar = tarfile.open(srcfile)
                names = tar.getnames()
                for name in names:
                    tar.extract(name, destdir)
                tar.close()
            except Exception as e:
                print("Can't uncompress {} for {}".format(file, e))
        elif fmt == 'zip':
            try:
                zipfile = zipfile.ZipFile(srcfile)
                for names in zipfile.namelist():
                    zipfile.extract(names, destdir)
                zipfile.close()
            except Exception as e:
                print("Can't uncompress {} for {}".format(file, e))
        elif fmt == 'gz':
            try:
                fname = os.path.join(destdir, os.path.basename(srcfile))
                gfile = gzip.GzipFile(srcfile)
                # read the gzip object and write its contents into the file created by open()
                open(fname, "w+").write(gfile.read())
                gfile.close()  # close the gzip object
            except Exception as e:
                return False, e, fmt
        # rar support (disabled):
        # elif fmt == 'rar':
        #     try:
        #         rar = rarfile.RarFile(srcfile)
        #         os.chdir(destdir)
        #         rar.extractall()
        #         rar.close()
        #     except Exception as e:
        #         return (False, e, filefmt)
        else:
            print('File format not supported, or not an archive file')
    return None
def download_ftp(remote_path, file, ftp_ip, ftp_user, ftp_pass):
    zipfile = open(file, 'wb')
    ftp = ftplib.FTP(ftp_ip)
    ftp.login(ftp_user, ftp_pass)
    size = ftp.size(remote_path)
    global pbar
    pbar = 0

    def file_write(data):
        zipfile.write(data)
        global pbar
        pbar += len(data)
        zipfile.flush()
        print("\r%3.2f %%" % (pbar * 100.0 / size), end='')

    ftp.retrbinary("RETR " + remote_path, file_write)
    zipfile.close()
    print('')
def full_backup():
    import zipfile
    logger.info('Running full_backup')
    from django.utils import timezone
    from django.core.management import call_command

    now = timezone.now()
    file_name = 'backup-%s-%s-%s-%s-%s.json' % (now.year, now.month, now.day, now.hour, now.minute)
    file_path = os.path.join(PROJECT_ROOT, 'backups', file_name)
    dir = os.path.dirname(file_path)
    try:
        os.stat(dir)
    except:
        os.mkdir(dir)

    # we will temporarily redirect stdout to a file to capture the data from the dumpdata cmd
    stdout = sys.stdout
    try:
        sys.stdout = open(file_path, 'w')
        call_command('dumpdata', use_natural_foreign_keys=True, use_natural_primary_keys=True)
        sys.stdout.close()
    except Exception as exc:
        logger.error(exc)

    # Put stdout back to what it was
    sys.stdout = stdout

    # Now zip the file
    zip_file_path = os.path.join(PROJECT_ROOT, 'backups', file_name + '.gz')
    zipfile = gzip.GzipFile(zip_file_path, "wb")
    try:
        inputFile = open(file_path, "r")
        zipfile.write(inputFile.read())
    finally:
        zipfile.close()

    # Delete the original uncompressed file
    os.remove(file_path)
def _mark_as_ended(self):
    if not self.alive:
        LOGGER.debug("Skipping creating the offline archive as we are not alive")
        return

    LOGGER.info("Starting saving the offline archive")
    self.stop_time = local_timestamp()
    self._write_experiment_meta_file()

    try:
        zipfile = self._get_offline_archive(self.offline_directory, self.id)
    except (OSError, IOError) as exc:
        # Use a temporary directory if we came so far to not lose the information
        old_dir = self.offline_directory
        self.offline_directory = tempfile.mkdtemp()
        zipfile = self._get_offline_archive(self.offline_directory, self.id)
        LOGGER.warning(
            OFFLINE_EXPERIMENT_TEMPORARY_DIRECTORY,
            old_dir,
            str(exc),
            self.offline_directory,
        )

    for file in os.listdir(self.tmpdir):
        zipfile.write(os.path.join(self.tmpdir, file), file)
    zipfile.close()

    # Clean the tmpdir to avoid filling up the disk
    try:
        shutil.rmtree(self.tmpdir)
    except OSError:
        # We made our best effort to clean ourselves
        msg = "Error cleaning offline experiment tmpdir %r"
        LOGGER.debug(msg, self.tmpdir, exc_info=True)

    # Display the full command to upload the offline experiment
    LOGGER.info(OFFLINE_EXPERIMENT_END, zipfile.filename)
def decrypt_sound_found_in_memory(self, in_file_path):
    with open(in_file_path, 'rb') as in_file:
        file_size = struct.unpack('<Q', in_file.read(struct.calcsize('<Q')))[0]
        iv = in_file.read(16)
        aes = AES.new(self.key, AES.MODE_CBC, iv)
        out_file = tempfile.NamedTemporaryFile(delete=False)
        while True:
            data = in_file.read(self.size_chunk)
            n = len(data)
            if n == 0:
                break
            decode = aes.decrypt(data)
            n = len(decode)
            if file_size > n:
                out_file.write(decode)
            else:
                out_file.write(decode[:file_size])  # <- remove padding on last block
            file_size -= n

        temp_path = out_file.name[:out_file.name.rfind('\\')]
        import zipfile
        zipfile = zipfile.ZipFile(out_file)
        zipfile.extractall(temp_path)
        zipfile.close()
        info_zipfile = zipfile.infolist()
        if len(info_zipfile) < 2:
            info_file_path = info_zipfile[0].filename.replace('/', '\\')
            print(info_file_path)
            sf_path = temp_path + '\\' + info_file_path
            print(sf_path)
            return sf_path
def run_model_tracking(server, user, password, zipath, projectname, modelname):
    '''
    Given server and project params, create a project and register the model in SAS Model Manager
    :param server:
    :param user:
    :param password:
    :param project:
    :param model:
    :return: None
    '''
    with Session(hostname=server, username=user, password=password, verify_ssl=False):
        zipfile = open(zipath, 'rb')
        model_repository.import_model_from_zip(modelname, projectname, file=zipfile, version='new')
        zipfile.close()
    return 0
#!/usr/bin/python
# http://www.pythonchallenge.com/pc/def/channel.html
import urllib, zipfile, re

fileUrl = 'http://www.pythonchallenge.com/pc/def/channel.zip'
(filename, ignore) = urllib.urlretrieve(fileUrl)
zipfile = zipfile.ZipFile(filename)

startingPattern = re.compile('start from ([0-9]*)')
linkingNumberPattern = re.compile('Next nothing is ([0-9]*)')

number = startingPattern.search(zipfile.read('readme.txt')).group(1)
while number:
    filename = number + '.txt'
    fileContents = zipfile.read(filename)
    info = zipfile.getinfo(filename)
    print info.comment,
    numberMatch = linkingNumberPattern.search(fileContents)
    if numberMatch:
        number = numberMatch.group(1)
    else:
        number = None
zipfile.close()
def assemble(filenames, outputdir, sizes, lowResComp=85, hiResComp=70,
             customCaptions={}, logger=printLogger):
    global directories_json, filenames_json, captions_json, resolutions_json, info_json, captions

    current_dir = os.getcwd()
    os.chdir(os.path.join(outputdir, "slides"))

    subdirs = []
    for x, y in sizes:
        if x < 0 or y < 0:
            name = "original_size"
        else:
            name = str(x) + "x" + str(y)
        os.mkdir(name)
        subdirs.append(name)

    directories_json = toJSON(subdirs)
    f = open("directories.json", "w")
    try:
        f.write(directories_json)
    finally:
        f.close()

    if quick_scaling:
        scaling_method = Image.BICUBIC  # alt: Image.BICUBIC or Image.BILINEAR
    else:
        scaling_method = Image.ANTIALIAS

    files = []
    captions.update(customCaptions)
    resolutions = {}
    if create_picture_archive:
        zipfile = ZipFile("../pictures.zip", "w")

    pool = Pool()
    for name in filenames:
        logger(name)
        basename = os.path.basename(name)
        im = Image.open(name)
        if create_picture_archive:
            if archive_quality < 100:
                fname = os.path.join(tempfile.gettempdir(), "gwtphotoalbum_temporary_image.jpg")
                im.load()
                # ImageFile.MAXBLOCK = 1000000
                im.save(fname, "JPEG", optimize=1, quality=archive_quality, progressive=1)
                zipfile.write(fname, os.path.join(os.path.basename(outputdir), basename))
                os.remove(fname)
            else:
                zipfile.write(name, os.path.join(os.path.basename(outputdir), basename))
        if not customCaptions:
            comment = read_caption(name)
            if comment:
                captions[basename] = comment
        files.append(basename)

        compression = {}
        comp = float(lowResComp)
        if len(sizes) > 1:
            delta = float((hiResComp - lowResComp)) / (len(sizes) - 1)
        else:
            delta = 0.0
        for dim in sizes:
            compression[dim] = int(comp + 0.5)
            comp += delta

        pool = Pool()
        res = []
        for dimensions, dirname in zip(sizes, subdirs):
            if tuple(dimensions) == tuple(FULLSIZE):
                shutil.copy(name, dirname)
                res.append([im.size[0], im.size[1]])
            else:
                x, y = dimensions
                w, h = adjustSize(im.size, (x, y))
                filename = os.path.join(dirname, basename)
                pool.apply_async(resizeAndSave,
                                 (im, w, h, scaling_method,
                                  os.path.join(dirname, basename),
                                  compression[dimensions]))
                # resizeAndSave(im, w, h, scaling_method,
                #               os.path.join(dirname, basename),
                #               compression[dimensions])
                res.append([w, h])
            # if tuple(dimensions) == tuple(sizes[-1]) \
            #         and basename in captions:
            #     pass
            # write_caption(os.path.join(dirname, basename),
            #               captions[basename])
        pool.close()
        pool.join()
        resolutions[basename] = checkInRes(res)

    if create_picture_archive:
        zipfile.close()
    pool.close()
    pool.join()

    filenames_json = toJSON(files)
    f = open("filenames.json", "w")
    try:
        f.write(filenames_json)
    finally:
        f.close()

    strippedCaptions = {}
    for fn in files:
        if fn in captions:
            strippedCaptions[fn] = captions[fn]
    captions_json = toJSON(strippedCaptions)
    f = open("captions.json", "w")
    try:
        f.write(captions_json)
    finally:
        f.close()

    res_jsn = [invertedResDict(), resolutions]
    resolutions_json = toJSON(res_jsn, None)
    f = open("resolutions.json", "w")
    try:
        f.write(resolutions_json)
    finally:
        f.close()

    info_json = toJSON(info)
    f = open("info.json", "w")
    try:
        f.write(info_json)
    finally:
        f.close()

    os.chdir(current_dir)
import zipfile as zf

SOURCE = r'<path_to_zip>'
DESTINATION = r'<path_to_dest>'

with zf.ZipFile(SOURCE, 'r') as zf:
    zf.extractall(DESTINATION)

zf.close()  # cleanup (redundant: the with-block already closes the archive)
f2.close()

# Set it to be viewed "inline"
img1.add_header('Content-Disposition', 'inline', filename='Odin Jobseeker.png')
img2.add_header('Content-Disposition', 'inline', filename='Thor Job Hunter.png')
msg.attach(img1)
msg.attach(img2)


def zipper(zipname, files):
    z = zipfile.ZipFile(zipname, 'w')
    for f in files:
        z.write(f)
    z.close()


zipper("cv.zip", cvs)
zipfile = open("cv.zip", "rb")
zip = MIMEBase('application', 'zip', name="cv.zip")
zip.set_payload(zipfile.read())
zipfile.close()

encoders.encode_base64(zip)
msg.attach(zip)


# File deletion
def delete_files(files):
    for f in files:
        os.remove(f)


files = cvs + images  # combine both path lists so delete_files removes each file individually
delete_files(files)

# Send the message
s = smtplib.SMTP("blast.sit.rp.sg")
s.sendmail(sender, recipient, msg.as_string())
def __init__(
    self,
    project_name=None,  # type: Optional[str]
    workspace=None,  # type: Optional[str]
    log_code=True,  # type: Optional[bool]
    log_graph=True,  # type: Optional[bool]
    auto_param_logging=True,  # type: Optional[bool]
    auto_metric_logging=True,  # type: Optional[bool]
    parse_args=True,  # type: Optional[bool]
    auto_output_logging="default",  # type: Optional[str]
    log_env_details=True,  # type: Optional[bool]
    log_git_metadata=True,  # type: Optional[bool]
    log_git_patch=True,  # type: Optional[bool]
    disabled=False,  # type: Optional[bool]
    offline_directory=None,  # type: Optional[str]
    log_env_gpu=True,  # type: Optional[bool]
    log_env_host=True,  # type: Optional[bool]
    api_key=None,  # type: Optional[str]
    display_summary=None,  # type: Optional[bool]
    log_env_cpu=True,  # type: Optional[bool]
    display_summary_level=None,  # type: Optional[int]
    auto_weight_logging=None,  # type: Optional[bool]
    auto_log_co2=False,  # type: Optional[bool]
    auto_metric_step_rate=10,  # type: Optional[int]
    auto_histogram_tensorboard_logging=False,  # type: Optional[bool]
    auto_histogram_epoch_rate=1,  # type: Optional[int]
    auto_histogram_weight_logging=False,  # type: Optional[bool]
    auto_histogram_gradient_logging=False,  # type: Optional[bool]
    auto_histogram_activation_logging=False,  # type: Optional[bool]
):
    # type: (...) -> None
    """
    Creates a new experiment and serializes it on disk. The experiment file will need to be
    uploaded manually later to appear on the frontend.

    Args:
        project_name: Optional. Send your experiment to a specific project. Otherwise it will be
            sent to `Uncategorized Experiments`. If the project name does not already exist,
            Comet.ml will create a new project.
        workspace: Optional. Attach an experiment to a project that belongs to this workspace.
        log_code: Default(True) - allows you to enable/disable code logging.
        log_graph: Default(True) - allows you to enable/disable automatic computation graph logging.
        auto_param_logging: Default(True) - allows you to enable/disable hyper parameters logging.
        auto_metric_logging: Default(True) - allows you to enable/disable metrics logging.
        auto_metric_step_rate: Default(10) - controls how often batch metrics are logged.
        auto_histogram_tensorboard_logging: Default(False) - allows you to enable/disable automatic
            histogram logging.
        auto_histogram_epoch_rate: Default(1) - controls how often histograms are logged.
        auto_histogram_weight_logging: Default(False) - allows you to enable/disable automatic
            histogram logging of biases and weights.
        auto_histogram_gradient_logging: Default(False) - allows you to enable/disable automatic
            histogram logging of gradients.
        auto_histogram_activation_logging: Default(False) - allows you to enable/disable automatic
            histogram logging of activations.
        auto_output_logging: Default("default") - allows you to select which output logging mode to
            use. You can pass `"native"` which will log all output even when it originated from a C
            native library. You can also pass `"simple"` which will work only for output made by
            Python code. If you want to disable automatic output logging, you can pass `False`.
            The default is `"default"` which will detect your environment and deactivate the output
            logging for IPython and Jupyter environments and sets `"native"` in the other cases.
        auto_log_co2: Default(True) - automatically tracks the CO2 emission of this experiment if
            the `codecarbon` package is installed in the environment.
        parse_args: Default(True) - allows you to enable/disable automatic parsing of CLI arguments.
        log_env_details: Default(True) - log various environment information in order to identify
            where the script is running.
        log_env_gpu: Default(True) - allows you to enable/disable the automatic collection of gpu
            details and metrics (utilization, memory usage etc.). `log_env_details` must also be true.
        log_env_cpu: Default(True) - allows you to enable/disable the automatic collection of cpu
            details and metrics (utilization, memory usage etc.). `log_env_details` must also be true.
        log_env_host: Default(True) - allows you to enable/disable the automatic collection of host
            information (ip, hostname, python version, user etc.). `log_env_details` must also be true.
        log_git_metadata: Default(True) - allows you to enable/disable the automatic collection of
            git details.
        display_summary_level: Default(1) - controls the summary detail that is displayed on the
            console at the end of the experiment. If 0, the summary notification is still sent.
            Valid values are 0 to 2.
        disabled: Default(False) - allows you to disable all network communication with the Comet.ml
            backend. It is useful when you want to test to make sure everything is working, without
            actually logging anything.
        offline_directory: the directory used to save the offline archive for the experiment.
    """
    self.config = get_config()
    self.api_key = get_api_key(api_key, self.config)  # optional, except for on-line operations

    if offline_directory is None:
        offline_directory = self.config["comet.offline_directory"]
    if offline_directory is None:
        raise ValueError("OfflineExperiment needs an offline directory")
    self.offline_directory = offline_directory

    # Start and end times
    self.start_time = None
    self.stop_time = None
    self.mode = "create"

    super(OfflineExperiment, self).__init__(
        project_name=project_name,
        workspace=workspace,
        log_code=log_code,
        log_graph=log_graph,
        auto_param_logging=auto_param_logging,
        auto_metric_logging=auto_metric_logging,
        parse_args=parse_args,
        auto_output_logging=auto_output_logging,
        log_env_details=log_env_details,
        log_git_metadata=log_git_metadata,
        log_git_patch=log_git_patch,
        disabled=disabled,
        log_env_gpu=log_env_gpu,
        log_env_host=log_env_host,
        display_summary=display_summary,
        display_summary_level=display_summary_level,
        log_env_cpu=log_env_cpu,
        auto_weight_logging=auto_weight_logging,
        auto_log_co2=auto_log_co2,
        auto_metric_step_rate=auto_metric_step_rate,
        auto_histogram_epoch_rate=auto_histogram_epoch_rate,
        auto_histogram_tensorboard_logging=auto_histogram_tensorboard_logging,
        auto_histogram_weight_logging=auto_histogram_weight_logging,
        auto_histogram_gradient_logging=auto_histogram_gradient_logging,
        auto_histogram_activation_logging=auto_histogram_activation_logging,
    )

    if not self.disabled:
        # Check that the offline directory is usable
        try:
            # Try to create the archive now
            zipfile = self._get_offline_archive(self.offline_directory, self.id)
            # Close the file handle, it will be reopened later
            zipfile.close()
        except (OSError, IOError) as exc:
            raise InvalidOfflineDirectory(self.offline_directory, str(exc))

    if self.disabled is not True:
        if api_key is not None:
            self._log_once_at_level(
                logging.WARNING,
                "api_key was given, but is ignored in OfflineExperiment(); remember to set it when you upload",
            )
        elif self.api_key is not None:
            self._log_once_at_level(
                logging.INFO,
                "COMET_API_KEY was set, but is ignored in OfflineExperiment(); remember to set it when you upload",
            )

        self._start()
        if self.alive is True:
            self._report(event_name=EXPERIMENT_CREATED)
def EpubToTxt(file):
    import os
    import zipfile
    import re
    import html2text
    from bs4 import BeautifulSoup

    def PrettifyTxt(text):
        lines = text.split('\n')
        text = ''
        for line in lines:
            if line.split():
                text = text + ' ' + line.strip() + '\n'
        return text

    filename = os.path.basename(file)
    filebasename = os.path.splitext(filename)[0]
    path = file.strip('/' + filename)
    savepath = path + filebasename
    zipfile = zipfile.ZipFile(file)
    namelist = zipfile.namelist()
    subfilelist = []
    flag = {}
    text = ''
    for subfile in namelist:
        flag1 = subfile.endswith('.html')
        flag2 = subfile.endswith('.htm')
        flag3 = subfile.endswith('.xhtml')
        if flag1 or flag2 or flag3:
            flag[subfile] = True
    for subfile in namelist:
        if subfile.endswith('.opf'):
            folder = subfile.rstrip(os.path.basename(subfile))
            opfs = zipfile.open(subfile)
            opf = ''
            for line in opfs:
                opf = opf + str(line, 'utf-8')
            left1 = re.search('<spine toc="ncx">', opf).span()[0]
            left2 = re.search('<manifest>', opf).span()[0]
            right1 = re.search('</spine>', opf).span()[1]
            right2 = re.search('</manifest>', opf).span()[1]
            ncx = opf[left1:right1]
            manifest = opf[left2:right2]
            ids = re.findall('id="(.*?)"', manifest)
            hrefs = re.findall('href="(.*?)"', manifest)
            idrefs = re.findall('<itemref idref="(.*?)"', ncx)
            key = {}
            for i in range(0, len(ids)):
                key[ids[i]] = hrefs[i]
            for idref in idrefs:
                htmpath = folder + key[idref]
                if htmpath in flag.keys() and flag[htmpath]:
                    htmopen = zipfile.open(htmpath)
                    soup = BeautifulSoup(htmopen, 'lxml')
                    text = text + soup.get_text()
                    flag[htmpath] = False
                else:
                    pass
    zipfile.close()
    return PrettifyTxt(text)
# Define the backup target directory
target_dir = 'D:\Transfer\SkyDrive\\backup'

# Use the date functions from the time module, joined to the target path, as a per-day backup folder name
today = target_dir + os.sep + time.strftime('%Y%m%d')

# The backup time, used when naming the zip archive
now = time.strftime('%H%M%S')

# Allow a comment to be included in the file name
comment = input('Enter a comment --> ')
if len(comment) == 0:
    # no comment given
    target = today + os.sep + '文献翻译' + now + '.zip'
else:
    # a comment was given, so include it in the file name as well
    target = today + os.sep + '文献翻译' + now + '_' + comment.replace(' ', '_') + '.zip'

# Create a new folder for each day
if not os.path.exists(today):
    os.mkdir(today)
    print('Successfully created directory', today)

# Write a zip archive
zipfile = zipfile.ZipFile(target, 'w', compression=zipfile.ZIP_DEFLATED)
trav(source)  # call the recursive function that adds the files
zipfile.close()  # close the archive

# Print a notice
print('Successful backup to', target)