def test_atomic_write(tmpdir):
    fname = tmpdir.join('ha')
    for i in range(2):
        with atomic_write(str(fname), overwrite=True) as f:
            f.write('hoho')

    with pytest.raises(OSError) as excinfo:
        with atomic_write(str(fname), overwrite=False) as f:
            f.write('haha')

    assert excinfo.value.errno == errno.EEXIST
    assert fname.read() == 'hoho'
    assert len(tmpdir.listdir()) == 1
def test_teardown(tmpdir):
    fname = tmpdir.join('ha')
    with pytest.raises(AssertionError):
        with atomic_write(str(fname), overwrite=True) as f:
            assert False

    assert not tmpdir.listdir()
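# A minimal sketch of the behavior the two tests above exercise: atomic_write
# stages output in a temp file in the same directory and renames it over the
# target on a clean exit, so readers never observe a partial file; on an
# exception the temp file is discarded and the target is left untouched.
# (Self-contained illustration, not part of the test suite above.)
import os
import tempfile

from atomicwrites import atomic_write

target = os.path.join(tempfile.mkdtemp(), 'example.txt')

with atomic_write(target, overwrite=True) as f:
    f.write('committed on success')

try:
    with atomic_write(target, overwrite=True) as f:
        f.write('this never lands')
        raise RuntimeError('simulated failure mid-write')
except RuntimeError:
    pass

assert open(target).read() == 'committed on success'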
def export_feature(self, feature):
    abs_path = self.feature2abspath(feature)
    with atomicwrites.atomic_write(abs_path, overwrite=True) as fh:
        self.encoder.encode_feature(feature, fh)

    return abs_path
def test_replace_simultaneously_created_file(tmpdir):
    fname = tmpdir.join('ha')
    with atomic_write(str(fname), overwrite=True) as f:
        f.write('hoho')
        fname.write('harhar')
        assert fname.read() == 'harhar'

    assert fname.read() == 'hoho'
    assert len(tmpdir.listdir()) == 1
def test_atomic_write_in_pwd(tmpdir):
    orig_curdir = os.getcwd()
    try:
        os.chdir(str(tmpdir))
        fname = 'ha'
        for i in range(2):
            with atomic_write(str(fname), overwrite=True) as f:
                f.write('hoho')

        with pytest.raises(OSError) as excinfo:
            with atomic_write(str(fname), overwrite=False) as f:
                f.write('haha')

        assert excinfo.value.errno == errno.EEXIST
        assert open(fname).read() == 'hoho'
        assert len(tmpdir.listdir()) == 1
    finally:
        os.chdir(orig_curdir)
def test_open_reraise(tmpdir):
    fname = tmpdir.join('ha')
    with pytest.raises(AssertionError):
        with atomic_write(str(fname), overwrite=False) as f:
            # Mess with f, so rollback will trigger an OSError. We're testing
            # that the initial AssertionError triggered below is propagated up
            # the stack, not the second exception triggered during rollback.
            f.name = "asdf"
            # Now trigger our own exception.
            assert False, "Intentional failure for testing purposes"
def test_dont_remove_simultaneously_created_file(tmpdir):
    fname = tmpdir.join('ha')
    with pytest.raises(OSError) as excinfo:
        with atomic_write(str(fname), overwrite=False) as f:
            f.write('hoho')
            fname.write('harhar')
            assert fname.read() == 'harhar'

    assert excinfo.value.errno == errno.EEXIST
    assert fname.read() == 'harhar'
    assert len(tmpdir.listdir()) == 1
def _upload_impl(self, item, href):
    fpath = self._get_filepath(href)
    try:
        with atomic_write(fpath, mode='wb', overwrite=False) as f:
            f.write(item.raw.encode(self.encoding))
            return fpath, get_etag_from_file(f)
    except OSError as e:
        if e.errno == errno.EEXIST:
            raise exceptions.AlreadyExistingError(existing_href=href)
        else:
            raise
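# The snippet above relies on overwrite=False raising an error whose errno is
# EEXIST when the destination already exists, which lets callers map "already
# there" onto a domain-specific exception. A minimal standalone sketch of that
# create-only pattern (create_new and its names are illustrative, not from the
# code above):
import errno
import os
import tempfile

from atomicwrites import atomic_write


def create_new(path, payload):
    """Write payload to path only if path does not exist yet."""
    try:
        with atomic_write(path, mode='w', overwrite=False) as f:
            f.write(payload)
    except OSError as e:
        if e.errno == errno.EEXIST:
            raise FileExistsError(path) from e
        raise


path = os.path.join(tempfile.mkdtemp(), 'item.txt')
create_new(path, 'first write wins')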
def save_status(base_path, pair, collection=None, data_type=None, data=None):
    assert data_type is not None
    assert data is not None
    status_name = get_status_name(pair, collection)
    path = expand_path(os.path.join(base_path, status_name)) + '.' + data_type
    prepare_status_path(path)

    with atomic_write(path, mode='w', overwrite=True) as f:
        json.dump(data, f)

    os.chmod(path, STATUS_PERMISSIONS)
def _write(self):
    if self._last_mtime is not None and \
            self._last_mtime != os.path.getmtime(self.path):
        raise exceptions.PreconditionFailed(
            'Some other program modified the file {!r}. Re-run the '
            'synchronization and make sure absolutely no other program is '
            'writing into the same file.'.format(self.path))
    text = join_collection(
        (item.raw for item, etag in itervalues(self._items))
    )
    try:
        with atomic_write(self.path, mode='wb', overwrite=True) as f:
            f.write(text.encode(self.encoding))
    finally:
        self._items = None
        self._last_mtime = None
def write(text, filename, encoding='utf-8', mode='wb'):
    """
    Write 'text' to file ('filename') assuming 'encoding' in an atomic way.
    Return (eventually new) encoding.
    """
    text, encoding = encode(text, encoding)
    if 'a' in mode:
        with open(filename, mode) as textfile:
            textfile.write(text)
    else:
        with atomic_write(filename, overwrite=True, mode=mode) as textfile:
            textfile.write(text)
    return encoding
def update(self, href, item, etag):
    fpath = self._get_filepath(href)
    if not os.path.exists(fpath):
        raise NotFoundError(item.uid)
    actual_etag = get_etag_from_file(fpath)
    if etag != actual_etag:
        raise WrongEtagError(etag, actual_etag)

    if not isinstance(item.raw, str):
        raise TypeError('item.raw must be a unicode string.')

    with atomic_write(fpath, mode='wb', overwrite=True) as f:
        f.write(item.raw.encode(self.encoding))
        etag = get_etag_from_file(f)

    return etag
def _write(self):
    if self._last_etag is not None and \
            self._last_etag != get_etag_from_file(self.path):
        raise exceptions.PreconditionFailed(
            'Some other program modified the file {!r}. Re-run the '
            'synchronization and make sure absolutely no other program is '
            'writing into the same file.'.format(self.path))
    text = join_collection(
        item.raw for item, etag in self._items.values()
    )
    try:
        with atomic_write(self.path, mode='wb', overwrite=True) as f:
            f.write(text.encode(self.encoding))
    finally:
        self._items = None
        self._last_etag = None
def write(self, create=False):
    if self.filename is None:
        if not create:
            raise ValueError('Create arg must be true '
                             'if filename is None.')
        self.random_filename()
    if self.filepath is None:
        raise ValueError('basepath and calendar must be set.')
    calendar_path = os.path.join(self.basepath, self.calendar)
    if not os.path.exists(calendar_path):
        raise CliError('Calendars are not explicitly created. '
                       'Please create the directory {} yourself.'
                       .format(calendar_path))
    with atomic_write(self.filepath, mode='wb', overwrite=not create) as f:
        f.write(self.vcal.to_ical())
    while self._old_filepaths:
        os.remove(self._old_filepaths.pop())
def save_status(base_path, pair, collection=None, data_type=None, data=None):
    assert data_type is not None
    assert data is not None
    status_name = get_status_name(pair, collection)
    path = expand_path(os.path.join(base_path, status_name)) + '.' + data_type
    dirname = os.path.dirname(path)

    try:
        os.makedirs(dirname, STATUS_DIR_PERMISSIONS)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    with atomic_write(path, mode='w', overwrite=True) as f:
        json.dump(data, f)

    os.chmod(path, STATUS_PERMISSIONS)
def _download_sqlite(db_path):
    logging.info('Grabbing file-copy lock')
    if FILE_COPY_LOCK.acquire():
        # Check it again, now that we've finally gotten the lock
        logging.info('Grabbed lock, checking if file exists: %s',
                     os.path.exists(db_path))
        if not os.path.exists(db_path):
            # TODO: ensure we copy to a staging ground, and move it into place
            # (so no one tries to open a malformed file)
            client = storage.Client('dancedeets-hrd')
            bucket = client.get_bucket('dancedeets-dependencies')
            logging.info('Downloading file')
            blob = bucket.get_blob(os.path.basename(db_path))
            # Luckily this only takes around 5-10 seconds (for street dance)
            # when run on GCE instances
            contents = blob.download_as_string()
            with atomic_write(db_path, overwrite=True) as f:
                f.write(contents)
                # db_path doesn't exist yet.
            # Now it does.
        FILE_COPY_LOCK.release()
def update(self, href, item, etag):
    fpath = self._get_filepath(href)
    if not os.path.exists(fpath):
        raise exceptions.NotFoundError(item.uid)
    actual_etag = get_etag_from_file(fpath)
    if etag != actual_etag:
        raise exceptions.WrongEtagError(etag, actual_etag)

    if not isinstance(item.raw, text_type):
        raise TypeError('item.raw must be a unicode string.')

    with atomic_write(fpath, mode='wb', overwrite=True) as f:
        f.write(item.raw.encode(self.encoding))
        etag = get_etag_from_fileobject(f)

    if self.post_hook:
        self._run_post_hook(fpath)
    return etag
def _write_pyc(state, co, source_stat, pyc):
    # Technically, we don't have to have the same pyc format as
    # (C)Python, since these "pycs" should never be seen by builtin
    # import. However, there's little reason to deviate, and I hope
    # sometime to be able to use imp.load_compiled to load them. (See
    # the comment in load_module above.)
    try:
        with atomicwrites.atomic_write(pyc, mode="wb", overwrite=True) as fp:
            fp.write(imp.get_magic())
            mtime = int(source_stat.mtime)
            size = source_stat.size & 0xFFFFFFFF
            fp.write(struct.pack("<ll", mtime, size))
            fp.write(marshal.dumps(co))
    except EnvironmentError as e:
        state.trace("error writing pyc file at %s: errno=%s" % (pyc, e.errno))
        # we ignore any failure to write the cache file
        # there are many reasons, permission-denied, __pycache__ being a
        # file etc.
        return False
    return True
def save_status(base_path, pair, collection=None, data_type=None, data=None):
    assert data_type is not None
    assert data is not None
    status_name = get_status_name(pair, collection)
    path = expand_path(os.path.join(base_path, status_name)) + '.' + data_type
    base_path = os.path.dirname(path)

    if collection is not None and os.path.isfile(base_path):
        raise CliError('{0} is probably a legacy file and could be removed '
                       'automatically, but this choice is left to the '
                       'user. If you think this is an error, please file '
                       'a bug at {1}'.format(base_path, PROJECT_HOME))

    try:
        os.makedirs(base_path, 0o750)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    with atomic_write(path, mode='w', overwrite=True) as f:
        json.dump(data, f)
async def apply_config(
    self,
    interfaces: Dict[str, InterfaceModel],
    nameservers: List[str],
    routes: List[RouteModel],
) -> None:
    logger.debug("In Network.apply_config()")
    self._is_busy = True
    sysconfig = Path("/etc/sysconfig/network")
    bonding_slave_interfaces: List[str] = []
    for iface in interfaces:
        conf = sysconfig.joinpath(f"ifcfg-{iface}")
        new_config = interfaces[iface].config
        if new_config is None:
            # interface with no config, so get rid of it
            conf.unlink(missing_ok=True)
            continue
        # This is destructive of any existing config
        # (i.e. we're not merging here, we're clobbering)
        logger.info(f"Writing {conf}")
        with atomic_write(conf, overwrite=True) as f:
            f.write(f"STARTMODE='auto'\n"
                    f"BOOTPROTO='{new_config.bootproto}'\n"
                    f"IPADDR='{new_config.ipaddr}'\n")
            if new_config.bonding_slaves:
                f.write("BONDING_MASTER='yes'\n")
                for i, s in enumerate(new_config.bonding_slaves):
                    f.write(f"BONDING_SLAVE{i}='{s}'\n")
                    bonding_slave_interfaces.append(s)
                f.write(
                    "BONDING_MODULE_OPTS='mode=active-backup miimon=100'\n"
                )

    # special case for bonding slave interfaces, if any, which need to be
    # overwritten with STARTMODE=hotplug, BOOTPROTO=none
    for iface in bonding_slave_interfaces:
        conf = sysconfig.joinpath(f"ifcfg-{iface}")
        logger.info(f"Writing {conf}")
        with atomic_write(conf, overwrite=True) as f:
            f.write("STARTMODE='hotplug'\nBOOTPROTO='none'\n")

    self._refresh_interfaces()

    # Write nameserver config
    self._update_config(
        Path("/etc/sysconfig/network/config"),
        {"NETCONFIG_DNS_STATIC_SERVERS": " ".join(nameservers)},
    )
    self._refresh_nameservers()

    # Write route config
    for iface in self._interfaces:
        self._update_route_file(
            Path(f"/etc/sysconfig/network/ifroute-{iface}"),
            iface,
            [route for route in routes if route.interface == iface],
        )
    self._update_route_file(
        Path("/etc/sysconfig/network/routes"),
        "-",
        [route for route in routes if route.interface == "-"],
    )
    self._refresh_routes()

    logger.info("Restarting network services...")
    ret, _, err = await aqr_run_cmd(
        ["systemctl", "restart", "network.service"])
    # TODO: can this fail?
    logger.info("Restarted network services")
    self._is_busy = False
def update_metafile(source_meta, dest_meta, updated, **kwargs):
    features = {}

    for path in updated:
        path = os.path.abspath(path)

        # See what's going on here - it is something between not
        # awesome and wildly inefficient. It is possible and likely
        # and just generally more better that we can and will
        # simply parse the filename and extract the ID accordingly.
        # The thing is that we need (want) to pass a file to the
        # dump_row function in order to generate a file_hash which
        # a bunch of other services use for detecting changes.
        # So, in the meantime this is what we're doing...
        # (20151111/thisisaaronland)

        feature = mapzen.whosonfirst.utils.load_file(path)
        props = feature['properties']
        wofid = props['wof:id']
        features[wofid] = path

    source_fh = open(source_meta, 'r')
    reader = csv.DictReader(source_fh)
    writer = None

    with atomicwrites.atomic_write(dest_meta, mode='wb', overwrite=True) as dest_fh:
        for row in reader:
            id = row['id']
            id = int(id)

            if features.get(id, False):
                logging.debug("update row for %s in %s" % (id, dest_meta))
                path = features[id]
                row = mapzen.whosonfirst.meta.dump_file(path, **kwargs)
                del(features[id])

            if not writer:
                fn = fieldnames()
                writer = csv.DictWriter(dest_fh, fieldnames=fn)
                writer.writeheader()

            writer.writerow(row)

        for wofid, path in features.items():
            row = mapzen.whosonfirst.meta.dump_file(path, **kwargs)
            writer.writerow(row)

    # https://github.com/whosonfirst/py-mapzen-whosonfirst-meta/issues/2
    perms = kwargs.get('perms', 0o644)
    if perms != None:
        os.chmod(dest_meta, perms)
def save_settings(settings):
    with atomic_write(SETTINGS_FILE, overwrite=True) as f:
        f.write(json.dumps(settings, indent=4))
def _download_file(self, url, filename, compress=False, timeout=30):
    """Download a file to the resources folder.

    Download data from `url`, save as `filename`, and optionally compress
    with gzip.

    Parameters
    ----------
    url : str
        URL to download data from
    filename : str
        name of file to save; if compress, ensure '.gz' is appended
    compress : bool
        compress with gzip
    timeout : int
        seconds for timeout of download request

    Returns
    -------
    str
        path to downloaded file, empty str if error
    """
    if compress and filename[-3:] != ".gz":
        filename += ".gz"

    destination = os.path.join(self._resources_dir, filename)

    if not create_dir(os.path.dirname(destination)):
        return ""

    if not os.path.exists(destination):
        try:
            # get file if it hasn't already been downloaded
            # http://stackoverflow.com/a/7244263
            with urllib.request.urlopen(
                url, timeout=timeout
            ) as response, atomic_write(
                destination, mode="wb", overwrite=True
            ) as f:
                self._print_download_msg(destination)
                data = response.read()  # a `bytes` object

                if compress:
                    self._write_data_to_gzip(f, data)
                else:
                    f.write(data)
        except urllib.error.URLError as err:
            logger.warning(err)
            destination = ""
            # try HTTP if an FTP error occurred
            if "ftp://" in url:
                destination = self._download_file(
                    url.replace("ftp://", "http://"),
                    filename,
                    compress=compress,
                    timeout=timeout,
                )
        except socket.timeout:
            logger.warning(f"Timeout downloading {url}")
            destination = ""
        except FileExistsError:
            # if the file exists, another process has created it while it was
            # being downloaded
            # in such a case, the other copy is identical, so ignore this error
            pass

    return destination
def cacheData(self, keyname, online_path_list, username=None, password=None,
              authentication_url=None, cookiejar=None, use_requests=False,
              use_progress_bar=True):
    '''
    Download and store specified data to local disk

    @param keyname: Name of dataset in configuration file
    @param online_path_list: List of urls to data
    @param username: Username for accessing online resources
    @param password: Password for accessing online resources
    @param authentication_url: The url used for authentication (unused when use_requests=True)
    @param cookiejar: The cookiejar that stores credentials (unused when use_requests=True)
    @param use_requests: Use the requests library instead of the standard library for accessing resources
    @param use_progress_bar: Use a progress bar to show number of items downloaded

    @return List of downloaded file locations
    '''

    def parseURL(data_location, in_path):
        '''
        This function takes the file path of saved data and determines
        what url created it.

        @param data_location: Absolute path to root directory whose path is not part of the url
        @param in_path: Path to object that will be used to generate a url

        @return ParseResult of url generated from in_path
        '''
        data_location_parts = len(pathlib.Path(data_location).parts[:])
        path = pathlib.Path(in_path)
        access_type = path.parts[data_location_parts]

        if access_type != 'file':
            access_type += '://'
        else:
            access_type += ':///'

        url_path = pathlib.Path(*path.parts[data_location_parts + 1:]).as_posix()
        return parse.urlparse(access_type + url_path)

    def generatePath(data_location, parsed_url):
        '''
        This function takes a parsed url (ParseResult) and generates the
        filepath to where the data should be stored.

        @param data_location: Location where data is stored
        @param parsed_url: ParseResult generated from url

        @return Local path to file
        '''
        if parsed_url.query == '':
            return os.path.join(data_location, parsed_url.scheme,
                                parsed_url.netloc, parsed_url.path[1:])
        else:
            return os.path.join(data_location, parsed_url.scheme,
                                parsed_url.netloc,
                                parsed_url.path[1:] + '?' + parsed_url.query)

    # Get absolute path to data directory
    data_location = DataFetcherCache.getDataLocation(keyname)

    # If it doesn't exist, create a new one
    if data_location == None:
        data_location = os.path.join(os.path.expanduser('~'), '.skdaccess', keyname)
        os.makedirs(data_location, exist_ok=True)
        DataFetcherCache.setDataLocation(keyname, data_location)

    # Get currently downloaded files
    downloaded_full_file_paths = [
        filename for filename
        in glob(os.path.join(data_location, '**'), recursive=True)
        if os.path.isfile(filename)
    ]

    # Remove empty files
    downloaded_full_file_paths = [
        filename for filename in downloaded_full_file_paths
        if self.checkIfDataExists(filename)
    ]

    # Convert filenames to urls
    downloaded_parsed_urls = set(
        parseURL(data_location, file_path)
        for file_path in downloaded_full_file_paths
    )

    # Determine which files are missing
    parsed_http_paths = [
        parse.urlparse(online_path) for online_path in online_path_list
    ]
    missing_files = list(
        set(parsed_http_paths).difference(downloaded_parsed_urls))
    missing_files.sort()

    # Download missing files
    if len(missing_files) > 0:
        # Sanity check on input options
        if use_requests == True and authentication_url != None:
            raise ValueError('Cannot use an authentication url with requests')

        # Setup connection (non requests)
        if not use_requests:
            # Deal with password protected urls
            # This method comes from
            # https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
            if username != None or password != None:
                password_manager = HTTPPasswordMgrWithDefaultRealm()
                if authentication_url == None:
                    authentication_url = [
                        parsed_url.geturl() for parsed_url in missing_files
                    ]
                password_manager.add_password(None, authentication_url,
                                              username, password)
                handler = HTTPBasicAuthHandler(password_manager)

                # If no cookiejar was given, create a new one
                if cookiejar == None:
                    cookiejar = CookieJar()

                cookie_processor = HTTPCookieProcessor(cookiejar)
                install_opener(build_opener(cookie_processor, handler))

            # Use a cookie with no username or password
            elif cookiejar != None:
                cookie_processor = HTTPCookieProcessor(cookiejar)
                install_opener(build_opener(cookie_processor))

        if use_progress_bar:
            missing_files_loop = tqdm(missing_files)
        else:
            missing_files_loop = missing_files

        for parsed_url in missing_files_loop:
            out_filename = generatePath(data_location, parsed_url)
            os.makedirs(os.path.split(out_filename)[0], exist_ok=True)

            with open(out_filename, 'a+b') as lockfile:
                fcntl.lockf(lockfile, fcntl.LOCK_EX)
                lockfile.seek(0)
                if len(lockfile.read(1)) == 0:
                    with atomic_write(out_filename, mode='wb',
                                      overwrite=True) as data_file:
                        if not use_requests:
                            shutil.copyfileobj(
                                urlopen(parsed_url.geturl()), data_file)
                        else:
                            if username != None or password != None:
                                # This method to download password protected
                                # data comes from
                                # https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
                                with requests.Session() as session:
                                    initial_request = session.request(
                                        'get', parsed_url.geturl())
                                    r = session.get(initial_request.url,
                                                    auth=(username, password),
                                                    stream=True)
                                    if r.status_code == 401:
                                        raise RuntimeError(
                                            "Authorization Denied")
                                    shutil.copyfileobj(
                                        r.raw, data_file, 1024 * 1024 * 10)
                            else:
                                with requests.Session() as session:
                                    r = session.get(parsed_url.geturl(),
                                                    stream=True)
                                    shutil.copyfileobj(
                                        r.raw, data_file, 1024 * 1024 * 10)

    # Return a list of file locations for parsing
    return [
        generatePath(data_location, parsed_url)
        for parsed_url in parsed_http_paths
    ]
def save_config(config):
    ensure_parent_dir_exists(CONFIG_FILE)
    with atomic_write(CONFIG_FILE, mode='wb', overwrite=True) as f:
        f.write(toml.dumps(config).encode('utf-8'))
def write_empty(self, passphrase):
    with atomic_write(self.keys_path, mode='w', overwrite=False) as f:
        f.truncate()
    with atomic_write(self.store, mode='wb', overwrite=False) as f:
        f.write(_encrypt({}, passphrase))
def _save_token(token):
    checkdir(expand_path(os.path.dirname(token_file)), create=True)
    with atomic_write(token_file, mode='w', overwrite=True) as f:
        json.dump(token, f)
def main():
    logger.info("start")

    # get filenames from openSNP data dump
    filenames = r.get_opensnp_datadump_filenames()

    filenames = [
        filename
        for filename in filenames
        if "readme" not in filename and "phenotype" not in filename
    ]

    # draw a sample from the observations
    random.seed(1)
    SAMPLE_SIZE = len(filenames)
    # SAMPLE_SIZE = 10
    samples = random.sample(range(len(filenames)), SAMPLE_SIZE)

    # setup tasks for parallelizing / execution on multiple cores
    p = Parallelizer(parallelize=True)
    tasks = [{"file": filenames[i]} for i in samples]

    # run tasks; results is a list of dicts
    results = p(load_file, tasks)

    # get results from `load_file` where `count` was non-zero
    rows = [item for item in results if "msg" not in item]

    df = pd.DataFrame(
        rows,
        columns=["file", "source", "build", "build_detected", "chromosomes", "count"],
    )

    save_df_as_csv(df, OUTPUT_DIR, "parse-opensnp-files.csv")

    # log parsing statistics
    file_count = len(filenames)
    logger.info(f"{file_count} files in the openSNP datadump")
    logger.info(f"{(len(df) / file_count):.2%} of openSNP datadump files parsed")
    logger.info(
        f"build detected in {len(df.loc[df.build_detected]) / len(df):.2%} of files parsed"
    )

    # extract files from the datadump where `load_file` returned a message
    if EXTRACT_FILES:
        # group files with same message (e.g., {"some message": ["file1", "file2"], ...})
        d = {}
        for result in results:
            if "msg" in result:
                if result["msg"] in d:
                    d[result["msg"]].append(result["file"])
                else:
                    d[result["msg"]] = [result["file"]]

        # add messages / file filters as necessary...
        d["build not detected"] = list(df.loc[~df.build_detected].file.values)

        # extract files that have messages for debugging
        for msg, files in d.items():
            if len(files) == 0:
                continue

            # create a directory for each message (prefix indicates number of files)
            path = os.path.join(OUTPUT_DIR, f"{len(files):04}_{clean_str(msg)}")
            create_dir(path)

            # save each file with message into created directory
            for filename in files:
                with atomic_write(os.path.join(path, filename), mode="wb") as f:
                    f.write(r.load_opensnp_datadump_file(filename))

    logger.info("stop")
def set_state(self, state: str):
    os.makedirs(os.path.dirname(self.filename), exist_ok=True)
    with atomic_write(self.filename, overwrite=True, encoding='utf-8') as f:
        f.write(state)
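# The encoding keyword above works because atomic_write forwards extra keyword
# arguments to the open call on its temp file, so text-mode options such as
# encoding and newline can be set as usual. A small sketch, assuming the
# atomicwrites package (state_file is an illustrative name):
import os
import tempfile

from atomicwrites import atomic_write

state_file = os.path.join(tempfile.mkdtemp(), 'state')

with atomic_write(state_file, overwrite=True, mode='w',
                  encoding='utf-8', newline='\n') as f:
    f.write('running\n')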
def generate_cov(bam_file, bam_index, out, threshold,
                 is_combined, contig_threshold, logger, sep=None):
    """
    Call bedtools and generate coverage file

    bam_file: bam files used
    out: output
    threshold: threshold of contigs that will be binned
    is_combined: if using abundance feature in deep learning. True: use
    contig_threshold: threshold of contigs for must-link constraints
    sep: separator for multi-sample binning
    """
    import numpy as np
    logger.info('Processing `{}`'.format(bam_file))
    bam_name = os.path.split(bam_file)[-1] + '_{}'.format(bam_index)
    bam_depth = os.path.join(out, '{}_depth.txt'.format(bam_name))

    bed_p = subprocess.Popen(
        ['bedtools', 'genomecov', '-bga', '-ibam', bam_file],
        universal_newlines=True,
        stdout=subprocess.PIPE)

    if is_combined:
        contig_cov, must_link_contig_cov = calculate_coverage(
            bed_p.stdout, bam_file, threshold,
            is_combined=is_combined,
            sep=sep,
            contig_threshold=contig_threshold if sep is None else 1000,
            contig_threshold_dict=contig_threshold if sep is not None else None)
        if bed_p.wait() != 0:
            raise OSError("Failure in running bedtools")

        contig_cov = contig_cov.apply(lambda x: x + 1e-5)
        must_link_contig_cov = must_link_contig_cov.apply(lambda x: x + 1e-5)

        if sep is None:
            abun_scale = (contig_cov.mean() / 100).apply(np.ceil) * 100
            abun_split_scale = (must_link_contig_cov.mean() / 100).apply(np.ceil) * 100
            contig_cov = contig_cov.div(abun_scale)
            must_link_contig_cov = must_link_contig_cov.div(abun_split_scale)

        with atomic_write(os.path.join(out, '{}_data_cov.csv'.format(bam_name)),
                          overwrite=True) as ofile:
            contig_cov.to_csv(ofile)

        with atomic_write(os.path.join(out, '{}_data_split_cov.csv'.format(bam_name)),
                          overwrite=True) as ofile:
            must_link_contig_cov.to_csv(ofile)
    else:
        contig_cov = calculate_coverage(
            bed_p.stdout, bam_file, threshold,
            is_combined=is_combined,
            sep=sep,
            contig_threshold=contig_threshold if sep is None else 1000,
            contig_threshold_dict=contig_threshold if sep is not None else None)
        if bed_p.wait() != 0:
            raise OSError("Failure in running bedtools")

        contig_cov = contig_cov.apply(lambda x: x + 1e-5)

        with atomic_write(os.path.join(out, '{}_data_cov.csv'.format(bam_name)),
                          overwrite=True) as ofile:
            contig_cov.to_csv(ofile)

    return (bam_file, logger)
def _save_token(token):
    with atomic_write(token_file, mode='w', overwrite=True) as f:
        json.dump(token, f)
def set_meta(self, key, value):
    value = value or ''
    assert isinstance(value, str)
    fpath = os.path.join(self.path, key)
    with atomic_write(fpath, mode='wb', overwrite=True) as f:
        f.write(value.encode(self.encoding))
def __update_concordances(source, dest, to_process, **kwargs):
    to_update = {}

    source_fh = open(source, 'r')
    reader = csv.reader(source_fh)

    # First figure out the columns we've got
    cols = next(reader)

    # Next check to see if there are any new ones

    # Note that this does NOT attempt to check and see whether the files
    # in to_process actually have modified concordances. That is still not
    # a solved problem but beyond that it is a problem to solve elsewhere
    # in the stack. This assumes that by the time you pass a list of files
    # you've satisfied yourself that it's worth the processing time (not to
    # mention memory) to fill up `to_update` for all the files listed in
    # to_update. (20160107/thisisaaronland)

    for path in to_process:
        path = os.path.abspath(path)

        feature = mapzen.whosonfirst.utils.load_file(path)
        props = feature['properties']

        wofid = props['wof:id']
        concordances = props['wof:concordances']

        to_update[wofid] = concordances

        for src in concordances.keys():
            if not src in cols:
                cols.append(src)

    cols.sort()

    # Rewind the source concordances file so that we can create a dict reader
    source_fh.seek(0)
    reader = csv.DictReader(source_fh)
    writer = None

    with atomicwrites.atomic_write(dest, mode='wb', overwrite=True) as dest_fh:
        for row in reader:
            if not writer:
                writer = csv.DictWriter(dest_fh, fieldnames=cols)
                writer.writeheader()

            # See what we're doing here? If this is a record that's been
            # updated (see above for the nuts and bolts about how/where
            # we determine this) then we reassign it to `row`.
            wofid = row.get('wof:id')
            wofid = int(wofid)

            if to_update.get(wofid, False):
                row = to_update[wofid]
                row['wof:id'] = wofid

            out = {}

            # Ensure we have a value or "" for every src in cols
            for src in cols:
                out[src] = row.get(src, "")

            writer.writerow(out)
def set_meta(self, key, value):
    value = normalize_meta_value(value)
    fpath = os.path.join(self.path, key)
    with atomic_write(fpath, mode='wb', overwrite=True) as f:
        f.write(value.encode(self.encoding))
"build_detected", "x_snps", "heterozygous_x_snps", "y_snps", "y_snps_not_null", "count", ], ) # derive the columns we want to analyze df["heterozygous_x_snps_ratio"] = df.heterozygous_x_snps / df.x_snps df["y_snps_not_null_ratio"] = df.y_snps_not_null / df.y_snps df.drop(df.loc[df["heterozygous_x_snps_ratio"].isna()].index, inplace=True) df.drop(df.loc[df["y_snps_not_null_ratio"].isna()].index, inplace=True) plt = create_analysis_plot( df[["heterozygous_x_snps_ratio", "y_snps_not_null_ratio"]]) # save output with atomic_write( f"{os.path.join(OUTPUT_DIR, 'xy-chrom-snp-ratios.png')}", mode="wb", overwrite=True, ) as f: plt.savefig(f) save_df_as_csv(df, OUTPUT_DIR, "xy-chrom-snp-ratios.csv") logger.info("stop")
def __call__(self, path, mode):
    return atomic_write(path, mode=mode, overwrite=self._overwrite)
def on_phrase(j):
    phrase = parse_phrase(j.get("phrase", []))
    cmd = j["cmd"]
    if cmd == "p.end" and phrase:
        with atomic_write(path, overwrite=True) as f:
            f.write(phrase)
def _set_key(self, content):
    checkdir(os.path.dirname(self._key_path), create=True)
    with atomicwrites.atomic_write(self._key_path, mode='wb') as f:
        f.write(content)
    assert_permissions(self._key_path, 0o600)
def ensure_image_loaded(image_name, image_url, cache_dir):
    """Pull docker image into local repo."""

    # check if image is in local docker repo
    try:
        registry = app.conf.get("CONTAINER_REGISTRY", None)
        # Custom edit to load image from registry
        try:
            if registry is not None:
                logger.info(
                    "Trying to load docker image {} from registry '{}'".format(
                        image_name, registry))
                registry_url = os.path.join(registry, image_name)
                logger.info("docker pull {}".format(registry_url))
                check_output(['docker', 'pull', registry_url])
                logger.info("docker tag {} {}".format(registry_url, image_name))
                check_output(['docker', 'tag', registry_url, image_name])
        except Exception as e:
            logger.warn(
                "Unable to load docker image from registry '{}': {}".format(
                    registry, e))

        image_info = check_output(['docker', 'inspect', image_name])
        logger.info("Docker image %s cached in repo" % image_name)
    except:
        logger.info("Failed to inspect docker image %s" % image_name)

        # pull image from url
        if image_url is not None:
            image_file = os.path.join(cache_dir, os.path.basename(image_url))
            if not os.path.exists(image_file):
                logger.info("Downloading image %s (%s) from %s" %
                            (image_file, image_name, image_url))
                try:
                    osaka.main.get(image_url, image_file)
                except Exception as e:
                    raise RuntimeError("Failed to download image {}:\n{}".format(
                        image_url, str(e)))
                logger.info("Downloaded image %s (%s) from %s" %
                            (image_file, image_name, image_url))
            load_lock = "{}.load.lock".format(image_file)
            try:
                with atomic_write(load_lock) as f:
                    f.write("%sZ\n" % datetime.utcnow().isoformat())
                logger.info("Loading image %s (%s)" % (image_file, image_name))
                p = Popen(['docker', 'load', '-i', image_file],
                          stderr=PIPE, stdout=PIPE)
                stdout, stderr = p.communicate()
                if p.returncode != 0:
                    raise RuntimeError("Failed to load image {} ({}): {}".format(
                        image_file, image_name, stderr.decode()))
                logger.info("Loaded image %s (%s)" % (image_file, image_name))
                try:
                    os.unlink(image_file)
                except:
                    pass
                try:
                    os.unlink(load_lock)
                except:
                    pass
            except OSError as e:
                if e.errno == 17:
                    logger.info("Waiting for image %s (%s) to load" %
                                (image_file, image_name))
                    inspect_image(image_name)
                else:
                    raise
        else:
            # pull image from docker hub
            logger.info("Pulling image %s from docker hub" % image_name)
            check_output(['docker', 'pull', image_name])
        image_info = check_output(['docker', 'inspect', image_name])

    logger.info("image info for %s: %s" % (image_name, image_info.decode()))
    return json.loads(image_info)[0]
def _dump(obj, abspath, serializer_type,
          dumper_func=None,
          compress=True,
          overwrite=False,
          verbose=False,
          **kwargs):
    """Dump object to file.

    :param abspath: The file path you want dump to.
    :type abspath: str

    :param serializer_type: 'binary' or 'str'.
    :type serializer_type: str

    :param dumper_func: A dumper function that takes an object as input,
      return binary or string.
    :type dumper_func: callable function

    :param compress: default ``True``. If True, then compress binary.
    :type compress: bool

    :param overwrite: default ``False``, If ``True``, when you dump to
      existing file, it silently overwrite it. If ``False``, an alert
      message is shown. Default setting ``False`` is to prevent overwrite
      file by mistake.
    :type overwrite: boolean

    :param verbose: default True, help-message-display trigger.
    :type verbose: boolean
    """
    _check_serializer_type(serializer_type)

    if not inspect.isfunction(dumper_func):
        raise TypeError("dumper_func has to be a function take object as input "
                        "and return binary!")

    prt_console("\nDump to '%s' ..." % abspath, verbose)
    if os.path.exists(abspath):
        if not overwrite:
            prt_console(
                " Stop! File exists and overwrite is not allowed",
                verbose,
            )
            return

    st = time.clock()
    b_or_str = dumper_func(obj, **kwargs)
    if serializer_type == "str":
        b = b_or_str.encode("utf-8")
    else:
        b = b_or_str

    if compress:
        b = zlib.compress(b)

    with atomic_write(abspath, overwrite=overwrite, mode="wb") as f:
        f.write(b)

    elapsed = time.clock() - st
    prt_console(" Complete! Elapse %.6f sec." % elapsed, verbose)

    if serializer_type == "str":
        return b_or_str
    else:
        return b
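# A standalone round-trip sketch of the dump path above: serialize, optionally
# zlib-compress, write atomically, then reverse the steps on load. This is an
# illustration of the same technique with a JSON dumper, not the module's API.
import json
import os
import tempfile
import zlib

from atomicwrites import atomic_write

obj = {'answer': 42}

b = json.dumps(obj).encode('utf-8')
b = zlib.compress(b)

path = os.path.join(tempfile.mkdtemp(), 'obj.json.zlib')
with atomic_write(path, mode='wb', overwrite=True) as f:
    f.write(b)

with open(path, 'rb') as f:
    restored = json.loads(zlib.decompress(f.read()).decode('utf-8'))

assert restored == obj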
def _save(self):
    with atomic_write(self._filename, overwrite=True) as f:
        for entry in self._entries:
            line = entry.to_line()
            f.write(line)
def _set_auth_token(self, token):
    checkdir(os.path.dirname(self._auth_token_path), create=True)
    with atomicwrites.atomic_write(self._auth_token_path) as f:
        f.write(token)
    assert_permissions(self._auth_token_path, 0o600)
def write(self, where, content):
    with atomic_write(path=where, overwrite=True, mode='wb') as f:
        f.write(pickle.dumps(content))
def write(self, write_cb):
    assert self._lock.is_locked
    with atomic_write(self._path, overwrite=True) as f:
        write_cb(f)
def main(args=None):
    import shlex
    start = datetime.datetime.now()
    if args is None:
        args = sys.argv

    # Save it for later
    command_line = ' '.join([shlex.quote(a) for a in args])
    args = parse_args(args)
    validate_args(args)
    out = args.output
    if not os.path.exists(out):
        os.makedirs(out)
    with tempfile.TemporaryDirectory() as tmpdirname:
        try:
            if args.genome_fasta is not None:
                gene_prediction(args.genome_fasta, out, tmpdirname)
                args.nt_input = out + '/prodigal_out.fna'
                args.aa_input = out + '/prodigal_out.faa'

            if args.nt_input is not None:
                n_nt = number_seqs_fafile(args.nt_input)
                n_aa = number_seqs_fafile(args.aa_input)
                if n_nt != n_aa:
                    sys.stderr.write(
                        "The input DNA and amino acid files must have the "
                        "same number of sequences!\n")
                    sys.stderr.write(
                        f"DNA file has {n_nt} while amino acid file has {n_aa}!")
                    sys.exit(1)
                split_file(args.nt_input,
                           output_dir=tmpdirname + '/split_file',
                           is_dna=True)

            num_split = split_file(args.aa_input,
                                   output_dir=tmpdirname + '/split_file',
                                   is_dna=False)

            hit_table = []
            gene_table = []
            print('Starting GMGC queries (total: {} batches to process)'.format(
                num_split))
            for index in tqdm(range(num_split)):
                besthit = query_gmgc(
                    tmpdirname + '/split_file/split_{}.faa'.format(index + 1))
                if besthit is not None:
                    besthit = json.loads(
                        bytes.decode(besthit.content))['results']
                    nt_split = None
                    if args.nt_input is not None:
                        nt_split = tmpdirname + \
                            '/split_file/split_{}.fna'.format(index + 1)
                    aa_split = tmpdirname + \
                        '/split_file/split_{}.faa'.format(index + 1)
                    hit_table_index, gene_inf = realignment(
                        nt_split, aa_split, besthit)
                    hit_table.extend(hit_table_index)
                    gene_table.extend(gene_inf)

            hit_table = pd.DataFrame(hit_table)
            hit_table.columns = ['query_name', 'unigene_id', 'align_category',
                                 'gene_dna', 'gene_protein']

            gene_table = pd.DataFrame(gene_table)
            gene_table.columns = ['unigene_id', 'sample', 'longitude',
                                  'latitude', 'habitat']
            num_gene = hit_table.shape[0]

            summary = []
            summary.append('*' * 30 + 'GMGC-mapper results summary table' + '*' * 30)
            summary.append('- Processed {} genes'.format(num_gene))
            match_result = hit_table['align_category'].value_counts().to_dict()
            if 'EXACT' in match_result:
                summary.append(
                    ' -{0} ({1:.1%}) were found in the GMGC at above 95% '
                    'nucleotide identity with at least 95% coverage'.format(
                        match_result['EXACT'], match_result['EXACT'] / num_gene))
            else:
                summary.append(
                    ' -No genes were found in the GMGC at above 95% '
                    'nucleotide identity with at least 95% coverage')

            if 'SIMILAR' in match_result:
                summary.append(
                    ' -{0} ({1:.1%}) were found in the GMGC at above 80% '
                    'nucleotide identity with at least 80% coverage'.format(
                        match_result['SIMILAR'], match_result['SIMILAR'] / num_gene))
            else:
                summary.append(
                    ' -No genes were found in the GMGC at above 80% '
                    'nucleotide identity with at least 80% coverage')

            if 'MATCH' in match_result:
                summary.append(
                    ' -{0} ({1:.1%}) were found in the GMGC at above 50% '
                    'nucleotide identity with at least 50% coverage'.format(
                        match_result['MATCH'], match_result['MATCH'] / num_gene))
            else:
                summary.append(
                    ' -No genes were found in the GMGC at above 50% '
                    'nucleotide identity with at least 50% coverage')

            no_match = match_result.get('NO MATCH', 0.0) + \
                match_result.get('NO HIT', 0.0)
            if no_match:
                summary.append(
                    ' -{0} ({1:.1%}) had no match in the GMGC'.format(
                        no_match, no_match / num_gene))

            genome_bin = query_genome_bin(hit_table)
            genome_bin = genome_bin.sort_values('nr_hits', ascending=False)
            summary.append('\n\n' + '*' * 30 +
                           'GMGC-mapper results genome_bin summary' +
                           '*' * 30 + '\n')

            num_hitting = genome_bin['nr_hits'].values
            summary.append('{} bins were reported for >50% of genes'.format(
                np.sum(num_hitting > num_gene * 0.5)))
            summary.append('{} bins were reported for >25% of genes'.format(
                np.sum(num_hitting > num_gene * 0.25)))
            summary.append('{} bins were reported for >10% of genes'.format(
                np.sum(num_hitting > num_gene * 0.1)))

            with atomic_write(out + '/genome_bin.tsv', overwrite=True) as ofile:
                ofile.write(
                    '# Genome_bin from GMGC-mapper v{}\n'.format(__version__))
                genome_bin.to_csv(ofile, sep='\t', index=False)

            with atomic_write(out + '/hit_table.tsv', overwrite=True) as ofile:
                ofile.write(
                    '# Results from GMGC-mapper v{}\n'.format(__version__))
                hit_table.to_csv(ofile, sep='\t', index=False)

            with atomic_write(out + '/gene_table.tsv', overwrite=True) as ofile:
                ofile.write('# Gene information from GMGC-mapper v{}\n'.format(
                    __version__))
                gene_table.to_csv(ofile, sep='\t', index=False)

            with atomic_write(out + '/summary.txt', overwrite=True) as ofile:
                for s in summary:
                    print(s)
                    ofile.write(s + '\n')

            output_content = resource_string(__name__, 'output.md')
            with atomic_write(out + '/README.md', overwrite=True) as ofile:
                ofile.write(bytes.decode(output_content))

            end = datetime.datetime.now()
            run_metadata = {
                'Command_line': command_line,
                'GMGC-mapper': __version__,
                'Working directory': os.getcwd(),
                'Start time': str(start),
                'End time': str(end),
                'Run time': (end - start).seconds,
                'Inputs': [],
            }

            if args.genome_fasta is not None:
                run_metadata['Inputs'].append(
                    {'genome_input': input_metadata(args.genome_fasta)})
            if args.nt_input is not None:
                run_metadata['Inputs'].append(
                    {'nt_input': input_metadata(args.nt_input)})
            if args.aa_input is not None:
                run_metadata['Inputs'].append(
                    {'aa_input': input_metadata(args.aa_input)})

            with atomic_write(out + '/runlog.yaml', overwrite=True) as ofile:
                yaml.dump(run_metadata, ofile, default_flow_style=False)
        except Exception as e:
            sys.stderr.write('GMGC-mapper Error: ')
            sys.stderr.write(str(e))
            sys.stderr.write('\n')
            sys.exit(1)
def download_example_datasets(self):
    """Download example datasets from `openSNP <https://opensnp.org>`_.

    Per openSNP, "the data is donated into the public domain using `CC0 1.0
    <http://creativecommons.org/publicdomain/zero/1.0/>`_."

    Returns
    -------
    paths : list of str or empty str
        paths to example datasets

    References
    ----------
    1. Greshake B, Bayer PE, Rausch H, Reda J (2014), "openSNP-A Crowdsourced
       Web Resource for Personal Genomics," PLOS ONE, 9(3): e89204,
       https://doi.org/10.1371/journal.pone.0089204
    """
    paths = []
    paths.append(
        self._download_file(
            "https://opensnp.org/data/662.23andme.304",
            "662.23andme.304.txt.gz",
            compress=True,
        )
    )
    paths.append(
        self._download_file(
            "https://opensnp.org/data/662.23andme.340",
            "662.23andme.340.txt.gz",
            compress=True,
        )
    )
    paths.append(
        self._download_file(
            "https://opensnp.org/data/662.ftdna-illumina.341",
            "662.ftdna-illumina.341.csv.gz",
            compress=True,
        )
    )
    paths.append(
        self._download_file(
            "https://opensnp.org/data/663.23andme.305",
            "663.23andme.305.txt.gz",
            compress=True,
        )
    )
    # these two files consist of concatenated gzip files and therefore need
    # special handling
    paths.append(
        self._download_file(
            "https://opensnp.org/data/4583.ftdna-illumina.3482",
            "4583.ftdna-illumina.3482.csv.gz",
        )
    )
    paths.append(
        self._download_file(
            "https://opensnp.org/data/4584.ftdna-illumina.3483",
            "4584.ftdna-illumina.3483.csv.gz",
        )
    )

    for gzip_path in paths[-2:]:
        # https://stackoverflow.com/q/4928560
        # https://stackoverflow.com/a/37042747
        with open(gzip_path, "rb") as f:
            decompressor = zlib.decompressobj(31)

            # decompress data from first concatenated gzip file
            data = decompressor.decompress(f.read())

            if len(decompressor.unused_data) > 0:
                # decompress data from second concatenated gzip file, if any
                additional_data = zlib.decompress(decompressor.unused_data, 31)
                data += additional_data[33:]  # skip over second header

        # recompress data
        with atomic_write(gzip_path, mode="wb", overwrite=True) as f:
            self._write_data_to_gzip(f, data)

    return paths
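# The concatenated-gzip handling above leans on zlib.decompressobj(31): wbits
# 31 selects gzip framing, and any bytes following the first gzip member are
# left in `unused_data` rather than consumed. A standalone sketch of that
# behavior (the 33-byte header skip above is specific to those openSNP files
# and is not reproduced here):
import gzip
import zlib

blob = gzip.compress(b'first member') + gzip.compress(b'second member')

d = zlib.decompressobj(31)
first = d.decompress(blob)
rest = zlib.decompress(d.unused_data, 31)

assert first == b'first member'
assert rest == b'second member'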
def dump_debug_info(praw_object, exc=None, paste_key=None, xml=None,
                    extra_data={}, dir="error", build=None):
    '''
    if not isinstance(praw_object, praw_object_wrapper_t):
        raise ValueError("dump_debug_info was passed an invalid praw_object: {}".format(type(praw_object)))
    '''

    if not (paste_key is None or isinstance(paste_key, str)):
        raise ValueError(
            "dump_debug_info was passed an invalid paste_key: {}".format(
                type(paste_key)))

    id = praw_object.id

    if not os.path.exists(dir):
        os.makedirs(dir)

    if not os.path.exists("{}/{}".format(dir, id)):
        os.makedirs("{}/{}".format(dir, id))

    '''
    if xml is None and isinstance(paste_key, str):
        try:
            c = get_url_data("http://pastebin.com/raw/" + paste_key)
            c = c.replace("-", "+").replace("_", "/")
            xml = pastebin.decode_base64_and_inflate(c)
        except urllib2.URLError as e:
            logging.error("An exception occurred when attempting to fetch xml for debug dump.")
    '''

    if xml is not None:
        if isinstance(xml, ET.ElementTree):
            xml = xml.getroot()
        xml_str = ET.tostring(xml).decode()
        if not isinstance(xml_str, str):
            raise ValueError(
                "dump_debug_info was passed invalid xml: is not string or "
                "coercable to string")
        with open("{}/{}/pastebin.xml".format(dir, id), "w") as f:
            f.write(xml_str)

    data = {}

    if exc is not None:
        data['error_text'] = repr(exc)
    if paste_key is not None:
        data['pastebin_url'] = "http://pastebin.com/raw/{}".format(paste_key)
    if praw_object is not None:
        if isinstance(praw_object, praw.models.Comment):
            data['type'] = "comment"
        else:
            data['type'] = "submission"
        data['url'] = praw_object.permalink
    if build is not None:
        if hasattr(build, 'passives_by_name'):
            data['passives'] = build.passives_by_name
            for key, val in list(data['passives'].items()):
                if not isinstance(key, str):
                    logging.warning(
                        "WARNING: {} passive key overriden to 'NONE'.".format(key))
                    data['passives']['NONE'] = val
                    del data['passives'][key]

    data.update(extra_data)

    with atomic_write("{}/{}/info.json".format(dir, id), overwrite=True) as f:
        json.dump(data, f, sort_keys=True, indent=4)

    if exc is not None:
        with open("{}/{}/traceback.txt".format(dir, id), "w") as f:
            traceback.print_exc(file=f)

    logging.info("Dumped info to {}/{}/".format(dir, id))
async def dir_task(limiter, session, path, consul_path):
    path.mkdir(exist_ok=True)
    prev_items = {str(i.relative_to(path)) for i in path.rglob("*")}
    index = 0
    while True:
        if index:
            index_q = f"&index={index}"
        else:
            index_q = ""
        logger.debug("dir_task: get: path=%s index=%s ...", consul_path, index)
        try:
            await limiter.acquire()
            async with session.get(
                f"{CONSUL_ADDR}/v1/kv/{consul_path}?recurse=true{index_q}",
            ) as r:
                logger.debug("dir_task: ... get: path=%s status=%s",
                             consul_path, r.status)
                new_index = int(r.headers["X-Consul-Index"])
                if new_index < index:
                    index = 0
                    logger.warning("dir_task: path=%s: resetting index",
                                   consul_path)
                else:
                    index = new_index
                if r.status != 200:
                    continue
                j = await r.json()
        except (
            aiohttp.client_exceptions.ClientConnectorError,
            aiohttp.client_exceptions.ServerDisconnectedError,
            asyncio.exceptions.TimeoutError,
        ) as e:
            logger.warning("dir_task: consul server: %s", e)
            continue

        items = {
            i["Key"][len(consul_path) + 1:]: base64.b64decode(i["Value"])
            for i in j
        }
        for k, v in items.items():
            logger.debug("dir_task: path=%s: writing '%s'", consul_path, k)
            fpath = path / k
            with atomicwrites.atomic_write(fpath, mode="wb", overwrite=True) as f:
                f.write(v)
            fpath.chmod(0o444)
            prev_items.discard(k)
        for i in prev_items:
            logger.debug("dir_task: path=%s: deleting '%s'", consul_path, i)
            path.joinpath(i).unlink()
        prev_items = set(items)
def _write_param(key, value):
    param_path = os.path.join(PARAMS_DIR, key)
    with atomic_write(param_path, overwrite=True) as f:
        f.write(json.dumps(value))
    os.chmod(param_path, 0o666)
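# Note on the explicit os.chmod above: atomic_write stages its output in a
# temp file, and temp files are typically created with restrictive 0600
# permissions that survive the rename, so targets meant to be readable by
# other users need their mode set explicitly afterwards. A minimal sketch of
# the same pattern (param_path here is an illustrative standalone example):
import json
import os
import tempfile

from atomicwrites import atomic_write

param_path = os.path.join(tempfile.mkdtemp(), 'param.json')

with atomic_write(param_path, overwrite=True) as f:
    f.write(json.dumps({'threshold': 0.5}))

os.chmod(param_path, 0o644)  # relax the temp file's default permissions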
        if playlist['name'] == "Discover Weekly":
            continue
        print(playlist['name'])
        print(' total tracks', playlist['tracks']['total'])
        results = sp.user_playlist(username, playlist['id'],
                                   fields="tracks,next")
        deeztracks = []
        tracks = results['tracks']
        deeztracks += tracks['items']
        show_tracks(tracks)
        while tracks['next']:
            tracks = sp.next(tracks)
            deeztracks += tracks['items']
            show_tracks(tracks)
        trackery += [deeztracks]

    random.shuffle(trackery)
    flattrack = [track for album in trackery for track in album]
    pp = pprint.PrettyPrinter(indent=4)
    print("Creating . . .")
    pigl = sp.user_playlist_create(username, "Shuffle!", public=False)
    for offset in range(0, len(flattrack), 50):
        sp.user_playlist_add_tracks(
            username, pigl['id'],
            [track['track']['id'] for track in flattrack[offset:offset + 50]])
else:
    print("Can't get token for", username)

with atomic_write('cache.yaml', overwrite=True) as f:
    yaml.dump(cache, f)
def get_next_statement(self, existing_statements=set(),
                       downloaded_statements=set()):
    pay_history, = self.wait_and_return(
        lambda: self.find_element_in_any_frame(
            By.PARTIAL_LINK_TEXT, "Pay History", only_displayed=True))
    pay_history.click()

    def get_statement_table():
        try:
            for table in self.find_elements_in_any_frame(
                    By.TAG_NAME, 'table', only_displayed=True):
                headings = [
                    x.text.strip()
                    for x in table.find_elements_by_xpath('thead/tr/th')
                ]
                if 'Pay Date' in headings and 'Document Number' in headings:
                    return table
        except:
            import traceback
            traceback.print_exc()

    table, = self.wait_and_return(get_statement_table)
    date_format = '%m/%d/%Y'
    for row in table.find_elements_by_xpath('tbody/tr'):
        row_text = [
            x.text.strip() for x in row.find_elements_by_tag_name('td')
        ]
        row_text = [x for x in row_text if x]
        pay_date = row_text[0]
        document_number = row_text[1]
        assert re.fullmatch('[0-9A-Z]+', document_number), document_number
        pay_date = datetime.datetime.strptime(pay_date, date_format).date()
        document_str = 'Document %r : %r' % (pay_date, document_number)
        if (pay_date, document_number) in existing_statements:
            logger.info(' Found in existing')
            continue
        if (pay_date, document_number) not in downloaded_statements:
            logger.info('%s: Downloading', document_str)
            link = row.find_element_by_tag_name('a')
            link.click()
            download_link, = self.wait_and_return(
                lambda: self.find_element_in_any_frame(
                    By.XPATH,
                    '//input[@type="image" and contains(@title, "Download")]'))
            download_link.click()
            logger.info('%s: Waiting to get download', document_str)
            download_result, = self.wait_and_return(self.get_downloaded_file)
            name, data = download_result
            if len(data) < 5000:
                raise RuntimeError('Downloaded file size is invalid: %d' %
                                   len(data))
            output_name = '%s.statement-%s.pdf' % (
                pay_date.strftime('%Y-%m-%d'), document_number)
            output_path = os.path.join(self.output_directory, output_name)
            with atomic_write(output_path, mode='wb') as f:
                f.write(data)
            downloaded_statements.add((pay_date, document_number))
            return True
        else:
            logger.info('%s: Just downloaded', document_str)
    return False
def save_df_as_csv(
    df, path, filename, comment="", prepend_info=True, atomic=True, **kwargs
):
    """Save dataframe to a CSV file.

    Parameters
    ----------
    df : pandas.DataFrame
        dataframe to save
    path : str
        path to directory where to save CSV file
    filename : str or buffer
        filename for file to save or buffer to write to
    comment : str
        header comment(s); one or more lines starting with '#'
    prepend_info : bool
        prepend file generation information as comments
    atomic : bool
        atomically write output to a file on local filesystem
    **kwargs
        additional parameters to `pandas.DataFrame.to_csv`

    Returns
    -------
    str or buffer
        path to saved file or buffer (empty str if error)
    """
    buffer = False
    if isinstance(filename, io.IOBase):
        buffer = True

    if isinstance(df, pd.DataFrame) and len(df) > 0:
        if not buffer and not create_dir(path):
            return ""

        if buffer:
            destination = filename
        else:
            destination = os.path.join(path, filename)
            logger.info("Saving {}".format(os.path.relpath(destination)))

        if prepend_info:
            s = (
                "# Generated by snps v{}, https://pypi.org/project/snps/\n"
                "# Generated at {} UTC\n".format(
                    snps.__version__,
                    datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
                )
            )
        else:
            s = ""

        s += comment

        if "na_rep" not in kwargs:
            kwargs["na_rep"] = "--"

        if buffer:
            destination.write(s)
            df.to_csv(destination, **kwargs)
            destination.seek(0)
        elif atomic:
            with atomic_write(destination, mode="w", overwrite=True) as f:
                f.write(s)
                # https://stackoverflow.com/a/29233924
                df.to_csv(f, **kwargs)
        else:
            with open(destination, mode="w") as f:
                f.write(s)
                df.to_csv(f, **kwargs)

        return destination
    else:
        logger.warning("no data to save...")
        return ""
def save_cache_file(self, data):
    with atomic_write(self.cache_file, overwrite=True) as cache_file:
        json.dump(data, cache_file)