def test_file_not_copied(tmpdir):
    tmpdir.join('old_file').write('contents here')
    f = AtomicFile(tmpdir.join('old_file').strpath, copy_existing=False)
    assert f.read() == ''
    f.close()
def metadataToSummaryJSON(rsid, metadata, writeFile=True, doStats=True):
    logger.info("%s writing summary json", rsid)

    summary = {
        "recordset_id": rsid,
        "filename": metadata["name"],
        "filemd5": metadata["filemd5"],
        "harvest_date": metadata["processing_start_datetime"],
        "records_count": 0,
        "records_create": 0,
        "records_update": 0,
        "records_delete": 0,
        "mediarecords_count": 0,
        "mediarecords_create": 0,
        "mediarecords_update": 0,
        "mediarecords_delete": 0,
        "datafile_ok": True,
        "commited": metadata["commited"],
        "paused": metadata["paused"]
    }

    if metadata["filemd5"] is None:
        summary["datafile_ok"] = False
        if writeFile:
            with AtomicFile(rsid + ".summary.json", "wb") as jf:
                json.dump(summary, jf, indent=2)
        return summary

    csv_line_count = 0
    no_recordid_count = 0
    duplicate_record_count = 0
    duplicate_id_count = 0
    for t in metadata["counts"].values():
        csv_line_count += t["total_line_count"]
        no_recordid_count += t["no_recordid_count"]
        duplicate_record_count += t["duplicate_record_count"]
        duplicate_id_count += t["duplicate_id_count"]

        if t["type"] in ingestion_types:
            typ = ingestion_types[t["type"]]
            summary[typ + "_count"] += t["processed_line_count"]
            for op in ["create", "update", "delete"]:
                if op in t:
                    summary[typ + "_" + op] += t[op]

    summary["csv_line_count"] = csv_line_count
    summary["no_recordid_count"] = no_recordid_count
    summary["duplicate_occurence_count"] = duplicate_record_count
    summary["dublicate_occurence_ids"] = duplicate_id_count

    if doStats:
        stats.index(doc_type='digest', body=summary)

    if writeFile:
        with AtomicFile(rsid + ".summary.json", "wb") as jf:
            json.dump(summary, jf, indent=2)
        with AtomicFile(rsid + ".metadata.json", "wb") as jf:
            json.dump(metadata, jf, indent=2)
    else:
        return summary
def test_file_moved_only_after_close(tmpdir):
    fn = str(tmpdir.join('test'))
    f = AtomicFile(fn)
    f.write('this is a test')
    assert not os.path.exists(fn)
    f.close()
    assert os.path.exists(fn)
def test_tmp_file_created_in_same_dir(tmpdir):
    fn = str(tmpdir.join('blkajdf'))
    assert len(tmpdir.listdir()) == 0
    f = AtomicFile(fn)
    f.write('nothing')
    assert len(tmpdir.listdir())
    assert tmpdir.listdir()[0].fnmatch('.blkajdf*')
    f.close()
def test_write(self):
    create_test_file(self.filename)
    af = AtomicFile(self.filename)
    expected = "this is written by AtomicFile.\n"
    af.write(expected)
    af.close()

    result = open(self.filename, 'r').read()
    try:
        self.assertEqual(result, expected)
    finally:
        os.remove(self.filename)
def atomic_save(self, dst, buffer_size=16384):
    """Overrides werkzeug's original FileStorage save method to provide
    an atomic file save."""
    if type(dst) is str:
        # Create atomic file handle
        handle = AtomicFile(dst, 'wb')
        # Call original method
        self._save_original(handle, buffer_size)
        # Close AtomicFile handle, which will rename the file into place
        handle.close()
    else:
        # Don't change anything if we've been given a stream
        self._save_original(dst, buffer_size)
def download(self,
             base_dir: Union[str, Path] = Path("."),
             overwrite: bool = False,
             warn: bool = True) -> None:

    if isinstance(self.client, local.Local):
        return

    post_dir = Path(base_dir) / "{fetched_from}-{id}".format(**self.info)
    post_dir.mkdir(parents=True, exist_ok=True)

    for res in ("info", "artcom", "notes", "media"):
        if res == "media" and "file_ext" not in self.info:
            LOG.warning("No decensor data found for post %d, "
                        "can't download media.", self.info["id"])
            continue

        ext = "json" if res != "media" else self.info["file_ext"]
        ext = "webm" if ext == "zip" else ext

        path = post_dir / f"{res}.{ext}"

        if path.exists() and not overwrite:
            if warn:
                LOG.warning("Not overwriting %r", str(path))
            continue

        content = getattr(self, res)
        if not content:
            continue

        if res != "media":
            with AtomicFile(path, "w") as out:
                out.write("%s%s" % (utils.jsonify(content).rstrip(), os.linesep))
            continue

        if self.info["file_ext"] != "zip":
            LOG.info("Downloading %s of %s for post %d",
                     self.info["file_ext"].upper(),
                     utils.bytes2human(self.info["file_size"]),
                     self.info["id"])
        else:
            LOG.info("Downloading WebM ugoira for post %d", self.info["id"])

        with AtomicFile(path, "wb") as out:
            for chunk in content:  # pylint: disable=not-an-iterable
                out.write(chunk)
def save_lastnode_id():
    """Save the id of the last node created."""
    init_counter()

    with FileLock(_COUNTER_FILE):
        with AtomicFile(_COUNTER_FILE, mode="w") as fh:
            fh.write("%d\n" % _COUNTER)
def columnize(ifile, ofile):
    # column -ts ',' summary.csv | sort > summary.pretty.txt
    p = subprocess.Popen(['column', '-ts', ',', ifile], stdout=subprocess.PIPE)
    lines = p.stdout.readlines()
    with AtomicFile(ofile, 'w', encoding='utf-8') as out:
        for l in lines:
            out.write(l)
def _index_del(self, *line_nums: int) -> None:
    LOG.info("Deleting from index %d post dirs...", len(line_nums))

    with open(self.index, "r", newline="") as in_file, \
         AtomicFile(self.index, "w") as out_file:

        for i, line in enumerate(in_file, 1):
            if i not in line_nums:
                out_file.write(line)
def test_close(self):
    af = AtomicFile(self.filename)
    af.write(b"test\n")
    af.close()
    try:
        af.write(b"test again\n")
        self.fail('ValueError not raised')
    except ValueError:
        pass
    finally:
        os.remove(self.filename)
def save_cache(name, data):
    local_cache[name] = data
    last_saved = local_cache_last_saved.get(name, -1)
    now = time.time()
    if now - last_saved > SAVE_DELAY:
        local_cache_last_saved[name] = now
        cache_path = os.path.join(CACHE_PATH, name + '_cache.json')
        cache_str = json.dumps(data, indent=4)
        with AtomicFile(cache_path, 'w') as f:
            f.write(cache_str)
def write_labels(url, sheet_number=0):
    dat = get_run_ranges(url, sheet_number=sheet_number)
    arr = np.array([tuple(row) for row in dat], dtype=np.dtype("d, d, U200, U200"))
    with AtomicFile('labels.txt', 'w') as f:
        np.savetxt(f, arr, fmt=['%04d', '%04d', '%s', '%s'],
                   header='start run, end run, label1, label2',
                   delimiter=',')
    print "wrote to labels.txt"
    return arr
def get_batch(name: str) -> None:
    if name in existing and name != order_batches[-1]:
        return
    answer = requests.get(batches_url[name])
    try:
        answer.raise_for_status()
    except requests.RequestException:
        return
    with AtomicFile(BATCHES_DIR / name, "w") as file:
        file.write(answer.text)
def _save_config_json(filename, fields):
    """Save a json file containing a dict of variables."""
    output = json.dumps(fields, sort_keys=True, indent=4, separators=(',', ': '))
    with AtomicFile(filename, 'w') as f:
        f.write(output)
def add_visit(number=1):
    counter = 0
    try:
        with open(COUNTER_PATH, "r") as f:
            counter = int(f.read())
    except:
        # Missing or unreadable counter file: start counting from 0.
        pass
    counter += number
    with AtomicFile(COUNTER_PATH, "w") as f:
        f.write(str(counter))
def test_with(self):
    data = b"this is written by AtomicFile.\n"
    with AtomicFile(self.filename) as f:
        f.write(data)
    try:
        f.write(data)
        self.fail("'ValueError: I/O operation on closed file' not raised")
    except ValueError:
        pass
    finally:
        os.remove(self.filename)
def test_close(self):
    af = AtomicFile(self.filename)
    af.write('test\n')
    af.close()
    try:
        af.write('test again\n')
        self.fail('ValueError not raised')
    except ValueError:
        pass
    finally:
        os.remove(self.filename)
def segment_image(path, n_segments=N_SEGMENTS):
    img = img_as_float(imread(path))
    segment_file = path + "." + str(n_segments) + ".segments"
    if os.path.isfile(segment_file):
        return img, np.load(segment_file)
    print "Segmenting ", path
    # Use the n_segments argument (it also names the cache file above).
    segments = slic(img, n_segments=n_segments, compactness=10, sigma=1)
    with AtomicFile(segment_file, 'wb') as fd:
        np.save(fd, segments)
    return img, segments
def test_file_creation(tmpdir):
    fn = str(tmpdir.join('test'))
    f = AtomicFile(fn)
    f.write('this is a test')
    f.close()
    with open(fn) as f:
        assert f.read() == 'this is a test'
def insert_results(cls, filepath, struct_type, formula, accuracy, pseudos, results):
    """Update the entry in the database."""
    with FileLock(filepath):
        outdb = cls.from_file(filepath)

        old_dict = outdb[struct_type][formula]
        if not isinstance(old_dict, dict):
            old_dict = {}
        old_dict[accuracy] = results
        outdb[struct_type][formula] = old_dict

        with AtomicFile(filepath, mode="wt") as fh:
            json.dump(outdb, fh, indent=-1, sort_keys=True)  # , cls=MontyEncoder
def get_contents_to_filename(key, filename, md5=None):
    """Wraps ``key.get_contents_to_filename`` ensuring an atomic fetch.

    The default version on a key will leave partial results on disk if the
    download fails partway through.

    :param boto.s3.key.Key key: The key to fetch
    :param str filename: The filename to fetch into
    :param str md5: If given, compare the downloaded md5 to the given digest hash
    """
    from atomicfile import AtomicFile
    with AtomicFile(filename, mode='wb') as af:
        IDigBioStorage.get_contents_to_file(key, af, md5=md5)
    return filename
def _save(self, request_file, media_path=PRIVATE_FILES_PATH):
    filename = str(time.time())
    ext = os.path.splitext(request_file.filename)[1].lower()

    dirpath = os.path.join(media_path)
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    path = os.path.join(dirpath, '%s%s' % (filename, ext))
    with AtomicFile(path, 'wb') as uploaded:
        uploaded.write(request_file.body)

    display_name = request_file.filename
    url = '%s%s' % (filename, ext)
    return File(display_name, url)
def test_permissions(self):
    expected_mode = 0o741
    create_test_file(self.filename, mode=expected_mode)
    af = AtomicFile(self.filename)
    af.write(b"I don't really care of the content.\n")
    af.close()
    st_mode = os.lstat(self.filename).st_mode & 0o777
    try:
        self.assertEqual(st_mode, expected_mode)
    finally:
        os.remove(self.filename)
def save_all(self):
    if self.saving:
        print "Cannot save because currently saving."
        return False
    else:
        self.saving = True
    try:
        if self.settings.process_with == "pyborg" and self.settings.no_save != "True":
            print "Writing dictionary..."

            with zipfile.ZipFile('archive.zip', 'w', zipfile.ZIP_DEFLATED) as z:
                s = marshal.dumps(self.words)
                z.writestr('words.dat', s)
                s = marshal.dumps(self.lines)
                z.writestr('lines.dat', s)
                # save the version
                z.writestr('version', self.saves_version)

            for filename, data in [('words.txt', self.words),
                                   ('sentences.txt', self.unfilterd)]:
                with AtomicFile(filename, 'w') as f:
                    # write each known word
                    wordlist = []
                    # sort the list before exporting
                    for key in data.keys():
                        wordlist.append([key, len(str(data[key]))])
                    wordlist.sort(lambda x, y: cmp(x[1], y[1]))
                    # map((lambda x: f.write(str(x[0]) + "\n\r")), wordlist)
                    [f.write(str(x[0].encode('utf-8')) + "\n\r") for x in wordlist]

            # Save settings
            self.settings.save()

        print "Dictionary saved."
        return True
    finally:
        self.saving = False
def save(self, filename, fsrc):
    """Save the specified file into the repository."""
    # Get hash
    fhash = self.hash(fsrc)

    # See if the file exists and just link it if possible (dedupe)
    if self._exists(fhash):
        link = self._link(fhash, filename)
        if link is not None:
            return link

    # Create the file
    target = self.storage.path(join(fhash, filename))
    mkdir_p(dirname(target))
    with AtomicFile(target, 'wb') as fdst:
        fsrc.seek(0)
        shutil.copyfileobj(fsrc, fdst)
    return target
def test_write(self):
    create_test_file(self.filename)
    af = AtomicFile(self.filename)
    expected = b"this is written by AtomicFile.\n"
    af.write(expected)
    af.close()

    f = open(self.filename, "rb")
    result = f.read()
    f.close()

    try:
        self.assertEqual(result, expected)
    finally:
        os.remove(self.filename)
def api_update_handler():
    # TODO [basri] is it a good idea to strip spaces from user's own text?
    updated_content = _get_request_json('updated_content')
    if not updated_content:
        return _failure_json('File content is empty')

    requested_path = _get_request_json('file_path')
    path, err = _get_real_path(requested_path)
    if err:
        return _failure_json(err)

    try:
        with AtomicFile(path, 'w') as f:
            f.write(updated_content)
        return jsonify({'result': 'success'})
    except Exception as e:
        logging.error('Error while writing %s: %s', path, str(e))
        # Don't leak the absolute path.
        return _failure_json('Writing %s failed' % requested_path)
def _save_config_cfg(filename, fields):
    """
    fields should be a dictionary mapping variable names to
    (comment string, value) tuples.
    """
    # Must use same dir to be truly atomic
    with AtomicFile(filename, 'w') as f:
        # write the values with comments
        for key in fields.keys():
            f.write("# " + fields[key][0] + "\n")
            s = repr(fields[key][1])
            f.write(key + "\t= ")
            if len(s) > 80:
                cut_string = ""
                while len(s) > 80:
                    position = s.rfind(",", 0, 80) + 1
                    cut_string = cut_string + s[:position] + "\\\n\t\t"
                    s = s[position:]
                s = cut_string + s
            f.write(s + "\n")
def save(self):
    if not self.acquire_write():
        logger.debug("writer exists, will not save")
    else:
        content = dumps(self._d)
        # write to file
        try:
            with AtomicFile(self.filename, createmode=0o666) as f:
                f.write(char_encoding(content))
                f._fp.flush()
                os.fsync(f.fileno())
        except Exception:
            logger.error("save to cache file failed", exc_info=True)
        self.release_write()
        # change permission
        try:
            os.chmod(self.filename, 0o666)
        except Exception:
            logger.debug("cache file permission change failed: %s", self.filename)
def write_citation_file(dl_id, t, query, recordsets):
    filename = "{0}.{1}.citation.txt".format(dl_id, t)
    logger.debug("Generating citation file: %r", filename)

    rs_strings = []
    total_recs = 0
    total_rs = len(recordsets.keys())
    for rs, rsc in sorted([(rs, recordsets[rs]) for rs in recordsets],
                          key=lambda x: x[1], reverse=True):
        rs_strings.append(
            "http://www.idigbio.org/portal/recordsets/{0} ({1} records)".format(rs, rsc))
        total_recs += rsc

    if total_recs == 0:
        return None

    query_string = json.dumps(query)
    now = datetime.datetime.now()
    rs_string = "\n".join(rs_strings) + "\n"

    with AtomicFile(filename, "wb") as citefile:
        citefile.write(
            citation_format.format(
                now.year,          # 0: Current Year
                query_string,      # 1: Query Text
                total_recs,        # 2: Total Number of Records
                now.isoformat(),   # 3: Access Datetime
                total_rs,          # 4: Number of recordsets
                rs_string,         # 5: List of recordset IDs and counts
            ))
    return filename
def test_encoding(self):
    data = u"Unicode Capit\xe1n is written by AtomicFile.\n"
    encoding = "utf-8"

    af = AtomicFile(self.filename, "wb", encoding=encoding)
    af.write(data)
    af.close()

    f = codecs.open(self.filename, "rb", encoding=encoding)
    decoded_result = f.read()
    f.close()

    f = open(self.filename, "rb")
    raw_result = f.read()
    f.close()

    try:
        self.assertEqual(data, decoded_result)
        self.assertEqual(data.encode(encoding), raw_result)
    finally:
        os.remove(self.filename)
def writecache(value):
    logger.debug("Writing cache to %r", filename)
    with AtomicFile(filename, 'wb') as f:
        cPickle.dump(value, f)
def write(cls, playback_buffer):
    with AtomicFile(PlaybackBuffer.buffer_state_file, 'w') as f:
        f.write(str(playback_buffer))
def do_scrap_wrongies(data_in=_WRONGDATA_LOGFILE, debug_out=_OUTDATA_LOGFILE,
                      dirmeta=_DIRDATA_LOGFILE, dirsubdir=_DIRSUBDIR):
    # ### Per wrongdata-logfile (with dl-continuing support) ###
    log = _log.getChild("do_scrap_wrongies")

    # Assuming the files are not too giant (reasonably enough):
    in_data = unjsl(data_in)
    debug_data_base = unjsl_or_empty(debug_out)
    debug_data = {d['url']: d for d in debug_data_base}
    to_debug = functools.partial(onjsl, debug_out)  # lambda data: onjsl(debug_out, data)
    all_checked_urls = {}

    # to_meta = lambda target_dir, data: onjsl(os.path.join(target_dir, dirmeta), data)

    existing_cache = {}  # meta_file -> {url -> rmeta}

    def get_meta_existing(meta_file, cache=existing_cache):
        try:
            return cache[meta_file]
        except KeyError:
            log.debug("Loading meta_existing %r", meta_file)
            meta_existing = {v['url']: v for v in unjsl_or_empty(meta_file)}
            cache[meta_file] = meta_existing
            return meta_existing

    for wrongie in in_data:
        # ### Per reddit link (basically) with possibly several images there ###
        # Example `wrongie`: {"url": "http://500px.com/photo/29700163",
        #     "target_dir": "/home/hell/files/wp//reddit_earthporn",
        #     "_downloaded": 8, "_filecount": 0, "_filename": "1m90ui"}
        url = wrongie['url']
        if url in debug_data:
            log.log(15, "Already processed wrongie: %r", url)
            continue  # Already processed, presumably.
        log.log(13, "Processing wrongie %r (%r)", url, wrongie)

        target_dir = os.path.join(wrongie['target_dir'], dirsubdir)
        mkdirs(target_dir)

        meta_file = os.path.join(target_dir, dirmeta)
        meta_existing = get_meta_existing(meta_file)  # url -> rmeta
        dmeta = dict(wrongie)  # debug-out data

        # NOTE: long request-y process.
        try:
            stuff = img_scrap_stuff.do_horrible_things(url, urls_to_skip=all_checked_urls)
        except GetError:
            log.error("Skipping wrongie %r", wrongie)
            continue

        # stuff = ([checked_url, ...], [(image_url, image_data, {'resp': ..., ...}), ...])
        checked_urls, found_images = stuff
        all_checked_urls.update({u: 1 for u in checked_urls})
        dd_processed = debug_data.setdefault('processed', [])

        for imgurl, imgdata, extras in found_images:
            # ### Per image file (known to be large) ###
            if imgurl in meta_existing:
                log.log(14, "Probably already saved: %r", imgurl)
                # continue  # Actually, whatever
            log.log(12, " ... %r", imgurl)

            # NOTE: will be duplicated on each line.
            rmeta = dict(base=wrongie)  # dirmeta-out data
            resp = extras.get('resp')
            filename_img = make_filename(imgurl, imgdata, resp=resp)
            # per-`url` small hash
            filename_group = wrongie.get('_filename', str2hash(url, 8))
            # NOTE: numeric-annotation would not make sense as found_images is url-sorted.
            filename = '%s__%s' % (filename_group, filename_img)
            filename_full = os.path.join(target_dir, filename)
            # For uniqueness (non-overwriting), assuming we don't try to re-download stuff.
            filename_target = consecutive_filename(filename_full)

            _exdata = dict(filename_base=filename, filename=filename_target, url=imgurl)
            rmeta.update(_exdata)
            with AtomicFile(filename_target) as f:
                f.write(imgdata)

            onjsl(meta_file, rmeta)
            meta_existing[imgurl] = rmeta  # make sure we don't try it again
            # Note: might be lost (as rmeta is written already but dmeta isn't yet)
            dd_processed.append(dict(_exdata))

        # Per reddit link again (after downloading all images is done)
        # Write it down so we don't pester it again
        dmeta.update(processed=dd_processed)
        debug_data[url] = dmeta
        to_debug(dmeta)

    # Per wrongdata-logfile again. Nothing to do here after all that.
    log.info("Done, apparently")
    return locals()  # In case some post-debug is desired.
def test_file_copied(tmpdir):
    tmpdir.join('old_file').write('contents here')
    f = AtomicFile(tmpdir.join('old_file').strpath)
    assert f.read() == 'contents here'
    f.close()
def _index_add(self, post_dirnames: List[str]) -> base.InfoGenType:
    LOG.info("Indexing %d posts...", len(post_dirnames))
    post_dirnames.sort(key=lambda d: fast_int(d.split("-")[-1], -1), reverse=True)

    if not self.index.exists():
        self.index.write_text("")

    with open(self.index, "r", newline="") as in_file, \
         AtomicFile(self.index, "w") as out_file:

        # Not just using csv.DictReader for performance reasons
        id_idx = list(POST_FIELDS.keys()).index("id")
        reader = csv.reader(in_file, delimiter="\t")

        src_row_writer = csv.writer(out_file, delimiter="\t")
        new_info_writer = csv.DictWriter(out_file, delimiter="\t",
                                         fieldnames=POST_FIELDS.keys(),
                                         extrasaction="ignore")

        pool = ThreadPool(mp.cpu_count() * 5)
        tasks = [pool.apply_async(self._get_info, (p,)) for p in post_dirnames]

        def info_gen():
            for task in tasks:
                try:
                    yield task.get()
                except (FileNotFoundError, NotADirectoryError) as err:
                    if str(err.filename) != self.index.name:
                        LOG.error(str(err))

        info_gen = info_gen()

        try:
            new_info = next(info_gen)
        except StopIteration:
            return

        no_more_to_add = False

        for source_row in reader:
            try:
                src_id = fast_int(source_row[id_idx], raise_on_invalid=True)
            except ValueError:
                LOG.error("Removing invalid row in index: %r", source_row)
                continue

            while not no_more_to_add and new_info["id"] > src_id:
                new_info_writer.writerow(new_info)
                yield new_info

                try:
                    new_info = next(info_gen)
                except StopIteration:
                    no_more_to_add = True

            src_row_writer.writerow(source_row)

        if not no_more_to_add:
            new_info_writer.writerow(new_info)
            yield new_info

        for remaining_info in info_gen:
            new_info_writer.writerow(remaining_info)
            yield remaining_info
def test_abort(tmpdir):
    fn = str(tmpdir.join('test'))

    f = AtomicFile(fn)
    f.write('test')
    f.abort()
    assert len(tmpdir.listdir()) == 0

    f = AtomicFile(fn)
    f.write('test')
    del f
    assert len(tmpdir.listdir()) == 0

    tmpdir.join('test').write('test file is this')

    f = AtomicFile(fn)
    f.write('this is something else')
    f.abort()
    assert tmpdir.join('test').read() == 'test file is this'

    f = AtomicFile(fn)
    f.write('this is something else')
    del f
    assert tmpdir.join('test').read() == 'test file is this'
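# The snippets above all rely on roughly the same AtomicFile contract: data is
# written to a hidden temporary file in the destination directory, renamed over
# the final path only on close(), and discarded by abort() (or by dropping the
# handle) without touching any existing file.  The class below is a minimal,
# hypothetical sketch of that contract for illustration only -- it is NOT the
# actual atomicfile package and omits features used above (copy_existing=,
# encoding=, createmode=, read(), permission preservation).

import os
import tempfile


class MinimalAtomicFile:
    """Write to a temp file beside the target; rename into place on close."""

    def __init__(self, path, mode="w"):
        self._path = path
        directory = os.path.dirname(os.path.abspath(path))
        # Hidden temp file in the same directory, so the final rename stays on
        # one filesystem and is atomic.
        fd, self._tmp_path = tempfile.mkstemp(
            prefix="." + os.path.basename(path), dir=directory)
        self._fp = os.fdopen(fd, mode)

    def write(self, data):
        # Raises ValueError once the underlying file is closed, matching the
        # test_close/test_with snippets above.
        return self._fp.write(data)

    def close(self):
        if self._fp.closed:
            return
        self._fp.flush()
        os.fsync(self._fp.fileno())
        self._fp.close()
        # Atomically replace (or create) the destination.
        os.replace(self._tmp_path, self._path)

    def abort(self):
        # Discard the pending write; any pre-existing destination file is kept.
        if not self._fp.closed:
            self._fp.close()
            os.remove(self._tmp_path)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Commit on success, discard on error.
        if exc_type is None:
            self.close()
        else:
            self.abort()

    def __del__(self):
        # Mirror test_abort: a dropped, unclosed handle leaves no trace.
        try:
            self.abort()
        except Exception:
            pass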