def test_read(self):
    FILENAME.write_text('hello')
    with safer.open(FILENAME, 'r+', temp_file=True) as fp:
        assert fp.read() == 'hello'
    with self.assertRaises(ValueError):
        safer.open(FILENAME, 'r+')
def test_all_modes(self):
    modes = 'w', 'r', 'a', 'r+', 'w+', 'a+'
    for m in modes:
        with safer.open(FILENAME, m, temp_file=True):
            pass
        with safer.open(FILENAME, m + 'b', temp_file=True):
            pass
def fix_nulls(filepath):
    """Removes null characters from the given file and overwrites it atomically.

    When the device gets unplugged mid-write to a CSV, null characters get
    introduced, corrupt the CSV, and cause later Athena queries to fail.
    This function makes a basic attempt at making the CSV valid again.
    """
    with safer.open(filepath, "rb") as fr:
        content = fr.read().replace(b"\x00", b"")
    with safer.open(filepath, "wb") as fw:
        fw.write(content)
    return filepath
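# A minimal usage sketch for fix_nulls above (this command-line wrapper is an
# assumption, not part of the original code): because safer.open commits the
# rewritten bytes only when the `with` block exits cleanly, a crash mid-write
# leaves the original CSV untouched rather than half-overwritten.
if __name__ == "__main__":
    import sys

    for csv_path in sys.argv[1:]:
        print("fixed:", fix_nulls(csv_path))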
def test_mode_error1(self, safer_writer):
    with safer.open(FILENAME, 'w') as fp:
        pass
    with open(FILENAME) as fp:
        with self.assertRaises(ValueError) as e:
            safer_writer(fp)
        assert e.exception.args[0] == 'Stream mode "r" is not a write mode'
def test_dry_run(self, safer_writer):
    assert not FILENAME.exists()
    with safer.open(FILENAME, 'w', dry_run=True) as fp1:
        assert not FILENAME.exists()
        fp1.write('one')
        assert not FILENAME.exists()
    assert not FILENAME.exists()
def write(self, fp):
    if isinstance(fp, str):
        with safer.open(fp, 'w') as fp2:
            return self.write(fp2)
    print(json.dumps(self.header), file=fp)
    for line in self.lines:
        print(json.dumps(line.to_list()), file=fp)
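# A self-contained sketch of the str-or-stream dispatch pattern that write()
# uses above; dump_records and its sample data are hypothetical. Passing a
# path string reopens it through safer.open, so the JSON-lines output is
# committed atomically; passing an already-open stream writes to it directly.
import json

import safer


def dump_records(records, fp):
    if isinstance(fp, str):
        with safer.open(fp, 'w') as fp2:
            return dump_records(records, fp2)
    for record in records:
        print(json.dumps(record), file=fp)


dump_records([{'a': 1}, {'b': 2}], 'records.jsonl')  # a path or a stream both work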
def test_int_filename(self):
    with self.assertRaises(TypeError) as m:
        with safer.open(1, 'w', temp_file=True) as fp:
            fp.write('hello')
    arg = m.exception.args[0]
    assert arg == '`name` must be string, not int'
def test_nested_writers(self, safer_writer):
    with safer.open(FILENAME, 'w') as fp1:
        fp1.write('one')
        with safer_writer(fp1) as fp2:
            fp2.write('two')
            fp2.write('three')
        fp1.write('four')
    assert FILENAME.read_text() == 'onetwothreefour'
def test_temp_file1(self):
    temp_file = FILENAME.with_suffix('.temp_file')
    with safer.open(FILENAME, 'w', temp_file=temp_file) as fp:
        assert temp_file.exists()
        assert os.path.exists(temp_file)
        fp.write('hello')
    assert FILENAME.read_text() == 'hello'
    assert not temp_file.exists()
def test_make_parents(self, safer_open):
    FILENAME = Path('foo/test.txt')
    with self.assertRaises(IOError):
        with safer.open(FILENAME, 'w'):
            pass
    with safer_open(FILENAME, 'w', make_parents=True) as fp:
        fp.write('hello')
    assert FILENAME.read_text() == 'hello'
def test_file_error(self, safer_writer):
    with safer.open(FILENAME, 'w') as fp1:
        fp1.write('one')
        with self.assertRaises(ValueError):
            with safer_writer(fp1) as fp2:
                fp2.write('two')
                fp2.write('three')
                raise ValueError
        fp1.write('four')
    assert FILENAME.read_text() == 'onefour'
def _get_or_create_todos(self) -> dict:
    if os.path.exists(self.todos_path):
        with open(self.todos_path) as file:
            return json.load(file)
    os.makedirs(self.todos_dir, exist_ok=True)
    with safer.open(self.todos_path, "w") as file:
        json.dump(self.DEFAULT_TODOS, file)
    return self.DEFAULT_TODOS
def get_tomorrows_RSS_fuel_data(filter=True, **kwargs):
    """Get tomorrow's Fuel Watch data and save it to file."""
    file_failed_list = []
    days = ["tomorrow"]
    kwargs["Day"] = "tomorrow"
    # Get the dict of products to iterate over
    prod = filters.Product()
    # Cycle through the available days and collect Fuel Watch data
    if tomorrow_RSS_data_available():
        for day in days:
            kwargs["Day"] = day
            for key in prod.keys():
                # FilterName is not a Product
                if key != "FilterName":
                    kwargs["Product"] = key
                    # Construct the URL to get Fuel Watch data
                    url = construct_fuel_watch_url(**kwargs)
                    # Get tomorrow's Fuel Watch data
                    get_data = requests.get(url)
                    # If the response is OK (200), parse the data
                    if get_data.status_code == 200:
                        parsed_data = feedparser.parse(get_data.content)["entries"]
                        # Make a file name with the parsed data parameters
                        # for saving to disk.
                        file_name = (
                            join_string_with_operator(
                                values=[
                                    parsed_data[0]["updated"],
                                    kwargs["StateRegion"],
                                    kwargs["Product"],
                                ],
                                operator="_",
                            )
                            + ".json"
                        )
                        # Use safer to save to disk so files are not
                        # corrupted by a failure before closing.
                        with safer.open(os.path.join("fuel_data", file_name), "w") as fp:
                            # All of the file is written, or none of it
                            json.dump(parsed_data, fp)
                        if not check_file_name_saved_on_disk(file_name):
                            file_failed_list.append(file_name)
    print("Files Failed ", file_failed_list)
def test_binary(self, safer_open):
    with safer_open(FILENAME, 'wb') as fp:
        fp.write(b'hello')
        fp.write(b' there')
        with self.assertRaises(TypeError):
            fp.write('hello')
    with open(FILENAME, 'rb') as fp:
        assert fp.read() == b'hello there'
    with safer.open(FILENAME, 'rb') as fp:
        assert fp.read() == b'hello there'
def test_temp_file2(self):
    temp_file = FILENAME.with_suffix('.temp_file')
    with self.assertRaises(ValueError) as e:
        with safer.open(FILENAME, 'w', temp_file=temp_file) as fp:
            assert temp_file.exists()
            fp.write('hello')
            raise ValueError('Expected')
    assert e.exception.args[0] == 'Expected'
    assert not FILENAME.exists()
    assert not temp_file.exists()
def test_nested_writers_dry_run(self, safer_writer):
    assert not FILENAME.exists()
    with safer.open(FILENAME, 'w', dry_run=True) as fp1:
        assert not FILENAME.exists()
        fp1.write('one')
        with safer_writer(fp1, dry_run=True) as fp2:
            assert not FILENAME.exists()
            fp2.write('two')
            fp2.write('three')
        assert not FILENAME.exists()
        fp1.write('four')
    assert not FILENAME.exists()
def get_blocks_from_file(cls, fileNumber):
    """Get all blocks from a file."""
    obj = cls.__new__(cls)
    block_file = f"{Config.BLOCKS_DIR}blk{blk_file_format(fileNumber)}.dat"
    blocks = []
    with safer.open(block_file, "rb") as f:
        file_bytes = f.read()
    while file_bytes:
        # Each block is prefixed with its size as a 4-byte little-endian int
        block_size = int.from_bytes(file_bytes[:4], byteorder="little", signed=False)
        blocks.append(obj.deserialize(file_bytes[4:block_size + 4]))
        file_bytes = file_bytes[4 + block_size:]
    return blocks
def test_explicit_close(self):
    FILENAME.write_text('hello')
    assert FILENAME.read_text() == 'hello'
    before = set(os.listdir('.'))
    fp = safer.open(FILENAME, 'w', temp_file=True)
    fp.write('OK!')
    assert FILENAME.read_text() == 'hello'
    after = set(os.listdir('.'))
    assert len(before) + 1 == len(after)
    assert len(after.difference(before)) == 1
    fp.close()
    assert FILENAME.read_text() == 'OK!'
    after = set(os.listdir('.'))
    assert before == after
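# A short sketch of the behavior test_explicit_close() verifies, outside the
# test harness (the 'report.txt' path is made up): with temp_file=True,
# safer.open writes to a temporary file next to the target and only moves it
# over the target on a successful close(), so readers never see a partial file.
fp = safer.open('report.txt', 'w', temp_file=True)
fp.write('all or nothing')
fp.close()  # only now does report.txt change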
def commit(self, host: str) -> bool:
    """Persist current API information to file

    Parameters
    ----------
    host : str
        Host to persist

    Returns
    -------
    bool
    """
    filename = 'config/apidata/%s' % config().getHostfile(host)
    with safer.open(filename, 'w') as f:
        f.write(json.dumps(self.__data[host], indent=2))
    print('Saved data for %s ...' % host)
    return True
def save(self) -> None:
    """
    if not self.validate():
        raise BlockNotValidError("Block is not valid")
    """
    file_number = self.current_file_number
    current_block_file = f"{Config.BLOCKS_DIR}{get_current_blk_file(self.current_file_number)}"
    with safer.open(current_block_file, "ab") as w:
        serialized_block = self.serialize()
        block_size = len(serialized_block)
        w.write(
            block_size.to_bytes(4, byteorder="little", signed=False)
            + serialized_block
        )
    print(Block.get_blocks_from_file(0))
    self.set_tip(self.height)
    self.set_block_file_number(self.height, file_number)
    miner_balance = self.get_account(self.miner)
    miner_reward = mining_reward(self.height)
    print(f"Miner reward: {miner_reward}")
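# A minimal, self-contained sketch of the length-prefix framing that save()
# and get_blocks_from_file() share: each record is a 4-byte little-endian
# size followed by the payload, so a reader can walk the file without any
# delimiter. frame/unframe and the sample payloads are illustrations, not
# part of the original code.
def frame(payload: bytes) -> bytes:
    return len(payload).to_bytes(4, byteorder="little", signed=False) + payload


def unframe(data: bytes):
    while data:
        size = int.from_bytes(data[:4], byteorder="little", signed=False)
        yield data[4:4 + size]
        data = data[4 + size:]


assert list(unframe(frame(b"abc") + frame(b"de"))) == [b"abc", b"de"]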
def ld_list(list_livedisc, bandwidth):
    for dList in list_livedisc:
        with safer.open(dList) as f:
            urlList = f.readlines()
        urlList = [x.strip() for x in urlList]
        try:
            random.shuffle(urlList)
            for item in urlList:
                if item != "":
                    print("download: " + item)
                    download_livedisk(str(item), bandwidth)
        except KeyboardInterrupt:
            print("\nInterrupted by user\n")
            exit()
        except Exception:
            print("error:", sys.exc_info()[0])
        finally:
            sys.exit(ExitStatus.success)
def main():
    analysis = processor.sources_analysis.do_analysis()
    sentences = analysis.sentences
    nlp_en = spacy.load("en_core_web_lg")
    nlp_fr = spacy.load("fr_core_news_lg")
    pairs = [(k, v) for k, v in sentences.items()
             if True or ('"' not in k and '-' not in k)]
    # pairs.sort(key=lambda sentence: rate_french_sentence_easiness(sentence[0], analysis), reverse=True)
    pairs.sort(
        key=lambda sentence: rate_french_sentence_reading_level(sentence[0], analysis),
        reverse=True,
    )
    data = {
        k: v
        for pair in pairs[:50]
        for k, v in get_nlp_info(nlp_en, nlp_fr, pair, analysis).items()
    }
    dump = yaml.dump(data, Dumper=Dumper, allow_unicode=True)
    with safer.open("work/nlp_sentences.yaml", "w", encoding='utf-8') as f:
        f.write(dump)
def main(analysis=None):
    if isfile("handmade_dictionary.yaml"):
        print("can't create handmade_dictionary.yaml, it already exists!")
        return
    if analysis is None:
        analysis = processor.sources_analysis.do_analysis()
    (collected_words, collected_sentences, source_info) = analysis
    instructions = """
When adding verbs, use the `get_conjugations.py` script to easily generate
the YAML required (this is very time-consuming to do by hand).
"""
    known_words = get_all_known_french_words()
    unknown_words = collected_words.keys() - known_words
    unknown_words = sorted(
        list(unknown_words),
        key=lambda w: -sum(
            v for k, v in collected_words[w].items()
            if True or k[0] == 'le_petit_nicolas'
        ),
    )
    with safer.open("handmade_dictionary.yaml", "w", encoding='utf-8') as f:
        template = {
            word: [{'display': word, 'gender': '', 'pos': '', 'translations': ['']}]
            for word in unknown_words[:20]
        }
        data = yaml.dump(template, Dumper=Dumper, allow_unicode=True)
        f.write("# " + "\n# ".join(instructions.split("\n")) + "\n\n")
        f.write(data)
def on_save(path):
    """Appends the count for the last minute to a file on disk."""
    global count
    now = datetime.datetime.utcnow()
    marker = common.write_marker(path, now)
    logger.debug("Updated local cache marker to %s", marker)
    if count == 0:
        logger.debug("Skipping save: no rotations in the last minute")
        return
    with safer.open(
        os.path.join(path, f"{marker}.csv"),
        mode="a",
        encoding="utf-8",
    ) as f:
        f.write(f"{now:%Y-%m-%dT%H:%M:%SZ},{count}\n")
    logger.info("Stored %s rotations per minute in local cache", count)
    semaphore.acquire()
    count = 0
    semaphore.release()
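# A companion sketch (not in the original module) showing how the rows that
# on_save() appends could be read back; the marker-file layout and names
# follow the assumptions above.
import csv
import os


def read_counts(path, marker):
    # Each row is "<UTC timestamp>,<count>", one per saved minute.
    with open(os.path.join(path, f"{marker}.csv"), encoding="utf-8", newline="") as f:
        return [(timestamp, int(n)) for timestamp, n in csv.reader(f)]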
def do_analysis():
    print("Analyzing source files...")
    source_paths = [
        "../books/hp",
        "../books/inner_french_podcast",
        "../books/hprat",
        "../books/le_petit_prince",
        '../books/alcatraz',
        '../books/le_petit_nicolas',
    ]
    source_files = [
        Path(join(p, f))
        for p in source_paths
        for f in listdir(p)
        if isfile(join(p, f))
    ]
    collected_words = {}
    collected_sentences = {}
    source_info = {}
    for filename in source_files:
        frontmatter = ""
        source = tuple(filename.parts[-2:])
        with safer.open(filename, "r", encoding='utf-8') as f:
            out_of_frontmatter = False
            for index, line in enumerate(f):
                if retain_only_characters(line).strip() == "":
                    continue
                if index == 0 and line.strip() == "---":
                    out_of_frontmatter = False
                    continue
                else:
                    out_of_frontmatter = True
                if index > 0 and line.strip() == "---":
                    out_of_frontmatter = False
                if not out_of_frontmatter:
                    frontmatter += line
                else:
                    line = clean_line(line)
                    words = line_to_words_french(line)
                    for word in words:
                        collected_words[word] = collected_words.get(word, Counter())
                        collected_words[word][source] += 1
                    if "«" in line or "»" in line or line == "":
                        # Don't want to deal with these right now
                        pass
                    else:
                        # I was hoping this would work, but it fails by
                        # splitting "– Allez, ouste ! s'exclama Mr Dursley."
                        # into two separate sentences.
                        """
                        doc = nlpfr(line)
                        for sentence in doc.sents:
                            print(sentence)
                            if False and sentence != "" and len(sentence.split()) > 1:
                                collected_sentences[sentence] = collected_sentences.get(sentence, {})
                                collected_sentences[sentence][source] = collected_sentences[sentence].get(source, []) + [index]
                        """
                        sentences = re.findall(
                            r'(?:["«A-ZÉÀÂÄÈÉÊËÎÏÔŒÙÛÜŸÇ]).*?(?:(?:[.?!]["»]?)|-")(?=$| ["«]?[A-ZÉÀÂÄÈÉÊËÎÏÔŒÙÛÜŸÇ])',
                            line.strip(),
                        )
                        sentences = [sentence.strip('– ') for sentence in sentences]
                        # sentences = [sentence.strip('"') if sentence.count('"') == 2 and sentence[0] == '"' and sentence[-1] == '"' and sentence[-2] != "-" else sentence for sentence in sentences]  # sometimes sentences are wrapped in "s
                        # Sometimes there are false negatives where sentences
                        # end where they shouldn't, mostly in names like
                        # M. McGonagall; this detects those and glues the
                        # pieces back together.
                        sentence_buildup = ""
                        for sentence in sentences:
                            if sentence[-3:] in (" M.", " H.", " D.", " J.") or sentence[-4:] == " Dr.":
                                sentence_buildup += sentence + " "
                            else:
                                sentence = sentence_buildup + sentence
                                sentence_buildup = ""
                                sentence = sentence.strip()
                                sentence = sentence.strip('– ')
                                if sentence.count('"') == 1:
                                    sentence = sentence.replace('"', "").strip()
                                sentence = sentence.strip()
                                if sentence != "" and len(sentence.split()) > 1:
                                    collected_sentences[sentence] = collected_sentences.get(sentence, {})
                                    collected_sentences[sentence][source] = collected_sentences[sentence].get(source, []) + [index]
        try:
            source_info[source] = yaml.load(frontmatter, Loader=Loader)
        except Exception:
            print(f"trouble processing frontmatter for {source}")
            print(frontmatter)
    return Analysis(
        words=collected_words,
        sentences=collected_sentences,
        source_info=source_info,
    )
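# A quick, verifiable check of the sentence-splitting regex used in
# do_analysis() on a made-up line: a match must start with a quote or a
# capital (including accented capitals), end with sentence-final punctuation,
# and be followed by end-of-line or another capitalized sentence.
import re

_SENT = (r'(?:["«A-ZÉÀÂÄÈÉÊËÎÏÔŒÙÛÜŸÇ]).*?(?:(?:[.?!]["»]?)|-")'
         r'(?=$| ["«]?[A-ZÉÀÂÄÈÉÊËÎÏÔŒÙÛÜŸÇ])')
assert re.findall(_SENT, 'Il pleut. Elle sourit.') == ['Il pleut.', 'Elle sourit.']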
def get_translations():
    with safer.open("translations.yaml", encoding='utf-8') as f:
        translations = yaml.load(f, Loader=Loader)
    return translations
def get_word_dictionary():
    with safer.open("worddictionary.yaml", encoding='utf-8') as f:
        word_dictionary = yaml.load(f, Loader=Loader)
    return word_dictionary
def write_help():
    with safer.open(HELP_FILE, 'w') as fp:
        fp.write(get_help())
def read_file(path: Union[str, Path], encoding: str = "utf-8") -> str:
    """Safely read an .ipynb file and return its contents."""
    with safer.open(resolve(path), "r", encoding=encoding) as ipynb_infile:
        return cast(str, ipynb_infile.read())
def _error(self, mode='w', **kwds):
    with self.assertRaises(ValueError) as e:
        safer.open(FILENAME, mode, temp_file=True, **kwds)
    return e.exception.args[0]