def _edit(args):
    """Edit the mapping directly using an editor."""
    if args:
        abort(USAGE + 'dtags: too many arguments')
    try:
        with TempFile(
            mode='w+t', delete=False, prefix='mapping.', dir=CFG_DIR
        ) as tfile:
            with io.open(MAPPING_FILE, 'rt') as mapping_file:
                tfile.write(EDIT_HELP_COMMENTS + mapping_file.read())
            tfile.flush()
    except (OSError, IOError) as err:
        abort('dtags: failed to edit mapping: {}'.format(err), err.errno)
    else:
        # Default to '' so the "undefined EDITOR" branch below is reachable
        # when the variable is unset (shlex.split(None) would misbehave).
        editor = shlex.split(os.environ.get('EDITOR', ''))
        if not editor:
            abort('dtags: undefined environment variable: ' + style.bad('EDITOR'))
        try:
            sp.check_call(editor + [tfile.name])
        except sp.CalledProcessError as err:
            abort('dtags: failed to edit mapping: {}'.format(err))
        else:
            mapping, excluded = parse_mapping(tfile.name)
            save_mapping(mapping)
            rm_files(tfile.name)
            if excluded:
                print('Cleaned the following entries:\n' + excluded + '\n')
            finish('New entries saved successfully')

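# A minimal, stdlib-only sketch of the same edit-in-$EDITOR round trip used by
# _edit above. The dtags helpers (abort, parse_mapping, save_mapping, rm_files)
# are not reproduced here; the function name and structure are illustrative only.
import os
import shlex
import subprocess
import tempfile


def edit_text_in_editor(original):
    """Write `original` to a temp file, open it in $EDITOR, return the edited text."""
    editor = shlex.split(os.environ.get('EDITOR', ''))
    if not editor:
        raise RuntimeError('EDITOR environment variable is not set')
    with tempfile.NamedTemporaryFile(mode='w+t', suffix='.txt', delete=False) as tf:
        tf.write(original)
        path = tf.name
    try:
        subprocess.check_call(editor + [path])   # blocks until the editor exits
        with open(path, 'rt') as edited:
            return edited.read()
    finally:
        os.remove(path)                          # always clean up the temp file
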
def write(data, path, content=None):
    try:
        temp_file = TempFile(delete=False)
        ext = os.path.splitext(path)[1].lower()
        if ext == '.csv':
            csv.write(data, temp_file.name)
        else:
            omv.write(data, temp_file.name, content)
        temp_file.close()
        os.replace(temp_file.name, path)
    except Exception as e:
        # Windows winerror 17 (ERROR_NOT_SAME_DEVICE): os.replace cannot move
        # across drives, so copy next to the destination and replace from there.
        # getattr guards against non-OSError exceptions that lack winerror.
        if os.name == 'nt' and getattr(e, 'winerror', None) == 17:
            temp_path = path + '.tmp'
            shutil.copy(temp_file.name, temp_path)
            os.remove(temp_file.name)
            os.replace(temp_path, path)
        else:
            try:
                os.remove(temp_file.name)
            except Exception:
                pass
            raise e

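# csv.write and omv.write in the function above are project-local writers, not
# the standard library. The underlying pattern is "write to a temp file, then
# os.replace onto the destination". A minimal sketch of just that pattern,
# creating the temp file in the destination directory so the final rename stays
# on one filesystem (which also sidesteps the cross-drive fallback above):
import os
import tempfile


def atomic_write_text(path, text):
    """Write `text` to `path` so readers never observe a half-written file."""
    dir_name = os.path.dirname(os.path.abspath(path))
    fd, tmp_path = tempfile.mkstemp(dir=dir_name, suffix='.tmp')
    try:
        with os.fdopen(fd, 'w') as f:
            f.write(text)
        os.replace(tmp_path, path)   # atomic rename on the same volume
    except Exception:
        try:
            os.remove(tmp_path)      # best-effort cleanup on failure
        except OSError:
            pass
        raise
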
def translate_files_slurm(args, cmds, expected_output_files):
    conda_env = '/private/home/pipibjc/.conda/envs/fairseq-20190509'
    for cmd in cmds:
        with TempFile('w') as script:
            sh = f"""#!/bin/bash
source activate {conda_env}
{cmd}
"""
            print(sh)
            script.write(sh)
            script.flush()
            cmd = f"sbatch --gres=gpu:1 -c {args.cpu + 2} {args.sbatch_args} --time=15:0:0 {script.name}"
            import sys
            print(cmd, file=sys.stderr)
            check_call(cmd, shell=True)

    # wait until all expected outputs have finished
    num_finished = 0
    while num_finished < len(expected_output_files):
        num_finished = 0
        for output_file in expected_output_files:
            num_finished += 1 if check_finished(output_file) else 0
        if num_finished < len(expected_output_files):
            print("sleeping for 3m ...")
            time.sleep(3 * 60)

def test_restkey(self):
    with TempFile() as f:
        f.write("テスト1,テスト2\nテスト1,テスト2".encode('utf-8'))
        f.flush()
        loader = CSVLoader(f.name, ['c1'], 'utf-8', restkey='garbage')
        for row in loader:
            self.assertEqual('テスト1', row['c1'])
            self.assertEqual(['テスト2'], row['garbage'])

def test_cp932(self):
    with TempFile() as f:
        f.write("テスト1,テスト2".encode('cp932'))
        f.flush()
        loader = CSVLoader(f.name, None, 'cp932')
        for row in loader:
            self.assertEqual('テスト1', row['c0'])
            self.assertEqual('テスト2', row['c1'])

def test_restval(self):
    with TempFile() as f:
        f.write("テスト1,テスト2\nテスト1,テスト2".encode('utf-8'))
        f.flush()
        loader = CSVLoader(f.name, ['c1', 'c2', 'c3'], 'utf-8', restval='<blank>')
        for row in loader:
            self.assertEqual('テスト1', row['c1'])
            self.assertEqual('テスト2', row['c2'])
            self.assertEqual('<blank>', row['c3'])

def class_from_module_repr(cls_name, module_repr):
    path = osp.join(gettempdir(), f'{getuser()}_pyg_jit')
    makedirs(path)
    with TempFile(mode='w+', suffix='.py', delete=False, dir=path) as f:
        f.write(module_repr)
    spec = spec_from_file_location(cls_name, f.name)
    mod = module_from_spec(spec)
    sys.modules[cls_name] = mod
    spec.loader.exec_module(mod)
    return getattr(mod, cls_name)

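# class_from_module_repr above relies on importlib loading a real .py file on
# disk. A self-contained sketch of that mechanism; the class name and generated
# source below are made up for illustration:
import sys
import tempfile
from importlib.util import module_from_spec, spec_from_file_location

source = "class GeneratedModel:\n    def forward(self, x):\n        return x * 2\n"
with tempfile.NamedTemporaryFile(mode='w+', suffix='.py', delete=False) as f:
    f.write(source)

spec = spec_from_file_location('generated_module', f.name)
mod = module_from_spec(spec)
sys.modules['generated_module'] = mod
spec.loader.exec_module(mod)
print(mod.GeneratedModel().forward(21))   # 42
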
def test_invalid_param(self):
    with TempFile() as f:
        args = ['--in-format', 'none', f.name]
        self.assertNotEqual(_JubaModelCommand.start(args), 0)
        args = ['--out-format', 'none', f.name]
        self.assertNotEqual(_JubaModelCommand.start(args), 0)
        args = ['--no-such-option']
        self.assertNotEqual(_JubaModelCommand.start(args), 0)

def test_unicode_separator(self):
    with TempFile() as f:
        f.write("v1★v2\ns1★s2\n".encode('utf-8'))
        f.flush()
        loader = CSVLoader(f.name, delimiter='★')
        lines = 0
        for row in loader:
            lines += 1
            self.assertEqual('s1', row['v1'])
            self.assertEqual('s2', row['v2'])
        self.assertEqual(1, lines)

def test_cp932_manual_fieldnames(self):
    with TempFile() as f:
        f.write("テスト1,テスト2\nテスト1,テスト2".encode('cp932'))
        f.flush()
        # assign field names statically
        loader = CSVLoader(f.name, ['列1', '列2'], 'cp932', delimiter=',')
        lines = 0
        for row in loader:
            lines += 1
            self.assertEqual('テスト1', row['列1'])
            self.assertEqual('テスト2', row['列2'])
        self.assertEqual(2, lines)

def test_cp932_seq_fieldnames(self):
    with TempFile() as f:
        f.write("テスト1,テスト2\nテスト1,テスト2".encode('cp932'))
        f.flush()
        # assign sequential field names
        loader = CSVLoader(f.name, False, 'cp932', delimiter=',')
        lines = 0
        for row in loader:
            lines += 1
            self.assertEqual('テスト1', row['c0'])
            self.assertEqual('テスト2', row['c1'])
        self.assertEqual(2, lines)

def test_cp932(self):
    with TempFile() as f:
        f.write("列1,列2\nテスト1,テスト2\n".encode('cp932'))
        f.flush()
        # predict field names from 1st row
        loader = CSVLoader(f.name, None, 'cp932', delimiter=',')
        lines = 0
        for row in loader:
            lines += 1
            self.assertEqual('テスト1', row['列1'])
            self.assertEqual('テスト2', row['列2'])
        self.assertEqual(1, lines)

def test_simple(self):
    with TempFile() as f:
        f.write("k1,\"k2\",k3\n1,2,3\n4,5,6".encode('utf-8'))
        f.flush()
        loader = CSVLoader(f.name)
        for row in loader:
            self.assertEqual(set(['k1', 'k2', 'k3']), set(row.keys()))
            if row['k1'] == '1':
                self.assertEqual('2', row['k2'])
                self.assertEqual('3', row['k3'])
            elif row['k1'] == '4':
                self.assertEqual('5', row['k2'])
                self.assertEqual('6', row['k3'])
            else:
                self.fail('unexpected row')

def test_noheader(self):
    with TempFile() as f:
        f.write("1,\"2\",3\n\"4\",5,\"6\"".encode('utf-8'))
        f.flush()
        loader = CSVLoader(f.name, False)
        for row in loader:
            self.assertEqual(set(['c0', 'c1', 'c2']), set(row.keys()))
            if row['c0'] == '1':
                self.assertEqual('2', row['c1'])
                self.assertEqual('3', row['c2'])
            elif row['c0'] == '4':
                self.assertEqual('5', row['c1'])
                self.assertEqual('6', row['c2'])
            else:
                self.fail('unexpected row')

def test_simple(self):
    data = 'hello\nworld'
    lines = []
    with TempFile() as f:
        f.write(data.encode())
        f.flush()
        loader = LineBasedFileLoader(f.name)
        for line in loader:
            lines.append(line)
        self.assertEqual([
            {'line': 'hello\n', 'number': 0},
            {'line': 'world', 'number': 1},
        ], lines)

def test_valid_param(self):
    with TempFile() as f:
        f.write(_get_binary_file().read())
        f.flush()
        args = ['--in-format', 'binary', '--out-format', 'json', f.name]
        self.assertEqual(_JubaModelCommand.start(args), 0)

def tempfile(self, **kwargs):
    """Create a NamedTemporaryFile in the TempPath."""
    kwargs.pop("dir", None)
    return TempFile(dir=str(self), **kwargs)

def __init__(self, fs, path, mode):
    self.file = TempFile()
    self.fs = fs
    self.path = path
    self.mode = mode

def test_guess_header(self):
    with TempFile() as f:
        f.write("k1|k2|k3\n1|2|3".encode())
        f.flush()
        loader = CSVLoader(f.name, fieldnames=True, delimiter='|')
        self.assertEqual([{'k1': '1', 'k2': '2', 'k3': '3'}], list(loader))

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', '-d', required=True,
                        help='Path to file to translate')
    parser.add_argument('--model', '-m', required=True, help='Model checkpoint')
    parser.add_argument('--lenpen', default=1.2, type=float, help='Length penalty')
    parser.add_argument('--beam', default=5, type=int, help='Beam size')
    parser.add_argument('--max-len-a', type=float, default=0,
                        help='max-len-a parameter when back-translating')
    parser.add_argument('--max-len-b', type=int, default=200,
                        help='max-len-b parameter when back-translating')
    parser.add_argument('--cpu', type=int, default=4,
                        help='Number of CPUs for interactive.py')
    parser.add_argument('--cuda-visible-device-ids', '-gids', default=None, nargs='*',
                        help='List of CUDA visible device ids, comma separated')
    parser.add_argument('--dest',
                        help='Output path for the intermediate and translated files')
    parser.add_argument('--max-tokens', type=int, default=12000, help='max tokens')
    parser.add_argument('--buffer-size', type=int, default=10000, help='Buffer size')
    parser.add_argument('--chunks', type=int, default=100)
    parser.add_argument('--source-lang', type=str, default=None,
                        help='Source language. Inferred from the model if not set')
    parser.add_argument('--target-lang', type=str, default=None,
                        help='Target language. Inferred from the model if not set')
    parser.add_argument('--databin', type=str, default=None,
                        help='Parallel databin. Will be combined with the back-translated databin')
    parser.add_argument('--sbatch-args', default='', help='Extra SBATCH arguments')
    parser.add_argument('--backend', type=str, default='local',
                        choices=['local', 'slurm'])
    args = parser.parse_args()

    args.cuda_visible_device_ids = args.cuda_visible_device_ids or list(
        range(torch.cuda.device_count()))

    chkpnt = torch.load(args.model)
    model_args = chkpnt['args']
    if args.source_lang is None or args.target_lang is None:
        args.source_lang = args.source_lang or model_args.source_lang
        args.target_lang = args.target_lang or model_args.target_lang
    if args.databin is None:
        args.databin = args.databin or model_args.data

    root_dir = os.path.dirname(os.path.realpath(__file__))
    translation_dir = os.path.join(args.dest or root_dir, 'translations',
                                   f'{args.source_lang}-{args.target_lang}')
    tempdir = os.path.join(translation_dir, 'splits')
    os.makedirs(tempdir, exist_ok=True)

    split_files = glob(f'{tempdir}/mono_data*')
    if len(split_files) != args.chunks:
        if len(split_files) != 0:
            print("number of split files does not match chunks; "
                  "removing files and re-splitting")
            [os.remove(os.path.join(tempdir, f)) for f in os.listdir(tempdir)]
        print("splitting files ...")
        check_call(
            f'split -n "r/{args.chunks}" -a3 -d {args.data} {tempdir}/mono_data',
            shell=True)
        split_files = glob(f'{tempdir}/mono_data*')
    else:
        print("number of split files matches the specified chunks; skipping split")

    translated_files = []
    files_to_translate = []
    for file in split_files:
        # skip the translation job if it is already finished
        output_file = get_output_file(translation_dir, file)
        translated_files.append(output_file)
        if check_finished(output_file):
            print(f"{output_file} is translated")
            continue
        files_to_translate.append(file)
    print(f"{len(files_to_translate)} files to translate")
    translate_files(args, translation_dir, files_to_translate)

    # aggregate translated files
    generated_src = f'{args.dest}/generated.src'
    generated_tgt = f'{args.dest}/generated.hypo'
    if count_line(generated_src) != count_line(generated_tgt) or count_line(generated_src) <= 0:
        print(f"aggregating translated {len(translated_files)} files")
        with TempFile() as fout:
            files = " ".join(translated_files)
            check_call(f"cat {files}", shell=True, stdout=fout)
            # strip the S/H tags and make source/hypothesis pairs
            check_call(
                f'cat {fout.name} | grep "^S" | cut -f2 > {generated_src}',
                shell=True)
            check_call(
                f'cat {fout.name} | grep "^H" | cut -f3 > {generated_tgt}',
                shell=True)
    assert count_line(generated_src) == count_line(generated_tgt)
    print(f"output generated files to {generated_src}, {generated_tgt}")

def _screen_shot(self):
    from tempfile import NamedTemporaryFile as TempFile
    tmp = TempFile(suffix='.png')
    self.save_screenshot(tmp.name)
    return tmp.read()

def tempfile(self, filename=None, **kwargs):
    """
    Instantiate a NamedTemporaryFile or use an existing file in the TempPath
    and return it as a Path object.
    """
    return Path(
        TempFile(dir=str(self), **kwargs).name
        if filename is None
        else self.joinpath(filename)
    )

class SearchSchemaParser(object):
    """Search schema parser.

    Parses a POI file and generates the SQL statements required to create and
    populate postgres tables, plus a string for querying.
    """

    EVENTS = ("start", "end")
    SEARCH_TABLE_SCHEMA_TAG = "SearchTableSchema"
    SEARCH_TABLE_VALUES_TAG = "SearchTableValues"
    BALLOON_STYLE_TAG = "BalloonStyle"
    FIELD_TAG = "field"
    R_TAG = "r"
    LAT_TAG = "lat"
    LON_TAG = "lon"
    SEARCH_FILE_NAME = "SearchDataFile"

    # Types that need to be UTF-encoded when writing to the postgres database.
    ENCODE_TYPES = ["varchar", "character varying", "character", "text"]

    def __init__(self, db_updater):
        """Inits search schema parser.

        Args:
          db_updater: database updater.
        """
        self._db_updater = db_updater
        self._current_tag = None
        self._within_record = False
        self._within_style = False

    def Parse(self, search_file, table_name, file_prefix=None):
        """Parser entry point.

        Parses the given POI file into POI elements, builds SQL statements for
        creating and populating the POI table in the POI database, and triggers
        the DB updater to execute these SQL statements.

        Args:
          search_file: string containing absolute path to .poi file to be parsed.
          table_name: string containing name to use for POI table creation.
        Returns:
          num_fields: number of fields in search schema.
          sql_search: string containing SQL statement to execute for POI query.
          balloon_style: string containing associated balloon style.
        Raises:
          exceptions.SearchSchemaParserException exception.
          psycopg2.Warning/Error exceptions.
        """
        self._table_name = table_name
        self._file_prefix = file_prefix
        logger.info("Ingesting POI file %s into parser...", search_file)
        if file_prefix is None:
            logger.info("File prefix is None")
        else:
            logger.info("File prefix is '%s'", file_prefix)
        self.__StartDocument()
        try:
            context = ET.iterparse(search_file, SearchSchemaParser.EVENTS)
        except ET.ParseError as e:
            row, column = e.position
            raise exceptions.SearchSchemaParserException(
                "Unable to parse POI file %s."
                " A parsing error on row %d column %d: %s" % (
                    search_file, row, column, e))
        logger.info("Ingesting POI file %s into parser done.", search_file)
        logger.info("Parsing POI elements and inserting into POI database...")

        # File as temp buffer to store records, for the COPY db command.
        self.tmp_file = TempFile(max_size=_K_SPOOL_SIZE, suffix=table_name)
        num_elements = 0
        self._element_start = self.__StartElementHeader
        self._element_end = self.__EndElementHeader
        for event, elem in context:
            if event == "start":
                self.__StartElement(elem)
            elif event == "end":
                self.__EndElement(elem)
                num_elements += 1
                elem.clear()
        self.__EndDocument()
        logger.info("Total POI elements: %s.", num_elements)
        logger.info("Parsing POI elements and inserting into POI database done.")
        return (self._num_fields, self._sql_search, self._balloon_style)

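# The TempFile(max_size=_K_SPOOL_SIZE, suffix=table_name) call above implies
# TempFile is bound to tempfile.SpooledTemporaryFile here (only the spooled
# variant accepts max_size): records are buffered in memory and spill to a real
# file on disk once they exceed the threshold. A minimal sketch of that
# behaviour with an assumed 1 KiB threshold and text mode:
from tempfile import SpooledTemporaryFile

with SpooledTemporaryFile(max_size=1024, mode='w+t', suffix='.records') as buf:
    for i in range(200):
        buf.write('record-%d\tvalue-%d\n' % (i, i))   # crosses 1 KiB, rolls over to disk
    buf.seek(0)                                       # rewind before bulk-loading / COPY
    first = buf.readline()                            # 'record-0\tvalue-0\n'
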
def write_csv(content):
    file_ = TempFile(mode="w+", suffix=".csv")
    file_.write(content)
    file_.seek(0)
    return file_

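# Hypothetical caller of write_csv (assuming TempFile is
# tempfile.NamedTemporaryFile, as elsewhere in this collection): the returned
# handle is already rewound, and because delete defaults to True the underlying
# file disappears as soon as the handle is closed.
f = write_csv("name,score\nalice,10\n")
try:
    print(f.read())   # contents are readable immediately; offset is 0
    print(f.name)     # path is valid only while the handle stays open
finally:
    f.close()         # NamedTemporaryFile removes the file here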