async def test_lovelace_update_view(hass, hass_ws_client):
    """Test update_view command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')
    origyaml = yaml.load(TEST_YAML_A)

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=origyaml), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/update',
            'view_id': 'example',
            'view_config': 'id: example2\ntitle: New title\n',
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    orig_view = origyaml.mlget(['views', 0], list_ok=True)
    new_view = result.mlget(['views', 0], list_ok=True)
    assert new_view['title'] == 'New title'
    assert new_view['cards'] == orig_view['cards']
    assert 'theme' not in new_view
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
async def test_lovelace_move_card_view_position(hass, hass_ws_client):
    """Test move_card to view with position command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/move',
            'card_id': 'test',
            'new_view_id': 'example',
            'new_position': 1,
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    assert result.mlget(['views', 0, 'cards', 1, 'title'],
                        list_ok=True) == 'Test card'
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
def parse(self, input):
    """parse the given file or file source string"""
    if hasattr(input, 'name'):
        self.filename = input.name
    elif not getattr(self, 'filename', ''):
        self.filename = ''
    if hasattr(input, "read"):
        src = input.read()
        input.close()
        input = src
    if isinstance(input, bytes):
        input = input.decode('utf-8')
    yaml = YAML()
    try:
        self._file = yaml.load(input)
    except YAMLError as e:
        message = e.problem if hasattr(e, 'problem') else e.message
        if hasattr(e, 'problem_mark'):
            message += ' {0}'.format(e.problem_mark)
        raise base.ParseError(message)

    self._file = self.preprocess(self._file)

    for k, data in self._flatten(self._file):
        unit = self.UnitClass(data)
        unit.setid(k)
        self.addunit(unit)
async def test_lovelace_get_view(hass, hass_ws_client):
    """Test get_view command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/get',
            'view_id': 'example',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
    expected = (
        'title: Example\n'
        '  # Optional unique id for direct access /lovelace/${id}\n'
        'id: example\n'
        '  # Optional background (overwrites the global background).\n'
        'background: radial-gradient(crimson, skyblue)\n'
        '  # Each view can have a different theme applied.\n'
        'theme: dark-mode\n'
    )
    assert "".join(msg['result'].split()) == "".join(expected.split())
def testToYAMLs(self, tests, rootFolder='./testsNewBuild/'):
    """Writes a batch of tests to file in the yaml format, grouping them
    by team and name

    :param tests: list of tests to write to file
    :type tests: list
    :param rootFolder: destination folder, defaults to './testsNewBuild/'
    :type rootFolder: str, optional
    """
    # extract unique test names
    uniqueTestNames = set([c.name for c in tests])
    # group by test names to put them in same files
    for name in uniqueTestNames:
        yaml = YAML()
        yaml.default_flow_style = False
        testDict = None
        for t in tests:
            if t.name == name:
                f = open(os.path.join(
                    rootFolder, t.team, name + '.yaml'), "w+")
                if testDict is None:
                    testDict = t.toDict()
                else:
                    key = 'metric' + str(len(testDict['metrics']) + 1)
                    testDict['metrics'][key] = t.toDict()[
                        'metrics']['metric1']
        yaml.dump(testDict, f)
def handle(self, *args, **options):
    def flatten(l):
        return [item for sublist in l for item in sublist]

    yaml = YAML()
    with open(options['yaml']) as yamlfile:
        data = yaml.load(yamlfile)

    for attribute in flatten(data['attributes'].values()):
        SuomiFiUserAttribute.objects.update_or_create(
            friendly_name=attribute['friendly_name'],
            uri=attribute['uri'],
            name=attribute['name'],
            description=attribute['description'],
        )

    for level, details in data['access_levels'].items():
        access_level, created = SuomiFiAccessLevel.objects.update_or_create(
            shorthand=level)
        for language, name in details['name'].items():
            access_level.set_current_language(language)
            access_level.name = name
        for language, description in details['description'].items():
            access_level.set_current_language(language)
            access_level.description = description
        for attribute in flatten(details['fields']):
            access_level.attributes.add(
                SuomiFiUserAttribute.objects.get(
                    friendly_name=attribute['friendly_name']))
        access_level.save()
def mocked_config_file_path(
    fake_temp_data_pocketsphinx_dic, fake_temp_data_pocketsphinx_lm,
    tmpdir_factory
):
    path_to_pocketsphix_dic = os.path.join(
        str(fake_temp_data_pocketsphinx_dic), "fake.dic"
    )
    path_to_pocketsphix_lm = os.path.join(
        str(fake_temp_data_pocketsphinx_lm), "fake.lm"
    )
    # config part
    base = tempfile.mkdtemp()
    config_file = os.path.join(base, "config.yaml")

    yaml = YAML()
    m_cfg = yaml.load(COMMON_MOCKED_CONFIG)
    m_cfg["pocketsphinx"]["dic"] = path_to_pocketsphix_dic
    m_cfg["pocketsphinx"]["lm"] = path_to_pocketsphix_lm
    with open(config_file, "w", encoding="utf-8") as fp:
        yaml.dump(m_cfg, fp)

    yield config_file

    shutil.rmtree(base)
def obj_from_file(filename='annotation.yaml', filetype='auto'):
    ''' Read object from file '''
    if filetype == 'auto':
        _, ext = os.path.splitext(filename)
        filetype = ext[1:]

    if filetype in ('yaml', 'yml'):
        from ruamel.yaml import YAML
        yaml = YAML(typ="unsafe")
        with open(filename, encoding="utf-8") as f:
            obj = yaml.load(f)
        if obj is None:
            obj = {}
    elif filetype in ('pickle', 'pkl', 'pklz', 'picklezip'):
        fcontent = read_pkl_and_pklz(filename)
        if sys.version_info[0] < 3:
            import cPickle as pickle
        else:
            import _pickle as pickle
        if sys.version_info.major == 2:
            obj = pickle.loads(fcontent)
        else:
            obj = pickle.loads(fcontent, encoding="latin1")
    else:
        logger.error('Unknown filetype ' + filetype)
        obj = None  # avoid NameError on the unknown-filetype path
    return obj
def dump(self, data, stream=None, **kw):
    inefficient = False
    if stream is None:
        inefficient = True
        stream = StringIO()
    YAML.dump(self, data, stream, **kw)
    if inefficient:
        return stream.getvalue()
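# The override above follows the documented ruamel.yaml pattern for making
# dump() return a string when no stream is given. A minimal, self-contained
# sketch of how such a subclass might be used (the class name is illustrative):
from io import StringIO

from ruamel.yaml import YAML


class StringDumpYAML(YAML):
    """YAML that returns the dump as a string when no stream is given."""

    def dump(self, data, stream=None, **kw):
        to_string = stream is None
        if to_string:
            stream = StringIO()
        YAML.dump(self, data, stream, **kw)
        if to_string:
            return stream.getvalue()


yaml = StringDumpYAML()
text = yaml.dump({"a": 1, "b": [2, 3]})  # roughly "a: 1\nb:\n- 2\n- 3\n"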
def test_to_file(self):
    filename = "ff_test.yaml"
    b = self.benzene
    b.to_file(filename=filename)
    yaml = YAML(typ="safe")
    with open(filename, "r") as f:
        d = yaml.load(f)
    self.assertListEqual(d["mass_info"], [list(m) for m in b.mass_info])
    self.assertListEqual(d["pair_coeffs"], b.pair_coeffs)
def to_yaml(self, filename):
    if self.stats is None:
        logger.error("Run .skeleton_analysis() before .to_yaml()")
        return
    from ruamel.yaml import YAML
    yaml = YAML(typ="unsafe")
    with open(filename, 'wt', encoding="utf-8") as f:
        yaml.dump(self.stats, f)
def test_to_file(self):
    filename = "ff_test.yaml"
    v = self.virus
    v.to_file(filename=filename)
    yaml = YAML(typ="safe")
    with open(filename, "r") as f:
        d = yaml.load(f)
    self.assertListEqual(d["mass_info"], [list(m) for m in v.mass_info])
    self.assertListEqual(d["nonbond_coeffs"], v.nonbond_coeffs)
def test_id_not_changed():
    """Test if id is not changed if already exists."""
    yaml = YAML(typ='rt')
    fname = "dummy.yaml"
    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_B)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        migrate_config(fname)
    assert save_yaml_mock.call_count == 0
def test_save_yaml_model(tmpdir, mini_model):
    """Test the writing of YAML model."""
    jsonschema = pytest.importorskip("jsonschema")
    output_file = tmpdir.join("mini.yml")
    cio.save_yaml_model(mini_model, output_file.strpath, sort=True)
    # validate against schema; jsonschema.validate() returns None on success
    # and raises on failure, so call it directly rather than asserting on it
    yaml = YAML(typ="unsafe")
    with open(output_file.strpath, "r") as infile:
        yaml_to_dict = yaml.load(infile)
    dict_to_json = json.dumps(yaml_to_dict)
    loaded = json.loads(dict_to_json)
    jsonschema.validate(loaded, cio.json.json_schema)
def from_file(cls, filename):
    """
    Constructor that reads in a file in YAML format.

    Args:
        filename (str): Filename.
    """
    yaml = YAML(typ="safe")
    with open(filename, "r") as f:
        d = yaml.load(f)
    return cls.from_dict(d)
def to_file(self, filename):
    """
    Saves object to a file in YAML format.

    Args:
        filename (str): File name.
    """
    d = {"mass_info": self.mass_info,
         "pair_coeffs": self.pair_coeffs,
         "mol_coeffs": self.mol_coeffs}
    yaml = YAML(typ="safe")
    with open(filename, "w") as f:
        yaml.dump(d, f)
def test_add_id():
    """Test if id is added."""
    yaml = YAML(typ='rt')
    fname = "dummy.yaml"
    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        migrate_config(fname)

    result = save_yaml_mock.call_args_list[0][0][1]
    assert 'id' in result['views'][0]['cards'][0]
    assert 'id' in result['views'][1]
def vt2esofspy(vesseltree, outputfilename="tracer.txt", axisorder=[0, 1, 2]):
    """
    exports vesseltree to esofspy format

    :param vesseltree: filename or vesseltree dictionary structure
    :param outputfilename: output file name
    :param axisorder: order of axis can be specified with this option
    :return:
    """
    if (type(vesseltree) == str) and os.path.isfile(vesseltree):
        from ruamel.yaml import YAML
        yaml = YAML(typ="unsafe")
        with open(vesseltree, encoding="utf-8") as f:
            vt = yaml.load(f)
    else:
        vt = vesseltree
    logger.debug(str(vt['general']))
    logger.debug(str(vt.keys()))
    vtgm = vt['graph']['microstructure']
    lines = []
    vs = vt['general']['voxel_size_mm']
    sh = vt['general']['shape_px']

    # switch axis
    ax = axisorder

    lines.append("#Tracer+\n")
    lines.append("#voxelsize mm %f %f %f\n" % (vs[ax[0]], vs[ax[1]], vs[ax[2]]))
    lines.append("#shape %i %i %i\n" % (sh[ax[0]], sh[ax[1]], sh[ax[2]]))
    lines.append(str(len(vtgm) * 2) + "\n")
    i = 1
    for id in vtgm:
        try:
            nda = vtgm[id]['nodeA_ZYX']
            ndb = vtgm[id]['nodeB_ZYX']
            lines.append("%i\t%i\t%i\t%i\n" %
                         (nda[ax[0]], nda[ax[1]], nda[ax[2]], i))
            lines.append("%i\t%i\t%i\t%i\n" %
                         (ndb[ax[0]], ndb[ax[1]], ndb[ax[2]], i))
            i += 1
        except:
            pass
    lines.append("%i\t%i\t%i\t%i" % (0, 0, 0, 0))
    lines[3] = str(i - 1) + "\n"
    from builtins import str as text
    with open(outputfilename, 'wt') as f:
        for line in lines:
            f.write(text(line))
def edit_tmpvault(filename):
    '''Update the yaml config by changing any key whose value is
    CHANGE_AND_REKEY: requests a master password and uses pbkdf2 to derive a
    master key that all of the new keys are based on
    '''
    yaml = YAML()
    with open(filename) as fobj:
        vault_dict = yaml.load(fobj)
    master_pass = getpass.getpass(
        "Enter master key to generate values: ").encode('utf-8')
    master_key = hashlib.pbkdf2_hmac('sha256', master_pass, os.urandom(16),
                                     100000)
    change_values(vault_dict, 'CHANGE_AND_REKEY', master_key)
    with open(filename, 'w') as fobj:
        yaml.dump(vault_dict, fobj)
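# change_values() is not shown in this excerpt. A plausible sketch (purely
# illustrative, not the project's actual helper) would recursively walk the
# loaded mapping and replace the sentinel with a value derived from master_key:
import hashlib


def change_values(node, sentinel, master_key):
    """Recursively replace `sentinel` values with keys derived from master_key.

    Illustrative only: derives each value as SHA-256(master_key || key name).
    """
    if isinstance(node, dict):
        for key, value in node.items():
            if value == sentinel:
                node[key] = hashlib.sha256(
                    master_key + key.encode("utf-8")).hexdigest()
            else:
                change_values(value, sentinel, master_key)
    elif isinstance(node, list):
        for item in node:
            change_values(item, sentinel, master_key)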
def main():
    yaml = YAML()
    file_name = sys.argv[1]
    file_in = open(file_name).read()
    docs = yaml.load_all(file_in)
    i = 0
    for doc in docs:
        if i == 0:
            code_old = doc
        else:
            code_new = doc
        i = i + 1
    delta_map = dict()
    follow_keys = list()
    add = list()
    update = list()
    traversal(code_old, code_new, follow_keys, delta_map, update, add)
    yaml.dump(code_old, sys.stdout)
    split = '------love----you------choerodon----'
    print(split)
    yaml.dump(delta_map, sys.stdout)
    print(split)
    change_key_map = dict()
    change_key_map["add"] = add
    change_key_map["update"] = update
    yaml.dump(change_key_map, sys.stdout)
def loadtestDictsFromFilePaths(self, testFilePaths):
    """Parses yaml files from given filepaths

    :param testFilePaths: file names to parse
    :type testFilePaths: list of strings
    :return: list of dicts parsed from the yaml
    :rtype: list of dicts
    """
    testDicts = []
    yaml = YAML()
    for testFile in testFilePaths:
        with open(testFile) as f:
            testDict = yaml.load(f)
            testDicts.append(dict(testDict))
    return testDicts
def dumpyaml(
    yamlp: YAML, data: Any, stream: Any = None, **kw: Any
) -> Optional[str]:
    """Dump YAML to string."""
    inefficient = False
    if stream is None:
        inefficient = True
        stream = StringIO()
    # overriding here to get dumping to not sort keys.
    yamlp = YAML()
    yamlp.indent(mapping=4, sequence=6, offset=3)
    # yamlp.compact(seq_seq=False, seq_map=False)
    yamlp.dump(data, stream, **kw)
    if inefficient:
        return cast(str, stream.getvalue())
    return None
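# A small illustration (hypothetical data) of the indent() settings used above.
# In ruamel.yaml, `mapping` sets the indent of nested mapping keys, `sequence`
# sets the indent of block-sequence items relative to their parent, and
# `offset` places the dash within that sequence indent; the exact column
# layout below is approximate.
import sys

from ruamel.yaml import YAML

yamlp = YAML()
yamlp.indent(mapping=4, sequence=6, offset=3)
yamlp.dump({"top": {"items": ["a", "b"]}}, sys.stdout)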
def get_default_opttask_kwargs():
    """
    Get the default configuration kwargs for OptTask.

    Args:
        None

    Returns:
        conf_dict (dict): The default kwargs for OptTask
    """
    cwd = os.path.dirname(os.path.realpath(__file__))
    fname = os.path.join(cwd, "defaults.yaml")
    with open(fname, 'r') as config_raw:
        yaml = YAML()
        conf_dict = dict(yaml.load(config_raw))
    return conf_dict
def list_products_yaml(self, hostname, system):
    from ruamel.yaml import YAML

    yml = YAML(typ='safe', pure=False)
    yml.default_flow_style = False
    yml.explicit_end = True
    yml.explicit_start = True
    yml.indent(mapping=4, sequence=4, offset=2)
    data = system.to_refhost_dict()
    data["name"] = str(hostname)
    yml.dump(data, self.output)
def main():
    """Main application entry point."""
    if len(sys.argv) != 3:
        print("Usage: yc-calc <input-file> <output-file>")
        sys.exit(1)

    infile = sys.argv[1]
    outfile = sys.argv[2]

    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    sequence_tag = yaml.resolver.BaseResolver.DEFAULT_SEQUENCE_TAG

    yaml.add_constructor(mapping_tag, dict_constructor,
                         Loader=RoundTripConstructor)
    yaml.add_constructor(sequence_tag, list_constructor,
                         Loader=RoundTripConstructor)

    yaml.add_representer(CalcDict, dict_representer,
                         Dumper=RoundTripRepresenter)
    yaml.add_representer(CalcList, list_representer,
                         Dumper=RoundTripRepresenter)

    try:
        with open(infile) as infp:
            top = YAML().load(infp)
            if not isinstance(top, CalcDict):
                type_name = type(top).__name__
                err("Top level element should be dict not {0}".format(
                    type_name))

            defs = {}
            defs_str = top.get("DEFS", "")
            try:
                exec(defs_str, defs)
            except Exception as exc:
                err("Error executing DEFS: {0}".format(exc))

            CalcContainer.set_top(defs, top)
            write(top, outfile)

    except IOError as exc:
        err("Error opening file: {0}".format(exc))
    except yaml.YAMLError as exc:
        err("Error parsing input: {0}".format(exc))
async def test_lovelace_get_card(hass, hass_ws_client):
    """Test get_card command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/get',
            'card_id': 'test',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
    assert msg['result'] == 'id: test\ntype: entities\ntitle: Test card\n'
async def test_lovelace_get_view_not_found(hass, hass_ws_client):
    """Test get_view command cannot find view."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/get',
            'view_id': 'not_found',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success'] is False
    assert msg['error']['code'] == 'view_not_found'
class TestYAML(unittest.TestCase):
    """Test lovelace.yaml save and load."""

    def setUp(self):
        """Set up for tests."""
        self.tmp_dir = mkdtemp()
        self.yaml = YAML(typ='rt')

    def tearDown(self):
        """Clean up after tests."""
        for fname in os.listdir(self.tmp_dir):
            os.remove(os.path.join(self.tmp_dir, fname))
        os.rmdir(self.tmp_dir)

    def _path_for(self, leaf_name):
        return os.path.join(self.tmp_dir, leaf_name + ".yaml")

    def test_save_and_load(self):
        """Test saving and loading back."""
        fname = self._path_for("test1")
        open(fname, "w+")
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_A))
        data = util_yaml.load_yaml(fname, True)
        assert data == self.yaml.load(TEST_YAML_A)

    def test_overwrite_and_reload(self):
        """Test that we can overwrite an existing file and read back."""
        fname = self._path_for("test2")
        open(fname, "w+")
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_A))
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_B))
        data = util_yaml.load_yaml(fname, True)
        assert data == self.yaml.load(TEST_YAML_B)

    def test_load_bad_data(self):
        """Test error from trying to load unserialisable data."""
        fname = self._path_for("test3")
        with open(fname, "w") as fh:
            fh.write(TEST_BAD_YAML)
        with pytest.raises(HomeAssistantError):
            util_yaml.load_yaml(fname, True)
def _readConfig(self, fileName):
    try:
        with open(fileName, "r") as config:
            yaml = YAML()
            configData = yaml.load(config)
            if not configData:
                configData = {}
    except Exception as e:
        raise ConfigError(fileName, e)

    if "include" in configData:
        for fileName in configData["include"]:
            includeConfig = self._readConfig(fileName)
            for key, val in includeConfig.iteritems():
                if key not in configData:
                    configData[key] = val
                elif not isinstance(configData[key], basestring):
                    # Let's try to merge them if they're collections
                    if isinstance(val, basestring):
                        raise ConfigError(
                            fileName,
                            "The included configuration file tried to merge "
                            "a non-string with a string.")
                    # Make sure both things we're merging are still iterable
                    # types (not numbers or whatever)
                    try:
                        iter(configData[key])
                        iter(val)
                    except TypeError:
                        pass  # Just don't merge them if they're not
                    else:
                        try:
                            configData[key] += val  # Merge with the + operator
                        except TypeError:
                            # Except that some collections (dicts) can't
                            try:
                                # So merge them manually
                                for subkey, subval in val.iteritems():
                                    if subkey not in configData[key]:
                                        configData[key][subkey] = subval
                            except (AttributeError, TypeError):
                                # If either of these, they weren't both dicts
                                # (but were still iterable); requires user to
                                # resolve
                                raise ConfigError(
                                    fileName,
                                    "The variable {} could not be "
                                    "successfully merged across "
                                    "files.".format(key))
        del configData["include"]
    return configData
def _to_text(self, filename=None, is_json=False):
    """Serialize to a json/yaml file"""
    extra_data = {} if self.extra_data is None else self.extra_data

    def cell_value(a_cell):
        if a_cell.formula and a_cell.formula.python_code:
            return '=' + a_cell.formula.python_code
        else:
            return a_cell.value

    extra_data.update(dict(
        excel_hash=self._excel_file_md5_digest,
        cell_map=dict(sorted(
            ((addr, cell_value(cell))
             for addr, cell in self.cell_map.items() if cell.serialize),
            key=lambda x: AddressRange(x[0]).sort_key
        )),
    ))
    if not filename:
        filename = self.filename + ('.json' if is_json else '.yml')

    # hash the current file to see if this function makes any changes
    existing_hash = (self._compute_file_md5_digest(filename)
                     if os.path.exists(filename) else None)

    if not is_json:
        with open(filename, 'w') as f:
            ymlo = YAML()
            ymlo.width = 120
            ymlo.dump(extra_data, f)
    else:
        with open(filename, 'w') as f:
            json.dump(extra_data, f, indent=4)

    del extra_data['cell_map']

    # hash the new file, return True if it changed; this is only reliable
    # on pythons which have ordered dict (CPython 3.6 & python 3.7+)
    return (existing_hash is None or
            existing_hash != self._compute_file_md5_digest(filename))
def main():
    args = parse_args()
    name = construct_name(
        args.exp_name,
        args.lr,
        args.batch_size,
        args.max_steps,
        args.num_epochs,
        args.weight_decay,
        args.optimizer,
        args.iter_per_step,
    )

    # time stamp
    date_time = datetime.now().strftime("%m-%d-%Y -- %H-%M-%S")

    log_dir = name
    if args.work_dir:
        log_dir = os.path.join(args.work_dir, name)

    if args.tensorboard_dir is None:
        tensorboard_dir = os.path.join(name, 'tensorboard', date_time)
    else:
        tensorboard_dir = args.tensorboard_dir

    if args.checkpoint_dir is None:
        checkpoint_dir = os.path.join(name, date_time)
    else:
        base_checkpoint_dir = args.checkpoint_dir
        if len(glob.glob(os.path.join(base_checkpoint_dir, '*.pt'))) > 0:
            checkpoint_dir = base_checkpoint_dir
        else:
            checkpoint_dir = os.path.join(args.checkpoint_dir, date_time)

    # instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=log_dir,
        checkpoint_dir=checkpoint_dir,
        create_tb_writer=args.create_tb_writer,
        files_to_copy=[args.model_config, __file__],
        cudnn_benchmark=args.cudnn_benchmark,
        tensorboard_dir=tensorboard_dir,
    )
    args.num_gpus = neural_factory.world_size

    if args.local_rank is not None:
        logging.info('Doing ALL GPU')

    # build dags
    train_loss, callbacks, steps_per_epoch = create_all_dags(
        args, neural_factory)

    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)

    lr_schedule = jasper_params.get('lr_schedule', 'CosineAnnealing')

    if lr_schedule == 'CosineAnnealing':
        lr_policy = CosineAnnealing(
            total_steps=args.max_steps if args.max_steps is not None
            else args.num_epochs * steps_per_epoch,
            warmup_ratio=args.warmup_ratio,
            min_lr=args.min_lr,
        )
    elif lr_schedule == 'PolynomialDecayAnnealing':
        lr_policy = PolynomialDecayAnnealing(
            total_steps=args.max_steps if args.max_steps is not None
            else args.num_epochs * steps_per_epoch,
            warmup_ratio=args.warmup_ratio,
            min_lr=args.min_lr,
            power=2.0,
        )
    elif lr_schedule == 'PolynomialHoldDecayAnnealing':
        lr_policy = PolynomialHoldDecayAnnealing(
            total_steps=args.max_steps if args.max_steps is not None
            else args.num_epochs * steps_per_epoch,
            warmup_ratio=args.warmup_ratio,
            hold_ratio=args.hold_ratio,
            min_lr=args.min_lr,
            power=2.0,
        )
    else:
        raise ValueError("LR schedule is invalid!")

    logging.info(f"Using `{lr_policy}` Learning Rate Scheduler")

    # train model
    neural_factory.train(
        tensors_to_optimize=[train_loss],
        callbacks=callbacks,
        lr_policy=lr_policy,
        optimizer=args.optimizer,
        optimization_params={
            "num_epochs": args.num_epochs,
            "max_steps": args.max_steps,
            "lr": args.lr,
            "momentum": 0.95,
            "betas": (args.beta1, args.beta2),
            "weight_decay": args.weight_decay,
            "grad_norm_clip": None,
        },
        batches_per_step=args.iter_per_step,
    )
'''
Model-Based Actor-Critic Script: MBPO
Do not modify.
'''
# pylint: disable=E0401
import sys

from ruamel.yaml import YAML

from src.mbpo import MBPO

if __name__ == "__main__":
    # load the yaml config file
    yaml = YAML()
    v = yaml.load(open(sys.argv[1]))

    # initialize the main class
    agent = MBPO(train_kwargs=v["train_kwargs"],
                 model_kwargs=v["model_kwargs"],
                 TD3_kwargs=v["TD3_kwargs"])

    # run the training routine
    agent.train()
    win_wshshl.SendKeys('{F13}')


def say_nihao(systray):
    syncSpeak('你好')


def make_beep(systray):
    winsound.Beep(1000, 1000)


# ━━━ Read/write configuration ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
from ruamel.yaml import YAML
yaml = YAML()
from bin.common import count_file


def get_count():
    # Read the count value directly from the config file
    with open(count_file, encoding='utf-8') as f:
        return yaml.load(f)['count']


def set_count(count):
    # Write the count value directly to the config file
    with open(count_file, 'w', encoding='utf-8') as f:
        yaml.dump({'count': count}, f)


# ■■■ Core functionality ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
class TestASRPytorch(NeMoUnitTest): labels = [ " ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'", ] manifest_filepath = os.path.abspath( os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json")) featurizer_config = { 'window': 'hann', 'dither': 1e-05, 'normalize': 'per_feature', 'frame_splicing': 1, 'int_values': False, 'window_stride': 0.01, 'sample_rate': freq, 'features': 64, 'n_fft': 512, 'window_size': 0.02, } yaml = YAML(typ="safe") @classmethod def setUpClass(cls) -> None: super().setUpClass() data_folder = os.path.abspath( os.path.join(os.path.dirname(__file__), "../data/")) logging.info("Looking up for test ASR data") if not os.path.exists(os.path.join(data_folder, "asr")): logging.info("Extracting ASR data to: {0}".format( os.path.join(data_folder, "asr"))) tar = tarfile.open(os.path.join(data_folder, "asr.tar.gz"), "r:gz") tar.extractall(path=data_folder) tar.close() else: logging.info("ASR data found in: {0}".format( os.path.join(data_folder, "asr"))) @classmethod def tearDownClass(cls) -> None: super().tearDownClass() data_folder = os.path.abspath( os.path.join(os.path.dirname(__file__), "../data/")) logging.info("Looking up for test ASR data") if os.path.exists(os.path.join(data_folder, "asr")): shutil.rmtree(os.path.join(data_folder, "asr")) def test_transcript_normalizers(self): # Create test json test_strings = [ "TEST CAPITALIZATION", '!\\"#$%&\'()*+,-./:;<=>?@[\\\\]^_`{|}~', "3+3=10", "3 + 3 = 10", "why is \\t whitepsace\\tsuch a problem why indeed", "\\\"Can you handle quotes?,\\\" says the boy", "I Jump!!!!With joy?Now.", "Maybe I want to learn periods.", "$10 10.90 1-800-000-0000", "18000000000 one thousand 2020", "1 10 100 1000 10000 100000 1000000", "Î ĻƠvɆȩȅĘ ÀÁÃ Ą ÇĊňńŤŧș", "‘’“”❛❜❝❞「 」 〈 〉 《 》 【 】 〔 〕 ⦗ ⦘ 😙 👀 🔨", "It only costs $1 000 000! Cheap right?", "2500, 3000 are separate but 200, 125 is not", "1", "1 2", "1 2 3", "10:00pm is 10:00 pm is 22:00 but not 10: 00 pm", "10:00 10:01pm 10:10am 10:90pm", "Mr. 
Expand me!", "Mr Don't Expand me!", ] normalized_strings = [ "test capitalization", 'percent and \' plus', "three plus three ten", "three plus three ten", "why is whitepsace such a problem why indeed", "can you handle quotes says the boy", "i jump with joy now", "maybe i want to learn periods", "ten dollars ten point nine zero one eight hundred zero zero", "eighteen billion one thousand two thousand and twenty", # Two line string below "one ten thousand one hundred one thousand ten thousand one " "hundred thousand one million", "i loveeee aaa a ccnntts", "''", "it only costs one million dollars cheap right", # Two line string below "two thousand five hundred three thousand are separate but two " "hundred thousand one hundred and twenty five is not", "one", "one two", "one two three", "ten pm is ten pm is twenty two but not ten zero pm", "ten ten one pm ten ten am ten ninety pm", "mister expand me", "mr don't expand me", ] manifest_paths = os.path.abspath( os.path.join(os.path.dirname(__file__), "../data/asr/manifest_test.json")) def remove_test_json(): os.remove(manifest_paths) self.addCleanup(remove_test_json) with open(manifest_paths, "w") as f: for s in test_strings: f.write('{"audio_filepath": "", "duration": 1.0, "text": ' f'"{s}"}}\n') parser = parsers.make_parser(self.labels, 'en') manifest = collections.ASRAudioText( manifests_files=[manifest_paths], parser=parser, ) for i, s in enumerate(normalized_strings): self.assertTrue(manifest[i].text_tokens == parser(s)) def test_pytorch_audio_dataset(self): featurizer = WaveformFeaturizer.from_config(self.featurizer_config) ds = AudioDataset( manifest_filepath=self.manifest_filepath, labels=self.labels, featurizer=featurizer, ) for i in range(len(ds)): if i == 5: logging.info(ds[i]) # logging.info(ds[i][0].shape) # self.assertEqual(freq, ds[i][0].shape[0]) def test_dataloader(self): batch_size = 4 dl = nemo_asr.AudioToTextDataLayer( # featurizer_config=self.featurizer_config, manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=batch_size, # placement=DeviceType.GPU, drop_last=True, ) for ind, data in enumerate(dl.data_iterator): # With num_workers update, this is no longer true # Moving to GPU is handled by AudioPreprocessor # data is on GPU # self.assertTrue(data[0].is_cuda) # self.assertTrue(data[1].is_cuda) # self.assertTrue(data[2].is_cuda) # self.assertTrue(data[3].is_cuda) # first dimension is batch self.assertTrue(data[0].size(0) == batch_size) self.assertTrue(data[1].size(0) == batch_size) self.assertTrue(data[2].size(0) == batch_size) self.assertTrue(data[3].size(0) == batch_size) def test_preprocessor_errors(self): def create_broken_preprocessor_1(): nemo_asr.AudioToMelSpectrogramPreprocessor(window_size=2, n_window_size=2) def create_broken_preprocessor_2(): nemo_asr.AudioToMelSpectrogramPreprocessor(window_stride=2, n_window_stride=2) def create_broken_preprocessor_3(): nemo_asr.AudioToMelSpectrogramPreprocessor(n_window_stride=2) def create_good_preprocessor_1(): nemo_asr.AudioToMelSpectrogramPreprocessor(window_size=0.02, window_stride=0.01) def create_good_preprocessor_2(): nemo_asr.AudioToMelSpectrogramPreprocessor( window_size=None, window_stride=None, n_window_size=256, n_window_stride=32, ) self.assertRaises(ValueError, create_broken_preprocessor_1) self.assertRaises(ValueError, create_broken_preprocessor_2) self.assertRaises(ValueError, create_broken_preprocessor_3) create_good_preprocessor_1() create_good_preprocessor_2() def test_kaldi_dataloader(self): batch_size = 4 dl = 
nemo_asr.KaldiFeatureDataLayer( kaldi_dir=os.path.abspath( os.path.join(os.path.dirname(__file__), '../data/asr/kaldi_an4/')), labels=self.labels, batch_size=batch_size, ) for data in dl.data_iterator: self.assertTrue(data[0].size(0) == batch_size) dl_test_min = nemo_asr.KaldiFeatureDataLayer( kaldi_dir=os.path.abspath( os.path.join(os.path.dirname(__file__), '../data/asr/kaldi_an4/')), labels=self.labels, batch_size=batch_size, min_duration=1.0, ) self.assertTrue(len(dl_test_min) == 18) dl_test_max = nemo_asr.KaldiFeatureDataLayer( kaldi_dir=os.path.abspath( os.path.join(os.path.dirname(__file__), '../data/asr/kaldi_an4/')), labels=self.labels, batch_size=batch_size, max_duration=5.0, ) self.assertTrue(len(dl_test_max) == 19) def test_trim_silence(self): batch_size = 4 normal_dl = nemo_asr.AudioToTextDataLayer( # featurizer_config=self.featurizer_config, manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=batch_size, # placement=DeviceType.GPU, drop_last=True, shuffle=False, ) trimmed_dl = nemo_asr.AudioToTextDataLayer( # featurizer_config=self.featurizer_config, manifest_filepath=self.manifest_filepath, trim_silence=True, labels=self.labels, batch_size=batch_size, # placement=DeviceType.GPU, drop_last=True, shuffle=False, ) for norm, trim in zip(normal_dl.data_iterator, trimmed_dl.data_iterator): for point in range(batch_size): self.assertTrue(norm[1][point].data >= trim[1][point].data) def test_audio_preprocessors(self): batch_size = 5 dl = nemo_asr.AudioToTextDataLayer( # featurizer_config=self.featurizer_config, manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=batch_size, # placement=DeviceType.GPU, drop_last=True, shuffle=False, ) installed_torchaudio = True try: import torchaudio except ModuleNotFoundError: installed_torchaudio = False with self.assertRaises(ModuleNotFoundError): to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor( n_fft=400, window=None) with self.assertRaises(ModuleNotFoundError): to_mfcc = nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15) if installed_torchaudio: to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor( n_fft=400, window=None) to_mfcc = nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15) to_melspec = nemo_asr.AudioToMelSpectrogramPreprocessor(features=50) for batch in dl.data_iterator: input_signals, seq_lengths, _, _ = batch input_signals = input_signals.to(to_melspec._device) seq_lengths = seq_lengths.to(to_melspec._device) melspec = to_melspec.forward(input_signals, seq_lengths) if installed_torchaudio: spec = to_spectrogram.forward(input_signals, seq_lengths) mfcc = to_mfcc.forward(input_signals, seq_lengths) # Check that number of features is what we expect self.assertTrue(melspec[0].shape[1] == 50) if installed_torchaudio: self.assertTrue(spec[0].shape[1] == 201) # n_fft // 2 + 1 bins self.assertTrue(mfcc[0].shape[1] == 15) # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid") def test_jasper_training(self): with open( os.path.abspath( os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))) as file: jasper_model_definition = self.yaml.load(file) dl = nemo_asr.AudioToTextDataLayer( # featurizer_config=self.featurizer_config, manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=4, ) pre_process_params = { 'frame_splicing': 1, 'features': 64, 'window_size': 0.02, 'n_fft': 512, 'dither': 1e-05, 'window': 'hann', 'sample_rate': 16000, 'normalize': 'per_feature', 'window_stride': 0.01, } preprocessing = 
nemo_asr.AudioToMelSpectrogramPreprocessor( **pre_process_params) jasper_encoder = nemo_asr.JasperEncoder( feat_in=jasper_model_definition[ 'AudioToMelSpectrogramPreprocessor']['features'], **jasper_model_definition['JasperEncoder'], ) jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len( self.labels)) ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) # DAG audio_signal, a_sig_length, transcript, transcript_len = dl() processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) # logging.info(jasper_encoder) log_probs = jasper_decoder(encoder_output=encoded) loss = ctc_loss( log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, ) callback = nemo.core.SimpleLossLoggerCallback( tensors=[loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}' ), ) # Instantiate an optimizer to perform `train` action optimizer = self.nf.get_trainer() optimizer.train( [loss], callbacks=[callback], optimizer="sgd", optimization_params={ "num_epochs": 10, "lr": 0.0003 }, ) # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid") def test_double_jasper_training(self): with open( os.path.abspath( os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))) as file: jasper_model_definition = self.yaml.load(file) dl = nemo_asr.AudioToTextDataLayer( # featurizer_config=self.featurizer_config, manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=4, ) pre_process_params = { 'frame_splicing': 1, 'features': 64, 'window_size': 0.02, 'n_fft': 512, 'dither': 1e-05, 'window': 'hann', 'sample_rate': 16000, 'normalize': 'per_feature', 'window_stride': 0.01, } preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor( **pre_process_params) jasper_encoder1 = nemo_asr.JasperEncoder( feat_in=jasper_model_definition[ 'AudioToMelSpectrogramPreprocessor']['features'], **jasper_model_definition['JasperEncoder'], ) jasper_encoder2 = nemo_asr.JasperEncoder( feat_in=jasper_model_definition[ 'AudioToMelSpectrogramPreprocessor']['features'], **jasper_model_definition['JasperEncoder'], ) mx_max1 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") mx_max2 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") jasper_decoder1 = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len( self.labels)) jasper_decoder2 = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len( self.labels)) ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) # DAG audio_signal, a_sig_length, transcript, transcript_len = dl() processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) encoded1, encoded_len1 = jasper_encoder1(audio_signal=processed_signal, length=p_length) encoded2, encoded_len2 = jasper_encoder2(audio_signal=processed_signal, length=p_length) log_probs1 = jasper_decoder1(encoder_output=encoded1) log_probs2 = jasper_decoder2(encoder_output=encoded2) log_probs = mx_max1(x1=log_probs1, x2=log_probs2) encoded_len = mx_max2(x1=encoded_len1, x2=encoded_len2) loss = ctc_loss( log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, ) callback = nemo.core.SimpleLossLoggerCallback( tensors=[loss], print_func=lambda x: logging.info(str(x[0].item()))) # Instantiate an optimizer to perform `train` action optimizer = self.nf.get_trainer() optimizer.train( [loss], callbacks=[callback], 
optimizer="sgd", optimization_params={ "num_epochs": 10, "lr": 0.0003 }, ) # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid") def test_quartznet_training(self): with open( os.path.abspath( os.path.join(os.path.dirname(__file__), "../data/quartznet_test.yaml"))) as f: quartz_model_definition = self.yaml.load(f) dl = nemo_asr.AudioToTextDataLayer( manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=4, ) pre_process_params = { 'frame_splicing': 1, 'features': 64, 'window_size': 0.02, 'n_fft': 512, 'dither': 1e-05, 'window': 'hann', 'sample_rate': 16000, 'normalize': 'per_feature', 'window_stride': 0.01, } preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor( **pre_process_params) jasper_encoder = nemo_asr.JasperEncoder( feat_in=quartz_model_definition[ 'AudioToMelSpectrogramPreprocessor']['features'], **quartz_model_definition['JasperEncoder'], ) jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len( self.labels)) ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) # DAG audio_signal, a_sig_length, transcript, transcript_len = dl() processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) log_probs = jasper_decoder(encoder_output=encoded) loss = ctc_loss( log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, ) callback = nemo.core.SimpleLossLoggerCallback( tensors=[loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}' ), ) # Instantiate an optimizer to perform `train` action optimizer = self.nf.get_trainer() optimizer.train( [loss], callbacks=[callback], optimizer="sgd", optimization_params={ "num_epochs": 10, "lr": 0.0003 }, ) def test_stft_conv(self): with open( os.path.abspath( os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))) as file: jasper_model_definition = self.yaml.load(file) dl = nemo_asr.AudioToTextDataLayer( manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=4, ) pre_process_params = { 'frame_splicing': 1, 'features': 64, 'window_size': 0.02, 'n_fft': 512, 'dither': 1e-05, 'window': 'hann', 'sample_rate': 16000, 'normalize': 'per_feature', 'window_stride': 0.01, 'stft_conv': True, } preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor( **pre_process_params) jasper_encoder = nemo_asr.JasperEncoder( feat_in=jasper_model_definition[ 'AudioToMelSpectrogramPreprocessor']['features'], **jasper_model_definition['JasperEncoder'], ) jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len( self.labels)) ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) # DAG audio_signal, a_sig_length, transcript, transcript_len = dl() processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) # logging.info(jasper_encoder) log_probs = jasper_decoder(encoder_output=encoded) loss = ctc_loss( log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, ) callback = nemo.core.SimpleLossLoggerCallback( tensors=[loss], print_func=lambda x: logging.info(str(x[0].item()))) # Instantiate an optimizer to perform `train` action optimizer = self.nf.get_trainer() optimizer.train( [loss], callbacks=[callback], optimizer="sgd", optimization_params={ "num_epochs": 10, "lr": 0.0003 }, ) def test_clas(self): with 
open('examples/asr/experimental/configs/garnet_an4.yaml') as file: cfg = self.yaml.load(file) dl = nemo_asr.AudioToTextDataLayer( manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=4, ) pre_process_params = { 'frame_splicing': 1, 'features': 64, 'window_size': 0.02, 'n_fft': 512, 'dither': 1e-05, 'window': 'hann', 'sample_rate': 16000, 'normalize': 'per_feature', 'window_stride': 0.01, 'stft_conv': True, } preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor( **pre_process_params) encoder = nemo_asr.JasperEncoder( jasper=cfg['encoder']['jasper'], activation=cfg['encoder']['activation'], feat_in=cfg['input']['train']['features'], ) connector = nemo_asr.JasperRNNConnector( in_channels=cfg['encoder']['jasper'][-1]['filters'], out_channels=cfg['decoder']['hidden_size'], ) decoder = nemo.backends.pytorch.common.DecoderRNN( voc_size=len(self.labels), bos_id=0, hidden_size=cfg['decoder']['hidden_size'], attention_method=cfg['decoder']['attention_method'], attention_type=cfg['decoder']['attention_type'], in_dropout=cfg['decoder']['in_dropout'], gru_dropout=cfg['decoder']['gru_dropout'], attn_dropout=cfg['decoder']['attn_dropout'], teacher_forcing=cfg['decoder']['teacher_forcing'], curriculum_learning=cfg['decoder']['curriculum_learning'], rnn_type=cfg['decoder']['rnn_type'], n_layers=cfg['decoder']['n_layers'], tie_emb_out_weights=cfg['decoder']['tie_emb_out_weights'], ) loss = nemo.backends.pytorch.common.SequenceLoss() # DAG audio_signal, a_sig_length, transcripts, transcript_len = dl() processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) encoded, encoded_len = encoder(audio_signal=processed_signal, length=p_length) encoded = connector(tensor=encoded) log_probs, _ = decoder(targets=transcripts, encoder_outputs=encoded) loss = loss(log_probs=log_probs, targets=transcripts) # Train callback = nemo.core.SimpleLossLoggerCallback( tensors=[loss], print_func=lambda x: logging.info(str(x[0].item()))) # Instantiate an optimizer to perform `train` action optimizer = self.nf.get_trainer() optimizer.train( [loss], callbacks=[callback], optimizer="sgd", optimization_params={ "num_epochs": 10, "lr": 0.0003 }, ) def test_jasper_eval(self): with open( os.path.abspath( os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))) as file: jasper_model_definition = self.yaml.load(file) dl = nemo_asr.AudioToTextDataLayer( manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=4, ) pre_process_params = { 'frame_splicing': 1, 'features': 64, 'window_size': 0.02, 'n_fft': 512, 'dither': 1e-05, 'window': 'hann', 'sample_rate': 16000, 'normalize': 'per_feature', 'window_stride': 0.01, } preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor( **pre_process_params) jasper_encoder = nemo_asr.JasperEncoder( feat_in=jasper_model_definition[ 'AudioToMelSpectrogramPreprocessor']['features'], **jasper_model_definition['JasperEncoder'], ) jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len( self.labels)) ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) greedy_decoder = nemo_asr.GreedyCTCDecoder() # DAG audio_signal, a_sig_length, transcript, transcript_len = dl() processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) # logging.info(jasper_encoder) log_probs = jasper_decoder(encoder_output=encoded) loss = ctc_loss( log_probs=log_probs, targets=transcript, 
input_length=encoded_len, target_length=transcript_len, ) predictions = greedy_decoder(log_probs=log_probs) from nemo.collections.asr.helpers import ( process_evaluation_batch, process_evaluation_epoch, ) eval_callback = nemo.core.EvaluatorCallback( eval_tensors=[loss, predictions, transcript, transcript_len], user_iter_callback=lambda x, y: process_evaluation_batch( x, y, labels=self.labels), user_epochs_done_callback=process_evaluation_epoch, ) # Instantiate an optimizer to perform `train` action self.nf.eval(callbacks=[eval_callback])
def load(self):
    """Load the data about Stylesheet Assets and the new CSS content."""
    yaml = YAML(typ="safe")
    yaml.register_class(StylesheetData)
    yaml.register_class(StylesheetImageList)
    yaml.register_class(LocalStylesheetImage)
    yaml.register_class(StoredStylesheetImage)
    yaml.register_class(RemoteStylesheetImage)

    logger.debug("Loading serialized StylesheetData class from: "
                 "'{}'".format(self.config["data_file"]))
    try:
        with open(self.config["data_file"], "r") as yaml_stream:
            self.stylesheet_data = yaml.load(yaml_stream)
    except OSError as error:
        raise FileReadingException(
            error,
            "the Stylesheet Data file",
        ) from error

    logger.debug("Loading CSS content from: '{}'".format(
        self.stylesheet_data.css_file))
    try:
        with open(self.stylesheet_data.css_file, "r", encoding="utf-8") \
                as css_stream:
            self.css_content = css_stream.read()
    except OSError as error:
        raise FileReadingException(
            error,
            "the CSS file",
        ) from error
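# register_class() is what lets the safe loader above round-trip the project's
# custom classes. A minimal, self-contained sketch of the same mechanism, using
# an illustrative Point class rather than the classes registered above:
import sys

from ruamel.yaml import YAML


class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y


yaml = YAML(typ="safe")
yaml.register_class(Point)

# Instances are dumped as their attributes under a !Point tag...
yaml.dump([Point(1, 2)], sys.stdout)
# ...and loading a !Point node reconstructs a Point instance.
points = yaml.load("- !Point {x: 3, y: 4}\n")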
def create_all_dags(args, neural_factory):
    logger = neural_factory.logger
    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)
    vocab = jasper_params['labels']
    sample_rate = jasper_params['sample_rate']

    # Calculate num_workers for dataloader
    total_cpus = os.cpu_count()
    cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1)

    # perturb_config = jasper_params.get('perturb', None)
    train_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    train_dl_params.update(jasper_params["AudioToTextDataLayer"]["train"])
    del train_dl_params["train"]
    del train_dl_params["eval"]
    # del train_dl_params["normalize_transcripts"]

    data_layer = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=args.train_dataset,
        sample_rate=sample_rate,
        labels=vocab,
        batch_size=args.batch_size,
        num_workers=cpu_per_traindl,
        **train_dl_params,
        # normalize_transcripts=False
    )

    N = len(data_layer)
    steps_per_epoch = int(N / (args.batch_size * args.num_gpus))
    logger.info('Have {0} examples to train on.'.format(N))

    data_preprocessor = nemo_asr.AudioPreprocessing(
        sample_rate=sample_rate,
        **jasper_params["AudioPreprocessing"])

    multiply_batch_config = jasper_params.get('MultiplyBatch', None)
    if multiply_batch_config:
        multiply_batch = nemo_asr.MultiplyBatch(**multiply_batch_config)

    spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None)
    if spectr_augment_config:
        data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(
            **spectr_augment_config)

    eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"])
    del eval_dl_params["train"]
    del eval_dl_params["eval"]
    data_layers_eval = []

    if args.eval_datasets:
        for eval_datasets in args.eval_datasets:
            data_layer_eval = nemo_asr.AudioToTextDataLayer(
                manifest_filepath=eval_datasets,
                sample_rate=sample_rate,
                labels=vocab,
                batch_size=args.eval_batch_size,
                num_workers=cpu_per_traindl,
                **eval_dl_params,
            )
            data_layers_eval.append(data_layer_eval)
    else:
        neural_factory.logger.info("There were no val datasets passed")

    jasper_encoder = nemo_asr.JasperEncoder(
        feat_in=jasper_params["AudioPreprocessing"]["features"],
        **jasper_params["JasperEncoder"])

    jasper_decoder = nemo_asr.JasperDecoderForCTC(
        feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"],
        num_classes=len(vocab),
        factory=neural_factory)

    ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab))

    greedy_decoder = nemo_asr.GreedyCTCDecoder()

    logger.info('================================')
    logger.info(
        f"Number of parameters in encoder: {jasper_encoder.num_weights}")
    logger.info(
        f"Number of parameters in decoder: {jasper_decoder.num_weights}")
    logger.info(f"Total number of parameters in decoder: "
                f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
    logger.info('================================')

    # Train DAG
    audio_signal_t, a_sig_length_t, \
        transcript_t, transcript_len_t = data_layer()
    processed_signal_t, p_length_t = data_preprocessor(
        input_signal=audio_signal_t,
        length=a_sig_length_t)

    if multiply_batch_config:
        processed_signal_t, p_length_t, transcript_t, transcript_len_t = \
            multiply_batch(
                in_x=processed_signal_t, in_x_len=p_length_t,
                in_y=transcript_t, in_y_len=transcript_len_t)

    if spectr_augment_config:
        processed_signal_t = data_spectr_augmentation(
            input_spec=processed_signal_t)

    encoded_t, encoded_len_t = jasper_encoder(audio_signal=processed_signal_t,
                                              length=p_length_t)
    log_probs_t = jasper_decoder(encoder_output=encoded_t)
    predictions_t = greedy_decoder(log_probs=log_probs_t)
    loss_t = ctc_loss(log_probs=log_probs_t,
                      targets=transcript_t,
                      input_length=encoded_len_t,
                      target_length=transcript_len_t)

    # Callbacks needed to print info to console and Tensorboard
    train_callback = nemo.core.SimpleLossLoggerCallback(
        tensors=[loss_t, predictions_t, transcript_t, transcript_len_t],
        print_func=partial(monitor_asr_train_progress,
                           labels=vocab,
                           logger=logger),
        get_tb_values=lambda x: [("loss", x[0])],
        tb_writer=neural_factory.tb_writer,
    )

    chpt_callback = nemo.core.CheckpointCallback(
        folder=neural_factory.checkpoint_dir,
        step_freq=args.checkpoint_save_freq)

    callbacks = [train_callback, chpt_callback]

    # assemble eval DAGs
    for i, eval_dl in enumerate(data_layers_eval):
        audio_signal_e, a_sig_length_e, transcript_e, transcript_len_e = \
            eval_dl()
        processed_signal_e, p_length_e = data_preprocessor(
            input_signal=audio_signal_e,
            length=a_sig_length_e)
        encoded_e, encoded_len_e = jasper_encoder(
            audio_signal=processed_signal_e,
            length=p_length_e)
        log_probs_e = jasper_decoder(encoder_output=encoded_e)
        predictions_e = greedy_decoder(log_probs=log_probs_e)
        loss_e = ctc_loss(log_probs=log_probs_e,
                          targets=transcript_e,
                          input_length=encoded_len_e,
                          target_length=transcript_len_e)

        # create corresponding eval callback
        tagname = os.path.basename(args.eval_datasets[i]).split(".")[0]
        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[
                loss_e, predictions_e, transcript_e, transcript_len_e
            ],
            user_iter_callback=partial(process_evaluation_batch,
                                       labels=vocab),
            user_epochs_done_callback=partial(process_evaluation_epoch,
                                              tag=tagname,
                                              logger=logger),
            eval_step=args.eval_freq,
            tb_writer=neural_factory.tb_writer)

        callbacks.append(eval_callback)

    return loss_t, callbacks, steps_per_epoch
def get_yaml(path: str) -> CommentedMap:
    bytes_data = get_data(path)
    # Replace CRLF or the yaml loader will load extra lines
    string_data = bytes_data.decode('utf8').replace('\r\n', '\n')
    ret = YAML().load(string_data)
    return ret
from datetime import datetime
import os
import pathlib
from subprocess import check_call

from ruamel.yaml import YAML

MINICONDA_VERSION = '4.3.27'
HERE = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
ENV_FILE = 'environment.yml'
FROZEN_FILE = 'environment.frozen.yml'

yaml = YAML(typ='rt')


def fixup(frozen_file):
    """Fixup a frozen environment file

    Conda export has a bug!
    https://github.com/conda/conda/pull/6391
    """
    with open(frozen_file) as f:
        env = yaml.load(f)

    # scrub spurious pip dependencies
    # due to conda #6391
    # note: this scrubs *all* pip dependencies,
            newkey = ''
        lineNum += 1
    return ret_val


with open(os.path.join(ISTIO_IO_DIR, CONFIG_INDEX_DIR), 'r') as f:
    endReached = False
    data = f.read().split('\n')
    for d in data:
        print d
        if "<!-- AUTO-GENERATED-START -->" in d:
            break

# transform values.yaml into an encoded string dictionary
yaml = YAML()
yaml.explicit_start = True
yaml.dump('', sys.stdout, transform=decode_helm_yaml)

# Order the encoded string dictionary
od = collections.OrderedDict(sorted(prdict.items(), key=lambda t: t[0]))

# Print encoded string dictionary
for k, v in od.items():
    print("## `%s` options\n" % k)
    print '| Key | Default Value | Description |'
    print '| --- | --- | --- |'
    for value in v:
        print('%s' % (value))
    print('')
def pin_dependencies_in_conda_env_file_from_version_spec(
        filepath, versions_to_pin, dry_run=False):
    '''
    Pin package versions to a given spec

    Parameters
    ----------
    filepath : str
        Conda environment yml file to be pinned
    versions_to_pin : dict
        Dictionary of package specs, with keys package sources (e.g.
        ``conda``, ``pip``), and values dictionaries of package names and
        pinned versions.
    dry_run : bool
        Print the updated environment files, rather than overwriting them.
        Default False.
    '''
    indent_config = dict(mapping=2, sequence=2, offset=2)

    yaml = YAML(typ='rt')
    yaml.indent(**indent_config)
    yaml.default_flow_style = False

    with open(filepath, 'r') as f:
        file_spec = yaml.load(f)

    for di, dep in enumerate(file_spec['dependencies']):
        if isinstance(dep, dict):
            for k, v in dep.items():
                for si, subdep in enumerate(v):
                    pinned, comment = _determine_pinned_version(
                        subdep, versions_to_pin[k])
                    file_spec['dependencies'][di][k][si] = pinned
                    if comment is not None:
                        file_spec['dependencies'][di][k].yaml_add_eol_comment(
                            comment, si)
        else:
            pinned, comment = _determine_pinned_version(
                dep, versions_to_pin['conda'])
            file_spec['dependencies'][di] = pinned
            if comment is not None:
                file_spec['dependencies'].yaml_add_eol_comment(comment, di)

    if dry_run:
        sys.stdout.write("filename: {}\n{}\n".format(filepath, '-' * 50))
        with YAML(output=sys.stdout) as yaml:
            yaml.indent(**indent_config)
            yaml.dump(file_spec)
        sys.stdout.write("\n")
    else:
        with open(filepath, 'w+') as f:
            yaml.dump(file_spec, f)
"meta", attrs=dict(name="csrf-token"))["content"] result = requests.post( "https://hackmd.io/new", data={"content": source}, headers={ "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8", "X-XSRF-Token": token, "User-Agent": "HackMD Python client", }, cookies=response.cookies, ) return result.url if __name__ == "__main__": yaml = YAML() g = Github(os.getenv("VSF_BOT_TOKEN")) with open("../.github/workflows/open_org_issue.yml") as f: data = yaml.load(f) minutes, hours_utc, *_ = data["on"]["schedule"][0]["cron"].split() # Format meeting time for the *upcoming* meeting meeting_time = datetime.now() + timedelta(days=7) meeting_time = meeting_time.replace(hour=int(hours_utc), minute=int(minutes)) formatted_time = ( f"{meeting_time + timedelta(hours=2):%-H:%M} European " f"/ {meeting_time - timedelta(hours=4):%-I:%M %p} Eastern") # *Today's* meeting will be next week's previous meeting previous_meeting_time = datetime.now().replace(hour=int(hours_utc),
def unpin_dependencies_in_conda_env_file(filepath, dry_run=False):
    '''
    Un-pin dependencies in conda environment file

    If it encounters dependencies with ``# pinkeep: pkg=vers`` directives,
    these are preserved verbatim in the final spec.

    Parameters
    ----------
    filepath : str
        Path to the environment file to unpin
    dry_run : bool, optional
        Print rather than modify the environment file
    '''
    indent_config = dict(mapping=2, sequence=2, offset=2)

    yaml = YAML(typ='rt')
    yaml.indent(**indent_config)
    yaml.default_flow_style = False

    with open(filepath, 'r') as f:
        file_spec = yaml.load(f)

    for di, dep in enumerate(file_spec['dependencies']):
        if isinstance(dep, dict):
            for k, v in dep.items():
                for si, subdep in enumerate(v):
                    file_spec['dependencies'][di][k][si] = _unpin_dependency(
                        file_spec['dependencies'][di][k], si)
        else:
            file_spec['dependencies'][di] = _unpin_dependency(
                file_spec['dependencies'], di)

    if dry_run:
        sys.stdout.write("filename: {}\n{}\n".format(filepath, '-' * 50))
        with YAML(output=sys.stdout) as yaml:
            yaml.indent(**indent_config)
            yaml.dump(file_spec)
        sys.stdout.write("\n")
    else:
        with open(filepath, 'w+') as f:
            yaml.dump(file_spec, f)
import subprocess
import sys
import tempfile

import pytest
from contextlib import redirect_stderr, redirect_stdout
from pathlib import Path
from textwrap import dedent

from ruamel.yaml import YAML

from auth import KeyProvider
from utils import print_colour
from file_acquisition import get_decrypted_file, get_decrypted_files

# Without `pure=True`, I get an exception about str / byte issues
yaml = YAML(typ="safe", pure=True)

helm_charts_dir = Path(__file__).parent.parent.joinpath("helm-charts")


class Hub:
    """
    A single, deployable JupyterHub
    """

    def __init__(self, cluster, spec):
        self.cluster = cluster
        self.spec = spec

    def get_generated_config(self, auth_provider: KeyProvider, secret_key):
        """
        Generate config automatically for each hub
from ipam.models import Role
from ruamel.yaml import YAML
from pathlib import Path
import sys

file = Path('/opt/netbox/initializers/prefix_vlan_roles.yml')
if not file.is_file():
    sys.exit()

with file.open('r') as stream:
    yaml = YAML(typ='safe')
    roles = yaml.load(stream)

    if roles is not None:
        for params in roles:
            role, created = Role.objects.get_or_create(**params)

            if created:
                print("⛹️ Created Prefix/VLAN Role", role.name)
from ruamel.yaml import YAML

if len(sys.argv) != 3:
    print("Script call: <name> <switch_item_name> <switch_item_status>")
    sys.exit(1)

switch_item_name = sys.argv[1]
switch_item_status = sys.argv[2]

data_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.abspath(
    os.path.dirname(data_dir + "/../../broadlink-data/"))
# print(data_dir)

content = open(data_dir + "/system.yml", "r").read()
yaml = YAML()
system_dictionary = yaml.load(content)

content = open(data_dir + "/irb.yml", "r").read()
yaml = YAML()
mapping_dictionary = yaml.load(content)

code = mapping_dictionary["mapping_dictionary"][switch_item_name][
    switch_item_status]

operations = ""
if (code != ""):
    operations += "start"
try:
from great_expectations.data_context.util import file_relative_path
from great_expectations.util import lint_code
from great_expectations.validation_operators.types.validation_operator_result import (
    ValidationOperatorResult,
)

try:
    from sqlalchemy.exc import SQLAlchemyError
except ImportError:
    SQLAlchemyError = RuntimeError

yaml = YAML()
yaml.indent(mapping=2, sequence=4, offset=2)

"""
--ge-feature-maturity-info--

    id: checkpoint_command_line
    title: LegacyCheckpoint - Command Line
    icon:
    short_description: Run a configured legacy checkpoint from a command line.
    description: Run a configured legacy checkpoint from a command line in a Terminal shell.
    how_to_guide_url: https://docs.greatexpectations.io/en/latest/guides/how_to_guides/validation/how_to_run_a_checkpoint_in_terminal.html
    maturity: Experimental
    maturity_details:
        api_stability: Unstable (expect changes to batch request; no checkpoint store)
        implementation_completeness: Complete
        unit_test_coverage: Complete
from scripts import mkimage, createWorkflowYaml, createFat32BootYaml from workflow.mainConfig import MainConfig logPath = "./" fileName = "Disk" logging.basicConfig( level=logging.INFO, format= "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s", handlers=[ logging.FileHandler("{0}/{1}.log".format(logPath, fileName)), logging.StreamHandler() ]) yaml = YAML() yaml.register_class(MainConfig) mainConfig = None cwd = os.getcwd() def readConfig(path): global mainConfig # deserialize fat config file with open(path, 'r') as inp: mainConfig = yaml.load(inp) def prepareDisk():
def update_lastmod(verbose):
    count = 0
    yaml = YAML()

    for post in glob.glob(os.path.join(POSTS_PATH, "*.md")):
        git_log_count = subprocess.getoutput(
            "git log --pretty=%ad {} | wc -l".format(post))

        if git_log_count == "1":
            continue

        git_lastmod = subprocess.getoutput(
            "git log -1 --pretty=%ad --date=iso " + post)

        if not git_lastmod:
            continue

        latest_commit = subprocess.getoutput("git log -1 --pretty=%B " + post)

        if "[Automation]" in latest_commit and "Lastmod" in latest_commit:
            continue

        frontmatter, line_num = get_yaml(post)
        meta = yaml.load(frontmatter)

        if 'seo' in meta:
            if ('date_modified' in meta['seo'] and
                    meta['seo']['date_modified'] == git_lastmod):
                continue
            else:
                meta['seo']['date_modified'] = git_lastmod
        else:
            meta.insert(line_num, 'seo', dict(date_modified=git_lastmod))

        output = 'new.md'
        if os.path.isfile(output):
            os.remove(output)

        with open(output, 'w') as new, open(post, 'r') as old:
            new.write("---\n")
            yaml.dump(meta, new)
            new.write("---\n")
            line_num += 2

            lines = old.readlines()

            for line in lines:
                if line_num > 0:
                    line_num -= 1
                    continue
                else:
                    new.write(line)

        shutil.move(output, post)
        count += 1

        if verbose:
            print("[INFO] updated 'lastmod' for: " + post)

    if count > 0:
        print("[INFO] Successfully updated lastmod for {} post(s).".format(count))
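# `get_yaml` is defined elsewhere in this script; `update_lastmod` only relies on it returning
# the raw front-matter text of a post plus the number of lines that text spans. A minimal
# sketch under that assumption (the real helper may differ):
def get_yaml(path):
    """Return (front_matter_str, line_count) for a Jekyll post delimited by '---' lines."""
    lines = []
    with open(path, 'r') as f:
        first = f.readline()
        if first.strip() != "---":
            raise ValueError("{} has no front-matter block".format(path))
        for line in f:
            if line.strip() == "---":
                break
            lines.append(line)
    return "".join(lines), len(lines)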
def _get_yaml(): y = YAML(typ='safe') y.default_flow_style = False return y
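# Usage sketch for `_get_yaml`: the safe loader yields plain dicts and lists, and with
# `default_flow_style = False` the dumper emits block style.
import sys

_yaml = _get_yaml()
_doc = _yaml.load("retries: 3\nhosts:\n  - alpha\n  - beta\n")
_doc["hosts"].append("gamma")
_yaml.dump(_doc, sys.stdout)  # prints block-style YAML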
from great_expectations.data_context.templates import CONFIG_VARIABLES_TEMPLATE from great_expectations.exceptions import ConfigNotFoundError try: from unittest import mock except ImportError: import mock from six import PY2 from great_expectations.cli import cli from great_expectations.util import gen_directory_tree_str from great_expectations import __version__ as ge_version from .test_utils import assertDeepAlmostEqual yaml = YAML() yaml.default_flow_style = False def test_cli_command_entrance(): runner = CliRunner() result = runner.invoke(cli) assert result.exit_code == 0 assert result.output == """Usage: cli [OPTIONS] COMMAND [ARGS]... great_expectations command-line interface Options: --version Show the version and exit. -v, --verbose Set great_expectations to use verbose output.
from django.contrib.auth.models import Group, User from ruamel.yaml import YAML with open('/opt/netbox/initializers/groups.yml', 'r') as stream: yaml = YAML(typ='safe') groups = yaml.load(stream) if groups is not None: for groupname, group_details in groups.items(): group, created = Group.objects.get_or_create(name=groupname) if created: print("👥 Created group", groupname) for username in group_details['users']: user = User.objects.get(username=username) if user: user.groups.add(group)
# coding=utf-8 import os, json, io from flask import Flask, jsonify, request from flask_cors import CORS from ruamel.yaml import YAML yaml_parser = YAML() #typ="safe" app = Flask(__name__) CORS(app) def get_YAML_string(obj): strngio = io.StringIO() yaml_parser.dump(obj, strngio) strngio.seek(0) yamlstr = strngio.read() strngio.close() return yamlstr @app.route('/questions/<int:number>') def get_question(number: int): # Number is base 1 jsonpath = os.path.join( os.path.split(os.path.split(__file__)[0])[0], 'static', 'Questions.json') with open(jsonpath, "rt") as opf: jsonstring = opf.read() qdct = json.loads(jsonstring) questionobj = qdct[number - 1]
def read_yaml_file(file_path: str) -> dict: with open(file_path, 'r', encoding='utf-8') as yaml_file: yaml = YAML(typ='safe') return yaml.load(yaml_file)
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """This module handles string related IO. """ from io import StringIO from ruamel.yaml import YAML yaml = YAML(typ='unsafe') def read_yaml_str(content: str) -> object: """Parse the given yaml str and return the python object.""" return yaml.load(content) def to_yaml_str(obj: object) -> str: """Converts the given python object into a YAML string.""" stream = StringIO() yaml.dump(obj, stream) return stream.getvalue()
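# Round-trip usage sketch for the two helpers above: an object serialised with `to_yaml_str`
# is recovered unchanged by `read_yaml_str`.
config = {"name": "demo", "replicas": 2, "tags": ["a", "b"]}
assert read_yaml_str(to_yaml_str(config)) == config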
# # You should have received a copy of the GNU General Public License # along with Exhibition. If not, see <https://www.gnu.org/licenses/>. # ## from collections import OrderedDict from importlib import import_module import hashlib import pathlib from ruamel.yaml import YAML from .config import Config yaml_parser = YAML(typ="safe") DATA_EXTRACTORS = { ".yaml": yaml_parser.load, ".json": yaml_parser.load, } DEFAULT_STRIP_EXTS = [".html"] DEFAULT_INDEX_FILE = "index.html" class Node: """ A node represents a file or directory """ _meta_names = ["meta.yaml", "meta.yml"]
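# The module above registers loaders per file suffix. A minimal sketch of how the
# DATA_EXTRACTORS table might be consulted when a node reads its data file -- the dispatch
# function itself is not part of this excerpt and its name is an assumption:
def extract_data(path: pathlib.Path):
    extractor = DATA_EXTRACTORS.get(path.suffix)
    if extractor is None:
        raise ValueError("no data extractor registered for {!r}".format(path.suffix))
    with path.open() as f:
        return extractor(f)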
def create_all_dags(args, neural_factory): yaml = YAML(typ="safe") with open(args.model_config) as f: jasper_params = yaml.load(f) labels = jasper_params['labels'] # Vocab of tokens sample_rate = jasper_params['sample_rate'] # Calculate num_workers for dataloader total_cpus = os.cpu_count() cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1) # perturb_config = jasper_params.get('perturb', None) train_dl_params = copy.deepcopy( jasper_params["AudioToSpeechLabelDataLayer"]) train_dl_params.update( jasper_params["AudioToSpeechLabelDataLayer"]["train"]) del train_dl_params["train"] del train_dl_params["eval"] # del train_dl_params["normalize_transcripts"] # Look for augmentations audio_augmentor = jasper_params.get('AudioAugmentor', None) data_layer = nemo_asr.AudioToSpeechLabelDataLayer( manifest_filepath=args.train_dataset, labels=labels, sample_rate=sample_rate, batch_size=args.batch_size, num_workers=cpu_per_traindl, augmentor=audio_augmentor, **train_dl_params, ) crop_pad_augmentation = nemo_asr.CropOrPadSpectrogramAugmentation( audio_length=128) N = len(data_layer) steps_per_epoch = math.ceil( N / (args.batch_size * args.iter_per_step * args.num_gpus)) logging.info('Steps per epoch : {0}'.format(steps_per_epoch)) logging.info('Have {0} examples to train on.'.format(N)) data_preprocessor = nemo_asr.AudioToMFCCPreprocessor( sample_rate=sample_rate, **jasper_params["AudioToMFCCPreprocessor"], ) spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None) if spectr_augment_config: data_spectr_augmentation = nemo_asr.SpectrogramAugmentation( **spectr_augment_config) eval_dl_params = copy.deepcopy( jasper_params["AudioToSpeechLabelDataLayer"]) eval_dl_params.update(jasper_params["AudioToSpeechLabelDataLayer"]["eval"]) del eval_dl_params["train"] del eval_dl_params["eval"] data_layers_eval = [] if args.eval_datasets: for eval_datasets in args.eval_datasets: data_layer_eval = nemo_asr.AudioToSpeechLabelDataLayer( manifest_filepath=eval_datasets, sample_rate=sample_rate, labels=labels, batch_size=args.eval_batch_size, num_workers=cpu_per_traindl, **eval_dl_params, ) data_layers_eval.append(data_layer_eval) else: logging.warning("There were no val datasets passed") jasper_encoder = nemo_asr.JasperEncoder(**jasper_params["JasperEncoder"], ) jasper_decoder = nemo_asr.JasperDecoderForClassification( feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], num_classes=len(labels), **jasper_params['JasperDecoderForClassification'], ) ce_loss = nemo_asr.CrossEntropyLossNM() logging.info('================================') logging.info( f"Number of parameters in encoder: {jasper_encoder.num_weights}") logging.info( f"Number of parameters in decoder: {jasper_decoder.num_weights}") logging.info(f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}") logging.info('================================') # Train DAG # --- Assemble Training DAG --- # audio_signal, audio_signal_len, commands, command_len = data_layer() processed_signal, processed_signal_len = data_preprocessor( input_signal=audio_signal, length=audio_signal_len) processed_signal, processed_signal_len = crop_pad_augmentation( input_signal=processed_signal, length=audio_signal_len) if spectr_augment_config: processed_signal = data_spectr_augmentation( input_spec=processed_signal) encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=processed_signal_len) decoded = jasper_decoder(encoder_output=encoded) loss = ce_loss(logits=decoded, 
                   labels=commands)

    # Callbacks needed to print info to console and Tensorboard
    train_callback = nemo.core.SimpleLossLoggerCallback(
        # Notice that we pass in loss, predictions, and the labels (commands).
        # Of course we would like to see our training loss, but we need the
        # other arguments to calculate the accuracy.
        tensors=[loss, decoded, commands],
        # The print_func defines what gets printed.
        print_func=partial(monitor_classification_training_progress,
                           eval_metric=None),
        get_tb_values=lambda x: [("loss", x[0])],
        tb_writer=neural_factory.tb_writer,
    )

    chpt_callback = nemo.core.CheckpointCallback(
        folder=neural_factory.checkpoint_dir,
        load_from_folder=args.load_dir,
        step_freq=args.checkpoint_save_freq,
    )

    callbacks = [train_callback, chpt_callback]

    # assemble eval DAGs
    for i, eval_dl in enumerate(data_layers_eval):
        # --- Assemble Validation DAG --- #
        test_audio_signal, test_audio_signal_len, test_commands, test_command_len = eval_dl(
        )

        test_processed_signal, test_processed_signal_len = data_preprocessor(
            input_signal=test_audio_signal, length=test_audio_signal_len)

        test_processed_signal, test_processed_signal_len = crop_pad_augmentation(
            input_signal=test_processed_signal,
            length=test_processed_signal_len)

        test_encoded, test_encoded_len = jasper_encoder(
            audio_signal=test_processed_signal,
            length=test_processed_signal_len)

        test_decoded = jasper_decoder(encoder_output=test_encoded)

        test_loss = ce_loss(logits=test_decoded, labels=test_commands)

        # create corresponding eval callback
        tagname = os.path.basename(args.eval_datasets[i]).split(".")[0]
        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[test_loss, test_decoded, test_commands],
            user_iter_callback=partial(process_classification_evaluation_batch,
                                       top_k=1),
            user_epochs_done_callback=partial(
                process_classification_evaluation_epoch,
                eval_metric=1,
                tag=tagname),
            eval_step=args.eval_freq,  # How often we evaluate the model on the test set
            tb_writer=neural_factory.tb_writer,
        )

        callbacks.append(eval_callback)

    return loss, callbacks, steps_per_epoch
def _make_split_nuscenes(self): """ Use generated <self.voxel_version> output to build split. """ assert self._input_format == "nuscenes" self.sample_id_template = "nuscenes_lidarseg_{seq:04d}_{frame:04d}" self._seq_format = lambda x: "{:04d}".format(x) self._frame_format = lambda x: "{:05d}".format(x) self._label_format = lambda x: "{:05d}".format(x) self._voxel_format = lambda x: "{:06d}".format(x) # Todo: no test split option for now assert self.testset_flag is False valid_splits = ["train", "valid"] map_split_names = {"train": "train", "valid": "val", "test": "test"} # read config with open(str(self.config_semantic), "r") as file_conf_sem: yaml = YAML() data = yaml.load(file_conf_sem) self._config_data = {k: dict(v) for k, v in data.items()} data_splits = { map_split_names[k]: v for k, v in self._config_data["split"].items() if k in valid_splits } self._split = { "name": "nuscenes_voxels_{}".format( "default" if not self.testset_flag else "test" ), "data": {k: [] for k in data_splits.keys()}, } self._samples_to_generate = [] def parse_sequence_folder_name(x): try: return int(x) except ValueError: return -1 voxel_sequences = { parse_sequence_folder_name(x.name): x for x in self.semantic_kitti_voxels_root.iterdir() } for split_name, sequences in data_splits.items(): split_data = self._split["data"][split_name] for sequence_index in sequences: if not self.testset_flag: if sequence_index not in voxel_sequences: logger.warning( "Sequence " + self._seq_format(sequence_index) + " not available. Skipping." ) continue voxel_dir = voxel_sequences[sequence_index] / self.voxel_version if not voxel_dir.is_dir(): logger.warning( "Voxels not available in sequence " + self._seq_format(sequence_index) + ". Skipping." ) continue self._voxel_data_cache[sequence_index] = { int(x.stem[:6]): x for x in ( voxel_sequences[sequence_index] / self.voxel_version ).iterdir() if x.suffix == ".tfrecord" } split_data.extend( [ self.sample_id_template.format(seq=sequence_index, frame=x) for x in sorted( list(self._voxel_data_cache[sequence_index].keys()) ) ] ) self._samples_to_generate.extend( [ (sequence_index, x) for x in sorted( list(self._voxel_data_cache[sequence_index].keys()) ) ] ) else: raise NotImplementedError() self._label_mapping: dict = self._config_data["learning_map"] # make 255 the 'unlabeled' label and shift all others down (-1) accordingly self._label_mapping = { k: v - 1 if v != 0 else 255 for k, v in self._label_mapping.items() } self._label_mapping_voxels = self._label_mapping.copy() # map unlabeled to extra entry 254 when voxelizing # Todo(risteon): Is this better? # -> Map noise to 254, this will get removed when parsing # -> Map unlabed to 255 to keep for geometry training unlabeled_index_nuscenes = 32 self._label_mapping[unlabeled_index_nuscenes] = 255 self._label_mapping_voxels[0] = 254 self._label_mapping_voxels[unlabeled_index_nuscenes] = 255 assert all(x <= 255 for x in self._label_mapping.values()) assert all(x <= 255 for x in self._label_mapping_voxels.values()) self._label_mapping = np.vectorize(self._label_mapping.get, otypes=[np.int64]) self._label_mapping_voxels = np.vectorize( self._label_mapping_voxels.get, otypes=[np.int64] )
def _run_core_command( self, patterns_json: List[Any], patterns: List[Pattern], targets: List[Path], language: Language, rule: Rule, rules_file_flag: str, cache_dir: str, ) -> dict: with tempfile.NamedTemporaryFile( "w" ) as pattern_file, tempfile.NamedTemporaryFile( "w" ) as target_file, tempfile.NamedTemporaryFile( "w" ) as equiv_file: yaml = YAML() yaml.dump({"rules": patterns_json}, pattern_file) pattern_file.flush() target_file.write("\n".join(str(t) for t in targets)) target_file.flush() cmd = [SEMGREP_PATH] + [ "-lang", language, rules_file_flag, pattern_file.name, "-j", str(self._jobs), "-target_file", target_file.name, "-use_parsing_cache", cache_dir, "-timeout", str(self._timeout), "-max_memory", str(self._max_memory), ] equivalences = rule.equivalences if equivalences: self._write_equivalences_file(equiv_file, equivalences) cmd += ["-equivalences", equiv_file.name] core_run = sub_run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) logger.debug(core_run.stderr.decode("utf-8", "replace")) if core_run.returncode != 0: output_json = self._parse_core_output(core_run.stdout) if "error" in output_json: self._raise_semgrep_error_from_json(output_json, patterns) else: raise SemgrepError( f"unexpected json output while invoking semgrep-core:\n{PLEASE_FILE_ISSUE_TEXT}" ) output_json = self._parse_core_output(core_run.stdout) return output_json
class Representer(RoundTripRepresenter):
    pass

Representer.add_representer(OrderedDict, Representer.represent_dict)


def wrap_yaml_string(s, width=100):
    ss = (l.rstrip() for l in s.splitlines())
    ss = (l for l in ss if l)
    #ss = textwrap.wrap('\n'.join(ss), width=width, drop_whitespace=False, tabsize=2)
    return PreservedScalarString('\n'.join(ss))

yaml = YAML(typ='rt')
yaml.Representer = Representer
yaml.compact()
yaml.default_flow_style = False


def yaml_dumps(document):
    stream = StringIO()
    yaml.dump(document, stream)
    return stream.getvalue()


def write_yaml(dir_, fn, data):
    if not os.path.exists(dir_):
        os.makedirs(dir_)
    with open(os.path.join(dir_, fn), 'w') as f:
        # The body was truncated in this excerpt; an assumed completion that serialises
        # through yaml_dumps, consistent with the helpers defined above.
        f.write(yaml_dumps(data))
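# Usage sketch for the helpers above: `wrap_yaml_string` forces a literal block scalar,
# which `yaml_dumps` then renders with the round-trip dumper and the OrderedDict representer.
from collections import OrderedDict

doc = OrderedDict(
    name="example",
    description=wrap_yaml_string("First line.\nSecond line.\n\nBlank lines are dropped."),
)
print(yaml_dumps(doc))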
def write(model_name, data, output_dir): """Write data structure to YAML and csv """ project_data, intervals, interventions, units, model_data, extra = data yaml = YAML() # project with open(project_yaml_file(output_dir), 'w', encoding='utf-8') as project_file: yaml.dump(project_data, project_file) # intervals intervals_filename = os.path.join(output_dir, 'data', 'interval_definitions', '{}_intervals.csv'.format(model_name)) with open(intervals_filename, 'w', encoding='utf-8', newline='') as intervals_file: fieldnames = ('id', 'start_hour', 'end_hour') writer = csv.DictWriter(intervals_file, fieldnames) writer.writeheader() writer.writerows(intervals) # interventions interventions_filename = os.path.join( output_dir, 'data', 'interventions', '{}_interventions.yml'.format(model_name)) with open(interventions_filename, 'w', encoding='utf-8') as interventions_file: yaml.dump(interventions, interventions_file) # units units_filename = os.path.join(output_dir, 'data', '{}_units.txt'.format(model_name)) with open(units_filename, 'w', encoding='utf-8', newline='') as units_file: fieldnames = ('unit_name', 'description') writer = csv.DictWriter(units_file, fieldnames, delimiter='=') writer.writeheader() writer.writerows(units) # model model_filename = os.path.join(output_dir, 'config', 'sector_models', '{}.yml'.format(model_name)) with open(model_filename, 'w', encoding='utf-8') as model_file: yaml.dump(model_data, model_file) # wrapper wrapper_parameters = '' for parameter in model_data['parameters']: identifier = clean('parameter_' + str(parameter['name'])) wrapper_parameters += '{0} = data.get_parameter(\'{1}\')\n\t\t'.format( identifier, parameter['name']) wrapper_parameters += 'self.logger.info(\'Parameter {1}: %s\', {0})\n\t\t'.format( identifier, str(parameter['name']).replace("_", " ").capitalize()) wrapper_inputs = '' for input in model_data['inputs']: identifier = clean('input_' + str(input['name'])) wrapper_inputs += '{0} = data.get_data("{1}")\n\t\t'.format( identifier, input['name']) wrapper_inputs += 'self.logger.info(\'Input {1}: %s\', {0})\n\t\t'.format( identifier, str(input['name']).replace("_", " ").capitalize()) wrapper_outputs = '' for output in model_data['outputs']: wrapper_outputs += 'data.set_results("{0}", None)\n\t\t'.format( output['name']) # ensure models dir exists try: os.mkdir(os.path.join(output_dir, 'models')) except FileExistsError: pass with open(WRAPPER_TEMPLATE, 'r') as source, open( os.path.join(output_dir, 'models', '{}.py'.format(model_name)), 'w') as sink: for line in source.readlines(): sink.write( line.format(model_name=model_name, model_name_rm_=model_name.replace("_", " "), model_name_cap=model_name.replace( "_", " ").capitalize(), model_parameters=wrapper_parameters, model_inputs=wrapper_inputs, model_outputs=wrapper_outputs)) # extras for sheet_name, data in extra.items(): filename = os.path.join(output_dir, '{}__{}.yml'.format(model_name, sheet_name)) with open(filename, 'w', encoding='utf-8') as file_handle: yaml.dump(data, file_handle)