async def test_lovelace_update_view(hass, hass_ws_client):
    """Test update_view command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')
    origyaml = yaml.load(TEST_YAML_A)

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=origyaml), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/update',
            'view_id': 'example',
            'view_config': 'id: example2\ntitle: New title\n',
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    orig_view = origyaml.mlget(['views', 0], list_ok=True)
    new_view = result.mlget(['views', 0], list_ok=True)

    assert new_view['title'] == 'New title'
    assert new_view['cards'] == orig_view['cards']
    assert 'theme' not in new_view
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
async def test_lovelace_move_card_view_position(hass, hass_ws_client):
    """Test move_card to view with position command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/move',
            'card_id': 'test',
            'new_view_id': 'example',
            'new_position': 1,
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    assert result.mlget(['views', 0, 'cards', 1, 'title'],
                        list_ok=True) == 'Test card'
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
def parse(self, input):
    """parse the given file or file source string"""
    if hasattr(input, 'name'):
        self.filename = input.name
    elif not getattr(self, 'filename', ''):
        self.filename = ''
    if hasattr(input, "read"):
        src = input.read()
        input.close()
        input = src
    if isinstance(input, bytes):
        input = input.decode('utf-8')
    yaml = YAML()
    try:
        self._file = yaml.load(input)
    except YAMLError as e:
        message = e.problem if hasattr(e, 'problem') else e.message
        if hasattr(e, 'problem_mark'):
            message += ' {0}'.format(e.problem_mark)
        raise base.ParseError(message)
    self._file = self.preprocess(self._file)
    for k, data in self._flatten(self._file):
        unit = self.UnitClass(data)
        unit.setid(k)
        self.addunit(unit)
def handle(self, *args, **options):
    def flatten(l):
        return [item for sublist in l for item in sublist]

    yaml = YAML()
    with open(options['yaml']) as yamlfile:
        data = yaml.load(yamlfile)

    for attribute in flatten(data['attributes'].values()):
        SuomiFiUserAttribute.objects.update_or_create(
            friendly_name=attribute['friendly_name'],
            uri=attribute['uri'],
            name=attribute['name'],
            description=attribute['description'],
        )

    for level, details in data['access_levels'].items():
        access_level, created = SuomiFiAccessLevel.objects.update_or_create(shorthand=level)
        for language, name in details['name'].items():
            access_level.set_current_language(language)
            access_level.name = name
        for language, description in details['description'].items():
            access_level.set_current_language(language)
            access_level.description = description
        for attribute in flatten(details['fields']):
            access_level.attributes.add(
                SuomiFiUserAttribute.objects.get(friendly_name=attribute['friendly_name']))
        access_level.save()
def mocked_config_file_path(
    fake_temp_data_pocketsphinx_dic, fake_temp_data_pocketsphinx_lm, tmpdir_factory
):
    path_to_pocketsphix_dic = os.path.join(
        str(fake_temp_data_pocketsphinx_dic), "fake.dic"
    )
    path_to_pocketsphix_lm = os.path.join(
        str(fake_temp_data_pocketsphinx_lm), "fake.lm"
    )

    # config part
    base = tempfile.mkdtemp()
    config_file = os.path.join(base, "config.yaml")

    yaml = YAML()
    m_cfg = yaml.load(COMMON_MOCKED_CONFIG)
    m_cfg["pocketsphinx"]["dic"] = path_to_pocketsphix_dic
    m_cfg["pocketsphinx"]["lm"] = path_to_pocketsphix_lm
    with open(config_file, "w", encoding="utf-8") as fp:
        yaml.dump(m_cfg, fp)

    yield config_file

    shutil.rmtree(base)
def obj_from_file(filename='annotation.yaml', filetype='auto'):
    ''' Read object from file '''
    if filetype == 'auto':
        _, ext = os.path.splitext(filename)
        filetype = ext[1:]

    if filetype in ('yaml', 'yml'):
        from ruamel.yaml import YAML
        yaml = YAML(typ="unsafe")
        with open(filename, encoding="utf-8") as f:
            obj = yaml.load(f)
        if obj is None:
            obj = {}
        # import yaml
        # with open(filename, encoding="utf-8") as f:
        #     intext = f.read()
        #     obj = yaml.load(intext)
    elif filetype in ('pickle', 'pkl', 'pklz', 'picklezip'):
        fcontent = read_pkl_and_pklz(filename)
        # import pickle
        if sys.version_info[0] < 3:
            import cPickle as pickle
        else:
            import _pickle as pickle
        # import sPickle as pickle
        if sys.version_info.major == 2:
            obj = pickle.loads(fcontent)
        else:
            obj = pickle.loads(fcontent, encoding="latin1")
    else:
        logger.error('Unknown filetype ' + filetype)

    return obj
async def test_lovelace_get_view(hass, hass_ws_client):
    """Test get_view command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/get',
            'view_id': 'example',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
    assert "".join(msg['result'].split()) == "".join(
        'title: Example\n'
        '  # Optional unique id for direct access /lovelace/${id}\n'
        'id: example\n'
        '  # Optional background (overwrites the global background).\n'
        'background: radial-gradient(crimson, skyblue)\n'
        '  # Each view can have a different theme applied.\n'
        'theme: dark-mode\n'.split())
def test_to_file(self):
    filename = "ff_test.yaml"
    b = self.benzene
    b.to_file(filename=filename)
    yaml = YAML(typ="safe")
    with open(filename, "r") as f:
        d = yaml.load(f)
    self.assertListEqual(d["mass_info"], [list(m) for m in b.mass_info])
    self.assertListEqual(d["pair_coeffs"], b.pair_coeffs)
def test_to_file(self):
    filename = "ff_test.yaml"
    v = self.virus
    v.to_file(filename=filename)
    yaml = YAML(typ="safe")
    with open(filename, "r") as f:
        d = yaml.load(f)
    self.assertListEqual(d["mass_info"], [list(m) for m in v.mass_info])
    self.assertListEqual(d["nonbond_coeffs"], v.nonbond_coeffs)
def test_id_not_changed():
    """Test if id is not changed if already exists."""
    yaml = YAML(typ='rt')
    fname = "dummy.yaml"
    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_B)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        migrate_config(fname)
    assert save_yaml_mock.call_count == 0
class TestYAML(unittest.TestCase):
    """Test lovelace.yaml save and load."""

    def setUp(self):
        """Set up for tests."""
        self.tmp_dir = mkdtemp()
        self.yaml = YAML(typ='rt')

    def tearDown(self):
        """Clean up after tests."""
        for fname in os.listdir(self.tmp_dir):
            os.remove(os.path.join(self.tmp_dir, fname))
        os.rmdir(self.tmp_dir)

    def _path_for(self, leaf_name):
        return os.path.join(self.tmp_dir, leaf_name + ".yaml")

    def test_save_and_load(self):
        """Test saving and loading back."""
        fname = self._path_for("test1")
        open(fname, "w+")
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_A))
        data = util_yaml.load_yaml(fname, True)
        assert data == self.yaml.load(TEST_YAML_A)

    def test_overwrite_and_reload(self):
        """Test that we can overwrite an existing file and read back."""
        fname = self._path_for("test2")
        open(fname, "w+")
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_A))
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_B))
        data = util_yaml.load_yaml(fname, True)
        assert data == self.yaml.load(TEST_YAML_B)

    def test_load_bad_data(self):
        """Test error from trying to load unserialisable data."""
        fname = self._path_for("test3")
        with open(fname, "w") as fh:
            fh.write(TEST_BAD_YAML)
        with pytest.raises(HomeAssistantError):
            util_yaml.load_yaml(fname, True)
def test_save_yaml_model(tmpdir, mini_model):
    """Test the writing of YAML model."""
    jsonschema = pytest.importorskip("jsonschema")
    output_file = tmpdir.join("mini.yml")
    cio.save_yaml_model(mini_model, output_file.strpath, sort=True)
    # validate against schema
    yaml = YAML(typ="unsafe")
    with open(output_file.strpath, "r") as infile:
        yaml_to_dict = yaml.load(infile)
    dict_to_json = json.dumps(yaml_to_dict)
    loaded = json.loads(dict_to_json)
    assert jsonschema.validate(loaded, cio.json.json_schema)
def from_file(cls, filename):
    """
    Constructor that reads in a file in YAML format.

    Args:
        filename (str): Filename.
    """
    yaml = YAML(typ="safe")
    with open(filename, "r") as f:
        d = yaml.load(f)
    return cls.from_dict(d)
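# Hedged usage note for the classmethod above: assuming it is decorated with
# @classmethod on a class that also provides from_dict() (the class name
# "ForceField" below is illustrative, not taken from the source), loading an
# instance from a YAML file would look like:
#
#     ff = ForceField.from_file("ff_test.yaml")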
def edit_tmpvault(filename):
    '''Update the yaml config: for any key whose value is CHANGE_AND_REKEY,
    request a master password and use pbkdf2 to derive a master key that all
    of the new values are based on.
    '''
    yaml = YAML()
    with open(filename) as fobj:
        vault_dict = yaml.load(fobj)
    master_pass = getpass.getpass(
        "Enter master key to generate values: ").encode('utf-8')
    master_key = hashlib.pbkdf2_hmac('sha256', master_pass, os.urandom(16), 100000)
    change_values(vault_dict, 'CHANGE_AND_REKEY', master_key)
    with open(filename, 'w') as fobj:
        yaml.dump(vault_dict, fobj)
def vt2esofspy(vesseltree, outputfilename="tracer.txt", axisorder=[0, 1, 2]):
    """
    exports vesseltree to esofspy format

    :param vesseltree: filename or vesseltree dictionary structure
    :param outputfilename: output file name
    :param axisorder: order of axis can be specified with this option
    :return:
    """
    if (type(vesseltree) == str) and os.path.isfile(vesseltree):
        from ruamel.yaml import YAML
        yaml = YAML(typ="unsafe")
        with open(vesseltree, encoding="utf-8") as f:
            vt = yaml.load(f)
    else:
        vt = vesseltree

    logger.debug(str(vt['general']))
    logger.debug(str(vt.keys()))
    vtgm = vt['graph']['microstructure']
    lines = []
    vs = vt['general']['voxel_size_mm']
    sh = vt['general']['shape_px']

    # switch axis
    ax = axisorder

    lines.append("#Tracer+\n")
    lines.append("#voxelsize mm %f %f %f\n" % (vs[ax[0]], vs[ax[1]], vs[ax[2]]))
    lines.append("#shape %i %i %i\n" % (sh[ax[0]], sh[ax[1]], sh[ax[2]]))
    lines.append(str(len(vtgm) * 2) + "\n")

    i = 1
    for id in vtgm:
        try:
            nda = vtgm[id]['nodeA_ZYX']
            ndb = vtgm[id]['nodeB_ZYX']
            lines.append("%i\t%i\t%i\t%i\n" % (nda[ax[0]], nda[ax[1]], nda[ax[2]], i))
            lines.append("%i\t%i\t%i\t%i\n" % (ndb[ax[0]], ndb[ax[1]], ndb[ax[2]], i))
            i += 1
        except:
            pass

    lines.append("%i\t%i\t%i\t%i" % (0, 0, 0, 0))
    lines[3] = str(i - 1) + "\n"

    from builtins import str as text
    with open(outputfilename, 'wt') as f:
        for line in lines:
            f.write(text(line))
def test_add_id():
    """Test if id is added."""
    yaml = YAML(typ='rt')
    fname = "dummy.yaml"
    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        migrate_config(fname)

    result = save_yaml_mock.call_args_list[0][0][1]
    assert 'id' in result['views'][0]['cards'][0]
    assert 'id' in result['views'][1]
def loadtestDictsFromFilePaths(self, testFilePaths):
    """Parses yaml files from given filepaths

    :param testFilePaths: file names to parse
    :type testFilePaths: list of strings
    :return: list of dict parsed from the yaml
    :rtype: list of dicts
    """
    testDicts = []
    yaml = YAML()
    for testFile in testFilePaths:
        with open(testFile) as f:
            testDict = yaml.load(f)
            testDicts.append(dict(testDict))
    return testDicts
def get_default_opttask_kwargs():
    """
    Get the default configuration kwargs for OptTask.

    Args:
        None

    Returns:
        conf_dict (dict): The default kwargs for OptTask
    """
    cwd = os.path.dirname(os.path.realpath(__file__))
    fname = os.path.join(cwd, "defaults.yaml")
    with open(fname, 'r') as config_raw:
        yaml = YAML()
        conf_dict = dict(yaml.load(config_raw))
    return conf_dict
async def test_lovelace_get_card(hass, hass_ws_client):
    """Test get_card command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/get',
            'card_id': 'test',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
    assert msg['result'] == 'id: test\ntype: entities\ntitle: Test card\n'
async def test_lovelace_get_view_not_found(hass, hass_ws_client):
    """Test get_view command cannot find view."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/get',
            'view_id': 'not_found',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success'] is False
    assert msg['error']['code'] == 'view_not_found'
def _readConfig(self, fileName): try: with open(fileName, "r") as config: yaml = YAML() configData = yaml.load(config) if not configData: configData = {} except Exception as e: raise ConfigError(fileName, e) if "include" in configData: for fileName in configData["include"]: includeConfig = self._readConfig(fileName) for key, val in includeConfig.iteritems(): if key not in configData: configData[key] = val elif not isinstance(configData[key], basestring): # Let's try to merge them if they're collections if isinstance(val, basestring): raise ConfigError(fileName, "The included configuration file tried to merge a non-string " "with a string.") try: # Make sure both things we're merging are still iterable types (not numbers or whatever) iter(configData[key]) iter(val) except TypeError: pass # Just don't merge them if they're not else: try: configData[key] += val # Merge with the + operator except TypeError: # Except that some collections (dicts) can't try: for subkey, subval in val.iteritems(): # So merge them manually if subkey not in configData[key]: configData[key][subkey] = subval except (AttributeError, TypeError): # If either of these, they weren't both dicts (but were still iterable); # requires user to resolve raise ConfigError(fileName, "The variable {} could not be successfully merged " "across files.".format(key)) del configData["include"] return configData
async def test_lovelace_update_card_bad_yaml(hass, hass_ws_client):
    """Test update_card command bad yaml."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
            patch('homeassistant.util.ruamel_yaml.yaml_to_object',
                  side_effect=HomeAssistantError):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/update',
            'card_id': 'test',
            'card_config': 'id: test\ntype: glance\n',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success'] is False
    assert msg['error']['code'] == 'error'
async def test_lovelace_add_view(hass, hass_ws_client):
    """Test add_view command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/add',
            'view_config': 'id: test\ntitle: added\n',
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    assert result.mlget(['views', 2, 'title'], list_ok=True) == 'added'
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
async def test_lovelace_update_card(hass, hass_ws_client):
    """Test update_card command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/update',
            'card_id': 'test',
            'card_config': 'id: test\ntype: glance\n',
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    assert result.mlget(['views', 1, 'cards', 0, 'type'],
                        list_ok=True) == 'glance'
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
async def test_lovelace_delete_view(hass, hass_ws_client):
    """Test delete_view command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/delete',
            'view_id': 'example',
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    views = result.get('views', [])
    assert len(views) == 1
    assert views[0]['title'] == 'Second view'
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
def parser_from_buffer(cls, fp):
    """Construct YamlParser from a file pointer."""
    yaml = YAML(typ="safe")
    return cls(yaml.load(fp))
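# A self-contained sketch of the pattern above, under the assumption that
# parser_from_buffer is used as a @classmethod on a parser class; "JobSpec" and
# its fields are illustrative stand-ins, not names from the source project.
import io
from ruamel.yaml import YAML


class JobSpec:
    def __init__(self, data):
        self.data = data

    @classmethod
    def parser_from_buffer(cls, fp):
        """Construct a JobSpec from a file pointer."""
        yaml = YAML(typ="safe")
        return cls(yaml.load(fp))


spec = JobSpec.parser_from_buffer(io.StringIO("name: build\nsteps: [lint, test]\n"))
print(spec.data["steps"])  # ['lint', 'test']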
    mask = connected_components == largest_component_label
    return mask.astype(float)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Data processing')
    parser.add_argument('-c', '--config', default='config.json', type=str,
                        help='Path to the config file (default: config.json)')
    args = parser.parse_args()

    yaml = YAML(typ='safe')
    with open('config.yaml', encoding='utf-8') as file:
        config = yaml.load(file)  # loaded as a list type

    save_pickle(
        config["path"] + "/DRIVE/training",
        *data_process(config["path"], name="DRIVE", mode="training",
                      **config["data_process"]))
    save_pickle(
        config["path"] + "/DRIVE/test",
        *data_process(config["path"], name="DRIVE", mode="test",
                      **config["data_process"]))
    save_pickle(
        config["path"] + "/CHASEDB1",
        *data_process(config["path"],
from ipam.models import VLAN, VLANGroup, Role
from ipam.constants import VLAN_STATUS_CHOICES
from tenancy.models import Tenant, TenantGroup
from extras.models import CustomField, CustomFieldValue
from ruamel.yaml import YAML
from pathlib import Path
import sys

file = Path('/opt/netbox/initializers/vlans.yml')
if not file.is_file():
    sys.exit()

with file.open('r') as stream:
    yaml = YAML(typ='safe')
    vlans = yaml.load(stream)

    optional_assocs = {
        'site': (Site, 'name'),
        'tenant': (Tenant, 'name'),
        'tenant_group': (TenantGroup, 'name'),
        'group': (VLANGroup, 'name'),
        'role': (Role, 'name')
    }

    if vlans is not None:
        for params in vlans:
            custom_fields = params.pop('custom_fields', None)

            for assoc, details in optional_assocs.items():
                if assoc in params:
def getconfig():
    dados = open('config.yaml', 'r').read()
    yaml = YAML()
    data = yaml.load(dados)
    cdata = json.loads(json.dumps(data))
    return cdata
"%s has values (%s) not found in owl entity dictionaries t (%s): " % (field.full_path, str( quoted.difference(owl_entities)), str(owl_entities))) stat = False else: warnings.warn("Pattern has no text fields") return stat schema_url = 'https://raw.githubusercontent.com/dosumis/dead_simple_owl_design_patterns/master/spec/DOSDP_schema_full.yaml' dosdp_full_text = requests.get(schema_url) ryaml = YAML(typ='safe') dosdp = ryaml.load(dosdp_full_text.text) # TODO - Add better parsing for ryaml execptions. v = Draft7Validator(dosdp) pattern_docs = glob.glob(sys.argv[1] + "*.yaml") pattern_docs.extend(glob.glob(sys.argv[1] + "*.yml")) stat = True for pattern_doc in pattern_docs: warnings.warn("Checking %s" % pattern_doc) file = open(pattern_doc, "r") pattern = ryaml.load(file.read()) if not test_jschema(v, pattern): stat = False if not test_vars(pattern): stat = False if not test_text_fields(pattern): stat = False
        # Get the path to the definition file.
        path_to_file = os.path.join(dirpath, def_file)

        # Get the type of HTTP method from the file name by removing the ".yaml" file
        # extension from the string.
        http_method = def_file[:-len(".yaml")]

        # File names for our resource definitions should be in the form of:
        # "get.yaml"
        # "post.yaml"
        # or some other RESTful HTTP method that we support in our API. If the current
        # file being processed isn't like that, then we skip over that file since it's
        # not a resource definition.
        if http_method not in ALLOWED_HTTP_METHODS:
            continue

        # Load the yaml data from the current file being processed
        with open(path_to_file, "r") as yaml_file:
            definition = yaml.load(yaml_file)

        resource_url_path = RESOURCE_PATH_LOOKUP[resource_name]

        # Set the value of the resource path in the base dictionary to the
        # definition we loaded from the yaml file. It will look something like this
        # in the paths dictionary:
        #
        # paths:
        #   /:
        #     get:
        #       ...
        openapi_definition['paths'][resource_url_path][http_method] = definition

# Get the date and time that the script was run, then generate a name for the definition
# file.
# We generate the file name with the date and time so that we can keep different versions
class SavedBundleConfig(object): def __init__(self, bento_service=None, kind="BentoService"): self.kind = kind self._yaml = YAML() self._yaml.default_flow_style = False self.config = self._yaml.load( BENTOML_CONFIG_YAML_TEPMLATE.format( kind=self.kind, bentoml_version=get_bentoml_deploy_version(), created_at=str(datetime.utcnow()), ) ) if bento_service is not None: self.config["metadata"].update( { "service_name": bento_service.name, "service_version": bento_service.version, } ) self.config["env"] = bento_service.env.to_dict() self.config['apis'] = _get_apis_list(bento_service) self.config['artifacts'] = _get_artifacts_list(bento_service) def write_to_path(self, path, filename="bentoml.yml"): return self._yaml.dump(self.config, Path(os.path.join(path, filename))) @classmethod def load(cls, filepath): conf = cls() with open(filepath, "rb") as config_file: yml_content = config_file.read() conf.config = conf._yaml.load(yml_content) ver = str(conf["version"]) if ver != BENTOML_VERSION: msg = ( "Saved BentoService bundle version mismatch: loading BentoService " "bundle create with BentoML version {}, but loading from BentoML " "version {}".format(conf["version"], BENTOML_VERSION) ) # If major version is different, then there could be incompatible API # changes. Raise error in this case. if ver.split(".")[0] != BENTOML_VERSION.split(".")[0]: if not BENTOML_VERSION.startswith('0+untagged'): raise BentoMLConfigException(msg) else: logger.warning(msg) else: # Otherwise just show a warning. logger.warning(msg) return conf def get_bento_service_metadata_pb(self): bento_service_metadata = BentoServiceMetadata() bento_service_metadata.name = self.config["metadata"]["service_name"] bento_service_metadata.version = self.config["metadata"]["service_version"] bento_service_metadata.created_at.FromDatetime( self.config["metadata"]["created_at"] ) if "env" in self.config: if "setup_sh" in self.config["env"]: bento_service_metadata.env.setup_sh = self.config["env"]["setup_sh"] if "conda_env" in self.config["env"]: bento_service_metadata.env.conda_env = dump_to_yaml_str( self.config["env"]["conda_env"] ) if "pip_dependencies" in self.config["env"]: bento_service_metadata.env.pip_dependencies = "\n".join( self.config["env"]["pip_dependencies"] ) if "python_version" in self.config["env"]: bento_service_metadata.env.python_version = self.config["env"][ "python_version" ] if "docker_base_image" in self.config["env"]: bento_service_metadata.env.docker_base_image = self.config["env"][ "docker_base_image" ] if "apis" in self.config: for api_config in self.config["apis"]: if 'handler_type' in api_config: # Convert handler type to input type for saved bundle created # before version 0.8.0 input_type = api_config.get('handler_type') elif 'input_type' in api_config: input_type = api_config.get('input_type') else: input_type = "unknown" if 'output_type' in api_config: output_type = api_config.get('output_type') else: output_type = "DefaultOutput" api_metadata = BentoServiceMetadata.BentoServiceApi( name=api_config["name"], docs=api_config["docs"], input_type=input_type, output_type=output_type, ) if "handler_config" in api_config: # Supports viewing API input config info for saved bundle created # before version 0.8.0 for k, v in api_config["handler_config"].items(): if k in {'mb_max_latency', 'mb_max_batch_size'}: setattr(api_metadata, k, v) else: api_metadata.input_config[k] = v else: if 'mb_max_latency' in api_config: api_metadata.mb_max_latency = api_config["mb_max_latency"] else: api_metadata.mb_max_latency = 
DEFAULT_MAX_LATENCY if 'mb_max_batch_size' in api_config: api_metadata.mb_max_batch_size = api_config["mb_max_batch_size"] else: api_metadata.mb_max_batch_size = DEFAULT_MAX_BATCH_SIZE if "input_config" in api_config: for k, v in api_config["input_config"].items(): api_metadata.input_config[k] = v if "output_config" in api_config: for k, v in api_config["output_config"].items(): api_metadata.output_config[k] = v bento_service_metadata.apis.extend([api_metadata]) if "artifacts" in self.config: for artifact_config in self.config["artifacts"]: artifact_metadata = BentoServiceMetadata.BentoArtifact() if "name" in artifact_config: artifact_metadata.name = artifact_config["name"] if "artifact_type" in artifact_config: artifact_metadata.artifact_type = artifact_config["artifact_type"] bento_service_metadata.artifacts.extend([artifact_metadata]) return bento_service_metadata def __getitem__(self, item): return self.config[item] def __setitem__(self, key, value): self.config[key] = value def __contains__(self, item): return item in self.config
def offline_inference(config, encoder, decoder, audio_file): MODEL_YAML = config CHECKPOINT_ENCODER = encoder CHECKPOINT_DECODER = decoder sample_rate, signal = wave.read(audio_file) # get labels (vocab) yaml = YAML(typ="safe") with open(MODEL_YAML) as f: jasper_model_definition = yaml.load(f) labels = jasper_model_definition['labels'] # build neural factory and neural modules neural_factory = nemo.core.NeuralModuleFactory( placement=nemo.core.DeviceType.GPU, backend=nemo.core.Backend.PyTorch) data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( factory=neural_factory, **jasper_model_definition["AudioToMelSpectrogramPreprocessor"]) jasper_encoder = nemo_asr.JasperEncoder( feat_in=jasper_model_definition["AudioToMelSpectrogramPreprocessor"]["features"], **jasper_model_definition["JasperEncoder"]) jasper_decoder = nemo_asr.JasperDecoderForCTC( feat_in=jasper_model_definition["JasperEncoder"]["jasper"][-1]["filters"], num_classes=len(labels)) greedy_decoder = nemo_asr.GreedyCTCDecoder() # load model jasper_encoder.restore_from(CHECKPOINT_ENCODER) jasper_decoder.restore_from(CHECKPOINT_DECODER) # AudioDataLayer class AudioDataLayer(DataLayerNM): @staticmethod def create_ports(): input_ports = {} output_ports = { "audio_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "a_sig_length": NeuralType({0: AxisType(BatchTag)}), } return input_ports, output_ports def __init__(self, **kwargs): DataLayerNM.__init__(self, **kwargs) self.output_enable = False def __iter__(self): return self def __next__(self): if not self.output_enable: raise StopIteration self.output_enable = False return torch.as_tensor(self.signal, dtype=torch.float32), \ torch.as_tensor(self.signal_shape, dtype=torch.int64) def set_signal(self, signal): self.signal = np.reshape(signal.astype(np.float32)/32768., [1, -1]) self.signal_shape = np.expand_dims(self.signal.size, 0).astype(np.int64) self.output_enable = True def __len__(self): return 1 @property def dataset(self): return None @property def data_iterator(self): return self # Instantiate necessary neural modules data_layer = AudioDataLayer() # Define inference DAG audio_signal, audio_signal_len = data_layer() processed_signal, processed_signal_len = data_preprocessor( input_signal=audio_signal, length=audio_signal_len) encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=processed_signal_len) log_probs = jasper_decoder(encoder_output=encoded) predictions = greedy_decoder(log_probs=log_probs) # audio inference data_layer.set_signal(signal) tensors = neural_factory.infer([ audio_signal, processed_signal, encoded, log_probs, predictions], verbose=False) # results audio = tensors[0][0][0].cpu().numpy() features = tensors[1][0][0].cpu().numpy() encoded_features = tensors[2][0][0].cpu().numpy(), probs = tensors[3][0][0].cpu().numpy() preds = tensors[4][0] transcript = post_process_predictions([preds], labels) return transcript, audio, features, encoded_features, probs, preds
def callback(data):
    yaml = YAML()
    return yaml.load(data)
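# Minimal usage sketch for the callback above (the document string is invented for
# illustration): YAML() defaults to round-trip mode, so the returned mapping is a
# ruamel.yaml CommentedMap that preserves key order and comments.
parsed = callback("name: example\nenabled: true\n")
print(parsed["name"], parsed["enabled"])  # example True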
def main(config=None): print(banner) parser = argparse.ArgumentParser( description='Boa, the fast build tool for conda packages.') subparsers = parser.add_subparsers(help='sub-command help', dest='command') parent_parser = argparse.ArgumentParser(add_help=False) parent_parser.add_argument('recipe_dir', type=str) render_parser = subparsers.add_parser('render', parents=[parent_parser], help='render a recipe') build_parser = subparsers.add_parser('build', parents=[parent_parser], help='build a recipe') args = parser.parse_args() command = args.command folder = args.recipe_dir config = get_or_merge_config(None, {}) config_files = find_config_files(folder) parsed_cfg = collections.OrderedDict() for f in config_files: parsed_cfg[f] = parse_config_file(f, config) normalized = {} for k in parsed_cfg[f].keys(): if "_" in k: n = k.replace("_", "-") normalized[n] = parsed_cfg[f][k] parsed_cfg[f].update(normalized) # TODO just using latest config here, should merge! if len(config_files): cbc = parsed_cfg[config_files[-1]] else: cbc = {} update_index(os.path.dirname(config.output_folder), verbose=config.debug, threads=1) recipe_path = os.path.join(folder, "recipe.yaml") # step 1: parse YAML with open(recipe_path) as fi: loader = YAML(typ="safe") ydoc = loader.load(fi) # step 2: fill out context dict context_dict = ydoc.get("context") or {} jenv = jinja2.Environment() for key, value in context_dict.items(): if isinstance(value, str): tmpl = jenv.from_string(value) context_dict[key] = tmpl.render(context_dict) if ydoc.get("context"): del ydoc["context"] # step 3: recursively loop over the entire recipe and render jinja with context jenv.globals.update(jinja_functions(config, context_dict)) for key in ydoc: render_recursive(ydoc[key], context_dict, jenv) flatten_selectors(ydoc, ns_cfg(config)) # We need to assemble the variants for each output variants = {} # if we have a outputs section, use that order the outputs if ydoc.get("outputs"): # if ydoc.get("build"): # raise InvalidRecipeError("You can either declare outputs, or build?") for o in ydoc["outputs"]: # inherit from global package pkg_meta = {} pkg_meta.update(ydoc["package"]) pkg_meta.update(o["package"]) o["package"] = pkg_meta build_meta = {} build_meta.update(ydoc.get("build")) build_meta.update(o.get("build") or {}) o["build"] = build_meta variants[o["package"]["name"]] = get_dependency_variants( o["requirements"], cbc, config) else: # we only have one output variants[ydoc["package"]["name"]] = get_dependency_variants( ydoc["requirements"], cbc, config) # this takes in all variants and outputs, builds a dependency tree and returns # the final metadata sorted_outputs = to_build_tree(ydoc, variants, config) # then we need to solve and build from the bottom up # we can't first solve all packages without finalizing everything # - solve the package # - solv build, add weak run exports to # - add run exports from deps! 
if command == 'render': for o in sorted_outputs: print(o) exit() solver = MambaSolver(["conda-forge"], "linux-64") for o in sorted_outputs: solver.replace_channels() o.finalize_solve(sorted_outputs, solver) print(o) o.config.compute_build_id(o.name) print(o.config.host_prefix) if 'build' in o.transactions: mkdir_p(o.config.build_prefix) print(o.transactions) o.transactions['build'].execute( PrefixData(o.config.build_prefix), PackageCacheData.first_writable().pkgs_dir) if 'host' in o.transactions: mkdir_p(o.config.host_prefix) print(o.transactions) o.transactions['host'].execute( PrefixData(o.config.host_prefix), PackageCacheData.first_writable().pkgs_dir) print(o.sections) stats = {} print("Final variant config") print(config.variant) print(o.variant) build(MetaData(recipe_path, o), None) # sorted_outputs # print(sorted_outputs[0].config.host_prefix) exit() for o in sorted_outputs: print("\n") print(o)
""" Enables recursive dot notation for ``dict``. """ return json.loads(json.dumps(inpt), object_hook=lambda x: Bunch(**{ **Bunch(), **x })) # Read the BIDS schema data with (BIDSCOIN_SCHEMA_DIR.joinpath('objects/datatypes.yaml') ).open('r') as _stream: bidsdatatypesdef = yaml.load( _stream ) # The valid BIDS datatypes, along with their full names and descriptions bidsdatatypes = {} for _datatype in bidsdatatypesdef: # The entities that can/should be present for each BIDS datatype with (BIDSCOIN_SCHEMA_DIR.joinpath(f'rules/datatypes/{_datatype}') ).with_suffix('.yaml').open('r') as _stream: bidsdatatypes[_datatype] = yaml.load(_stream) with (BIDSCOIN_SCHEMA_DIR.joinpath('objects/suffixes.yaml') ).open('r') as _stream: suffixes = yaml.load( _stream) # The descriptions of the valid BIDS file suffixes with (BIDSCOIN_SCHEMA_DIR.joinpath('objects/entities.yaml') ).open('r') as _stream: entities = yaml.load( _stream) # The descriptions of the entities present in BIDS filenames with (BIDSCOIN_SCHEMA_DIR.joinpath('rules/entities.yaml')
            'conda', 'smithy', 'register-feedstock-token',
            '--feedstock_directory', feedstock_dir
        ] + owner_info)

        write_token('anaconda', os.environ['STAGING_BINSTAR_TOKEN'])
        subprocess.check_call([
            'conda', 'smithy', 'rotate-binstar-token',
            '--without-appveyor', '--token_name', 'STAGING_BINSTAR_TOKEN'
        ], cwd=feedstock_dir)

        yaml = YAML()
        with open(os.path.join(feedstock_dir, "conda-forge.yml"), "r") as fp:
            _cfg = yaml.load(fp.read())
        _cfg["conda_forge_output_validation"] = True
        with open(os.path.join(feedstock_dir, "conda-forge.yml"), "w") as fp:
            yaml.dump(_cfg, fp)
        subprocess.check_call(["git", "add", "conda-forge.yml"], cwd=feedstock_dir)

        subprocess.check_call(['conda', 'smithy', 'rerender'], cwd=feedstock_dir)
    except subprocess.CalledProcessError:
        exit_code = 0
        traceback.print_exception(*sys.exc_info())
        continue

    print("making a commit and pushing...")
    subprocess.check_call([
from ruamel.yaml import YAML
import requests

"""
This script prints all the tools that do not have a corresponding biotools accession or doi.
"""

yaml = YAML()
yaml_recipe = YAML(typ="rt")  # pylint: disable=invalid-name

with open('../annotations.yaml', 'r') as read_file:
    file_annotations = yaml.load(read_file)

tools = {}
not_biotools = []


def search_tool(key):
    count = 0
    url = 'https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=' + key + '&format=json&pageSize=1000'
    try:
        page = requests.get(url).json()
        if 'resultList' in page:
            for publication in page['resultList']['result']:
                common_name = key + ":"
                # if common_name in publication['title'].lower() and (
                #         'nmeth.' in publication['doi'] or 'bioinformatics' in publication['doi'] or 'nar\/' in publication['doi'] or 'gigascience' in publication['doi'] or 'nbt.' in publication['doi']):
                #     print(key + ' ---- ' + publication['title'] + ' --- ' + publication['doi'])
                if common_name in publication['title'].lower():
                    print(key + ' ---- ' + publication['title'] + ' --- ' + ' -' + ' doi:' + publication['doi'])
#!/usr/bin/python3
# Changes namespace, name, and version in Galaxy metadata.
# Useful for releasing to Automation Hub, where Collections live
# in namespaces separated from Ansible Galaxy.

import sys
from ruamel.yaml import YAML

filepath = "galaxy.yml"
buf = open(filepath).read()

yaml = YAML(typ="rt")
yaml.default_flow_style = False
yaml.preserve_quotes = True
yaml.indent(mapping=2, sequence=4, offset=2)

code = yaml.load(buf)
code["namespace"] = sys.argv[1]
code["name"] = sys.argv[2]
code["version"] = sys.argv[3]

yaml.dump(code, sys.stdout)
class Unifier: def __init__(self, indir: str, dir_name=INTEGRATIONS_DIR, outdir='', image_prefix=DEFAULT_IMAGE_PREFIX): directory_name = "" for optional_dir_name in DIR_TO_PREFIX: if optional_dir_name in indir: directory_name = optional_dir_name if not directory_name: print_error( 'You have failed to provide a legal file path, a legal file path ' 'should contain either Integrations or Scripts directories') self.image_prefix = image_prefix self.package_path = indir if self.package_path.endswith(os.sep): self.package_path = self.package_path.rstrip(os.sep) self.dest_path = outdir yml_paths, self.yml_path = get_yml_paths_in_dir( self.package_path, Errors.no_yml_file(self.package_path)) for path in yml_paths: # The plugin creates a unified YML file for the package. # In case this script runs locally and there is a unified YML file in the package we need to ignore it. # Also, # we don't take the unified file by default because # there might be packages that were not created by the plugin. if 'unified' not in path: self.yml_path = path break self.ryaml = YAML() self.ryaml.preserve_quotes = True self.ryaml.width = 400 # make sure long lines will not break (relevant for code section) if self.yml_path: with open(self.yml_path, 'r') as yml_file: self.yml_data = self.ryaml.load(yml_file) else: self.yml_data = {} print_error(f'No yml found in path: {self.package_path}') # script key for scripts is a string. # script key for integrations is a dictionary. self.is_script_package = isinstance(self.yml_data.get('script'), str) self.dir_name = SCRIPTS_DIR if self.is_script_package else dir_name def write_yaml_with_docker(self, yml_unified, yml_data, script_obj): """Write out the yaml file taking into account the dockerimage45 tag. If it is present will create 2 integration files One for 4.5 and below and one for 5.0. Arguments: output_path {str} -- output path yml_unified {dict} -- unified yml dict yml_data {dict} -- yml object script_obj {dict} -- script object Returns: dict -- dictionary mapping output path to unified data """ output_map = {self.dest_path: yml_unified} if 'dockerimage45' in script_obj: # we need to split into two files 45 and 50. Current one will be from version 5.0 if self.is_script_package: # scripts del yml_unified['dockerimage45'] else: # integrations del yml_unified['script']['dockerimage45'] yml_unified45 = copy.deepcopy(yml_unified) # validate that this is a script/integration which targets both 4.5 and 5.0+. if server_version_compare(yml_data.get('fromversion', '0.0.0'), '5.0.0') >= 0: raise ValueError( f'Failed: {self.dest_path}. dockerimage45 set for 5.0 and later only' ) yml_unified['fromversion'] = '5.0.0' # validate that this is a script/integration which targets both 4.5 and 5.0+. if server_version_compare(yml_data.get('toversion', '99.99.99'), '5.0.0') < 0: raise ValueError( f'Failed: {self.dest_path}. 
dockerimage45 set for 4.5 and earlier only' ) yml_unified45['toversion'] = '4.5.9' if script_obj.get( 'dockerimage45' ): # we have a value for dockerimage45 set it as dockerimage if self.is_script_package: # scripts yml_unified45['dockerimage'] = script_obj.get( 'dockerimage45') else: # integrations yml_unified45['script']['dockerimage'] = script_obj.get( 'dockerimage45') else: # no value for dockerimage45 remove the dockerimage entry del yml_unified45['dockerimage'] output_path45 = re.sub(r'\.yml$', '_45.yml', self.dest_path) output_map = { self.dest_path: yml_unified, output_path45: yml_unified45, } for file_path, file_data in output_map.items(): if os.path.isfile(file_path): raise ValueError( f'Output file already exists: {self.dest_path}.' ' Make sure to remove this file from source control' ' or rename this package (for example if it is a v2).') with io.open(file_path, mode='w', encoding='utf-8') as file_: self.ryaml.dump(file_data, file_) return output_map def merge_script_package_to_yml(self): """Merge the various components to create an output yml file """ print("Merging package: {}".format(self.package_path)) package_dir_name = os.path.basename(self.package_path) output_filename = '{}-{}.yml'.format(DIR_TO_PREFIX[self.dir_name], package_dir_name) if self.dest_path: self.dest_path = os.path.join(self.dest_path, output_filename) else: self.dest_path = os.path.join(self.dir_name, output_filename) script_obj = self.yml_data if not self.is_script_package: script_obj = self.yml_data['script'] script_type = TYPE_TO_EXTENSION[script_obj['type']] yml_unified = copy.deepcopy(self.yml_data) yml_unified, script_path = self.insert_script_to_yml( script_type, yml_unified, self.yml_data) image_path = None desc_path = None if not self.is_script_package: yml_unified, image_path = self.insert_image_to_yml( self.yml_data, yml_unified) yml_unified, desc_path = self.insert_description_to_yml( self.yml_data, yml_unified) output_map = self.write_yaml_with_docker(yml_unified, self.yml_data, script_obj) unifier_outputs = list(output_map.keys( )), self.yml_path, script_path, image_path, desc_path print_color(f'Created unified yml: {list(output_map.keys())}', LOG_COLORS.GREEN) return unifier_outputs[0] def insert_image_to_yml(self, yml_data, yml_unified): image_data, found_img_path = self.get_data("*png") image_data = self.image_prefix + base64.b64encode(image_data).decode( 'utf-8') if yml_data.get('image'): raise ValueError( 'Please move the image from the yml to an image file (.png)' f' in the package: {self.package_path}') yml_unified['image'] = image_data return yml_unified, found_img_path def insert_description_to_yml(self, yml_data, yml_unified): desc_data, found_desc_path = self.get_data('*_description.md') if yml_data.get('detaileddescription'): raise ValueError( 'Please move the detailed description from the yml to a description file (.md)' f' in the package: {self.package_path}') if desc_data: yml_unified['detaileddescription'] = FoldedScalarString( desc_data.decode('utf-8')) return yml_unified, found_desc_path def get_data(self, extension): data_path = glob.glob(os.path.join(self.package_path, extension)) data = None found_data_path = None if not self.is_script_package and data_path: found_data_path = data_path[0] with open(found_data_path, 'rb') as data_file: data = data_file.read() return data, found_data_path def get_code_file(self, script_type): """Return the first code file in the specified directory path :param script_type: script type: .py or .js :type script_type: str :return: path to 
found code file :rtype: str """ ignore_regex = ( r'CommonServerPython\.py|CommonServerUserPython\.py|demistomock\.py|_test\.py' r'|conftest\.py|__init__\.py|ApiModule\.py') if self.package_path.endswith('Scripts/CommonServerPython'): return os.path.join(self.package_path, 'CommonServerPython.py') if self.package_path.endswith('ApiModule'): return os.path.join( self.package_path, os.path.basename(os.path.normpath(self.package_path)) + '.py') script_path = list( filter( lambda x: not re.search(ignore_regex, x), glob.glob(os.path.join(self.package_path, '*' + script_type))))[0] return script_path def insert_script_to_yml(self, script_type, yml_unified, yml_data): script_path = self.get_code_file(script_type) with io.open(script_path, mode='r', encoding='utf-8') as script_file: script_code = script_file.read() # Check if the script imports an API module. If it does, # the API module code will be pasted in place of the import. module_import, module_name = self.check_api_module_imports(script_code) if module_import: script_code = self.insert_module_code(script_code, module_import, module_name) clean_code = self.clean_python_code(script_code) if self.is_script_package: if yml_data.get('script', '') not in ('', '-'): print_warning( f'Script section is not empty in package {self.package_path}.' f'It should be blank or a dash(-).') yml_unified['script'] = FoldedScalarString(clean_code) else: if yml_data['script'].get('script', '') not in ('', '-'): print_warning( f'Script section is not empty in package {self.package_path}.' f'It should be blank or a dash(-).') yml_unified['script']['script'] = FoldedScalarString(clean_code) return yml_unified, script_path def get_script_package_data(self): # should be static method _, yml_path = get_yml_paths_in_dir(self.package_path, error_msg='') if not yml_path: raise Exception( f'No yml files found in package path: {self.package_path}. ' 'Is this really a package dir?') code_type = get_yaml(yml_path).get('type') unifier = Unifier(self.package_path) code_path = unifier.get_code_file(TYPE_TO_EXTENSION[code_type]) with open(code_path, 'r') as code_file: code = code_file.read() return yml_path, code @staticmethod def check_api_module_imports(script_code: str) -> Tuple[str, str]: """ Checks integration code for API module imports :param script_code: The integration code :return: The import string and the imported module name """ # General regex to find API module imports, for example: "from MicrosoftApiModule import * # noqa: E402" module_regex = r'from ([\w\d]+ApiModule) import \*(?: # noqa: E402)?' 
module_match = re.search(module_regex, script_code) if module_match: return module_match.group(), module_match.group(1) return '', '' @staticmethod def insert_module_code(script_code: str, module_import: str, module_name: str) -> str: """ Inserts API module in place of an import to the module according to the module name :param script_code: The integration code :param module_import: The module import string to replace :param module_name: The module name :return: The integration script with the module code appended in place of the import """ module_path = os.path.join('./Packs', 'ApiModules', 'Scripts', module_name, module_name + '.py') module_code = Unifier._get_api_module_code(module_name, module_path) module_code = '\n### GENERATED CODE ###\n# This code was inserted in place of an API module.{}\n' \ .format(module_code) return script_code.replace(module_import, module_code) @staticmethod def _get_api_module_code(module_name, module_path): """ Attempts to get the API module code from the ApiModules pack. :param module_name: The API module name :param module_path: The API module code file path :return: The API module code """ try: with io.open(module_path, mode='r', encoding='utf-8') as script_file: module_code = script_file.read() except Exception as exc: raise ValueError( 'Could not retrieve the module [{}] code: {}'.format( module_name, str(exc))) return module_code @staticmethod def clean_python_code(script_code, remove_print_future=True): script_code = script_code.replace("import demistomock as demisto", "") script_code = script_code.replace("from CommonServerPython import *", "") script_code = script_code.replace( "from CommonServerUserPython import *", "") # print function is imported in python loop if remove_print_future: # docs generation requires to leave this script_code = script_code.replace( "from __future__ import print_function", "") return script_code
def __init__(self, meta_yaml):
    _yml = YAML(typ='jinja2')
    _yml.indent(mapping=2, sequence=4, offset=2)
    _yml.width = 160
    _yml.allow_duplicate_keys = True
    self.meta = _yml.load(meta_yaml)
def main(): parser = argparse.ArgumentParser(description='Jasper') parser.add_argument("--local_rank", default=None, type=int) parser.add_argument("--batch_size", default=32, type=int) parser.add_argument("--model_config", type=str, required=True) parser.add_argument("--eval_datasets", type=str, required=True) parser.add_argument("--load_dir", type=str, required=True) parser.add_argument("--vocab_file", type=str, required=True) parser.add_argument("--save_logprob", default=None, type=str) parser.add_argument("--lm_path", default=None, type=str) parser.add_argument("--beam_width", default=50, type=int) parser.add_argument("--alpha", default=2.0, type=float) parser.add_argument("--beta", default=1.0, type=float) parser.add_argument("--cutoff_prob", default=0.99, type=float) parser.add_argument("--cutoff_top_n", default=40, type=int) args = parser.parse_args() batch_size = args.batch_size load_dir = args.load_dir if args.local_rank is not None: if args.lm_path: raise NotImplementedError( "Beam search decoder with LM does not currently support evaluation on multi-gpu." ) device = nemo.core.DeviceType.AllGpu else: device = nemo.core.DeviceType.GPU # Instantiate Neural Factory with supported backend neural_factory = nemo.core.NeuralModuleFactory( backend=nemo.core.Backend.PyTorch, local_rank=args.local_rank, optimization_level=nemo.core.Optimization.mxprO1, placement=device, ) if args.local_rank is not None: logging.info('Doing ALL GPU') yaml = YAML(typ="safe") with open(args.model_config) as f: jasper_params = yaml.load(f) vocab = load_vocab(args.vocab_file) sample_rate = jasper_params['sample_rate'] eval_datasets = args.eval_datasets eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"]) eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"]) eval_dl_params["normalize_transcripts"] = False del eval_dl_params["train"] del eval_dl_params["eval"] data_layer = nemo_asr.AudioToTextDataLayer( manifest_filepath=eval_datasets, sample_rate=sample_rate, labels=vocab, batch_size=batch_size, **eval_dl_params, ) n = len(data_layer) logging.info('Evaluating {0} examples'.format(n)) data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( sample_rate=sample_rate, **jasper_params["AudioToMelSpectrogramPreprocessor"], ) jasper_encoder = nemo_asr.JasperEncoder( feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"], **jasper_params["JasperEncoder"], ) jasper_decoder = nemo_asr.JasperDecoderForCTC( feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], num_classes=len(vocab), ) greedy_decoder = nemo_asr.GreedyCTCDecoder() if args.lm_path: beam_width = args.beam_width alpha = args.alpha beta = args.beta cutoff_prob = args.cutoff_prob cutoff_top_n = args.cutoff_top_n beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM( vocab=vocab, beam_width=beam_width, alpha=alpha, beta=beta, cutoff_prob=cutoff_prob, cutoff_top_n=cutoff_top_n, lm_path=args.lm_path, num_cpus=max(os.cpu_count(), 1), ) logging.info('================================') logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") logging.info(f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}") logging.info('================================') (audio_signal_e1, a_sig_length_e1, transcript_e1, transcript_len_e1,) = data_layer() processed_signal_e1, p_length_e1 = data_preprocessor(input_signal=audio_signal_e1, length=a_sig_length_e1) encoded_e1, 
encoded_len_e1 = jasper_encoder(audio_signal=processed_signal_e1, length=p_length_e1) log_probs_e1 = jasper_decoder(encoder_output=encoded_e1) predictions_e1 = greedy_decoder(log_probs=log_probs_e1) eval_tensors = [ log_probs_e1, predictions_e1, transcript_e1, transcript_len_e1, encoded_len_e1, ] if args.lm_path: beam_predictions_e1 = beam_search_with_lm(log_probs=log_probs_e1, log_probs_length=encoded_len_e1) eval_tensors.append(beam_predictions_e1) evaluated_tensors = neural_factory.infer(tensors=eval_tensors, checkpoint_dir=load_dir,) greedy_hypotheses = post_process_predictions(evaluated_tensors[1], vocab) references = post_process_transcripts(evaluated_tensors[2], evaluated_tensors[3], vocab) cer = word_error_rate(hypotheses=greedy_hypotheses, references=references, use_cer=True) logging.info("Greedy CER {:.2f}%".format(cer * 100)) if args.lm_path: beam_hypotheses = [] # Over mini-batch for i in evaluated_tensors[-1]: # Over samples for j in i: beam_hypotheses.append(j[0][1]) cer = word_error_rate(hypotheses=beam_hypotheses, references=references, use_cer=True) logging.info("Beam CER {:.2f}".format(cer * 100)) if args.save_logprob: # Convert logits to list of numpy arrays logprob = [] for i, batch in enumerate(evaluated_tensors[0]): for j in range(batch.shape[0]): logprob.append(batch[j][: evaluated_tensors[4][i][j], :].cpu().numpy()) with open(args.save_logprob, 'wb') as f: pickle.dump(logprob, f, protocol=pickle.HIGHEST_PROTOCOL)
def startup(): if not os.path.exists(CONFIG_PATH): info("Foxify Directory Missing! Creating One For You...") os.makedirs(CONFIG_PATH) if not os.path.exists(DEFAULT_THEME_PATH): os.makedirs(DEFAULT_THEME_PATH) if not os.path.exists(DEFAULT_TWEAK_PATH): os.makedirs(DEFAULT_TWEAK_PATH) if not os.path.exists(DEFAULT_CONFIG): while True: info("If you have not yet setup userChrome CSS Cusotmization\nPlease Open Up Your Firefox Browser and Follow These Steps:") print("""\ 1. Go to "about:support" by typing it into your Address Bar 2. Copy the File Path for your Profile Folder 3. Enter it below""") filepath = input("> ") print("You Entered:", filepath.strip()) print("Is this correct? Y\\n") ans = input("> ") if ans.lower() == "y": DCONF['active_profile'] = os.path.realpath(filepath.strip()) info("Writing Default Configuration...") with open(DEFAULT_CONFIG, 'w') as f: yaml = YAML() yaml.default_flow_style = False yaml.dump(DCONF, f) info("Checking If userChrome CSS Customization is Enabled") with open(DCONF['active_profile'] + '/prefs.js', 'r') as f: match = False deact_match = False for line in f.readlines(): if line == '"user_pref("toolkit.legacyUserProfileCustomizations.stylesheets", true);"': match = True if line == '"user_pref("toolkit.legacyUserProfileCustomizations.stylesheets", false);"': deact_match = True if not match: info('Enabling userChrome CSS Customization') with open(DCONF['active_profile'] + '/prefs.js', 'a') as f: f.write('user_pref("toolkit.legacyUserProfileCustomizations.stylesheets", false);') if not match and deact_match: info('Enabling userChrome CSS Customization') with open(DCONF['active_profile'] + '/prefs.js', 'w') as f: content = f.read() content = content.replace('user_pref("toolkit.legacyUserProfileCustomizations.stylesheets", false);', 'user_pref("toolkit.legacyUserProfileCustomizations.stylesheets", true);') f.write() info('Checking For Chrome and Backup Directory') if not os.path.exists(DCONF['active_profile'] + '/chrome'): os.makedirs(DCONF['active_profile'] + '/chrome') if not os.path.exists(DCONF['active_profile'] + '/chrome_backup'): os.makedirs(DCONF['active_profile'] + '/chrome_backup') info('Chrome Directory and Backup Directory Created') break else: pass else: with open(DEFAULT_CONFIG, 'r') as f: yaml = YAML(typ='safe') config = yaml.load(f) if not config.get('config_version'): for k, v in DCONF.items(): if not config.get(k): config[k] = v with open(DEFAULT_CONFIG, 'w') as f: yaml = YAML() yaml.default_flow_style = False yaml.dump(config, f) if config['config_version'] != CONFIG_VERSION: for k, v in DCONF.items(): if not config.get(k): config[k] = v with open(DEFAULT_CONFIG, 'w') as f: yaml = YAML() yaml.default_flow_style = False yaml.dump(config, f) if config['check_for_updates']: res = requests.get('https://raw.githubusercontent.com/M4cs/foxify-cli/master/version').text if res == version: config['version'] = version with open(DEFAULT_CONFIG, 'w') as f: yaml = YAML() yaml.default_flow_style = False yaml.dump(config, f) else: info("Update Available! Run 'pip3 install --upgrade foxify-cli' to Update to Version: " + res)
            key = newkey
            newkey = ''
        lineNum += 1
    return ret_val


with open('index.md', 'r') as f:
    endReached = False
    data = f.read().split('\n')
    for d in data:
        print(d)
        if "<!-- AUTO-GENERATED-START -->" in d:
            print('| Key | Default Value | Description |')
            print('| --- | --- | --- |')
            break

with open('values.yaml', 'r') as f_v:
    d_v = f_v.read()
    yaml = YAML()
    code = yaml.load(d_v)
    yaml.explicit_start = True
    yaml.dump(code, sys.stdout, transform=decode_helm_yaml)

for d in data:
    if "<!-- AUTO-GENERATED-END -->" in d:
        endReached = True
    if endReached:
        print(d)
def create_all_dags(args, neural_factory): ''' creates train and eval dags as well as their callbacks returns train loss tensor and callbacks''' # parse the config files yaml = YAML(typ="safe") with open(args.model_config) as f: contextnet_params = yaml.load(f) vocab = contextnet_params['labels'] sample_rate = contextnet_params['sample_rate'] # Calculate num_workers for dataloader total_cpus = os.cpu_count() cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1) # create data layer for training train_dl_params = copy.deepcopy(contextnet_params["AudioToTextDataLayer"]) train_dl_params.update(contextnet_params["AudioToTextDataLayer"]["train"]) del train_dl_params["train"] del train_dl_params["eval"] # del train_dl_params["normalize_transcripts"] data_layer_train = nemo_asr.AudioToTextDataLayer( manifest_filepath=args.train_dataset, sample_rate=sample_rate, labels=vocab, batch_size=args.batch_size, num_workers=cpu_per_traindl, **train_dl_params, ) N = len(data_layer_train) steps_per_epoch = int( N / (args.batch_size * args.iter_per_step * args.num_gpus)) # create separate data layers for eval # we need separate eval dags for separate eval datasets # but all other modules in these dags will be shared eval_dl_params = copy.deepcopy(contextnet_params["AudioToTextDataLayer"]) eval_dl_params.update(contextnet_params["AudioToTextDataLayer"]["eval"]) del eval_dl_params["train"] del eval_dl_params["eval"] data_layers_eval = [] if args.eval_datasets: for eval_dataset in args.eval_datasets: data_layer_eval = nemo_asr.AudioToTextDataLayer( manifest_filepath=eval_dataset, sample_rate=sample_rate, labels=vocab, batch_size=args.eval_batch_size, num_workers=cpu_per_traindl, **eval_dl_params, ) data_layers_eval.append(data_layer_eval) else: logging.warning("There were no val datasets passed") # create shared modules data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( sample_rate=sample_rate, **contextnet_params["AudioToMelSpectrogramPreprocessor"], ) # Inject the `kernel_size_factor` kwarg to the ContextNet config # Skip the last layer as that must be a pointwise kernel for idx in range( len(contextnet_params["ContextNetEncoder"]["jasper"]) - 1): contextnet_params["ContextNetEncoder"]["jasper"][idx][ "kernel_size_factor"] = args.kernel_size_factor # (ContextNet uses the Jasper baseline encoder and decoder) encoder = nemo_asr.ContextNetEncoder( feat_in=contextnet_params["AudioToMelSpectrogramPreprocessor"] ["features"], **contextnet_params["ContextNetEncoder"], ) decoder = nemo_asr.JasperDecoderForCTC( feat_in=contextnet_params["ContextNetEncoder"]["jasper"][-1] ["filters"], num_classes=len(vocab), ) ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab), zero_infinity=True) greedy_decoder = nemo_asr.GreedyCTCDecoder() # create augmentation modules (only used for training) if their configs # are present multiply_batch_config = contextnet_params.get('MultiplyBatch', None) if multiply_batch_config: multiply_batch = nemo_asr.MultiplyBatch(**multiply_batch_config) spectr_augment_config = contextnet_params.get('SpectrogramAugmentation', None) if spectr_augment_config: data_spectr_augmentation = nemo_asr.SpectrogramAugmentation( **spectr_augment_config) # assemble train DAG ( audio_signal_t, a_sig_length_t, transcript_t, transcript_len_t, ) = data_layer_train() processed_signal_t, p_length_t = data_preprocessor( input_signal=audio_signal_t, length=a_sig_length_t) if multiply_batch_config: ( processed_signal_t, p_length_t, transcript_t, transcript_len_t, ) = multiply_batch( 
in_x=processed_signal_t, in_x_len=p_length_t, in_y=transcript_t, in_y_len=transcript_len_t, ) if spectr_augment_config: processed_signal_t = data_spectr_augmentation( input_spec=processed_signal_t) encoded_t, encoded_len_t = encoder(audio_signal=processed_signal_t, length=p_length_t) log_probs_t = decoder(encoder_output=encoded_t) predictions_t = greedy_decoder(log_probs=log_probs_t) loss_t = ctc_loss( log_probs=log_probs_t, targets=transcript_t, input_length=encoded_len_t, target_length=transcript_len_t, ) # create train callbacks train_callback = nemo.core.SimpleLossLoggerCallback( tensors=[loss_t, predictions_t, transcript_t, transcript_len_t], print_func=partial(monitor_asr_train_progress, labels=vocab), get_tb_values=lambda x: [["loss", x[0]]], tb_writer=neural_factory.tb_writer, step_freq=args.update_freq, ) callbacks = [train_callback] if args.checkpoint_dir or args.load_dir: chpt_callback = nemo.core.CheckpointCallback( folder=args.checkpoint_dir, load_from_folder=args.load_dir, step_freq=args.checkpoint_save_freq, ) callbacks.append(chpt_callback) # Log training metrics to wandb if args.project is not None: wand_callback = nemo.core.WandbCallback( train_tensors=[loss_t], wandb_name=args.exp_name, wandb_project=args.project, update_freq=args.update_freq, args=args, ) callbacks.append(wand_callback) # assemble eval DAGs for i, eval_dl in enumerate(data_layers_eval): ( audio_signal_e, a_sig_length_e, transcript_e, transcript_len_e, ) = eval_dl() processed_signal_e, p_length_e = data_preprocessor( input_signal=audio_signal_e, length=a_sig_length_e) encoded_e, encoded_len_e = encoder(audio_signal=processed_signal_e, length=p_length_e) log_probs_e = decoder(encoder_output=encoded_e) predictions_e = greedy_decoder(log_probs=log_probs_e) loss_e = ctc_loss( log_probs=log_probs_e, targets=transcript_e, input_length=encoded_len_e, target_length=transcript_len_e, ) # create corresponding eval callback tagname = os.path.basename(args.eval_datasets[i]).split(".")[0] eval_callback = nemo.core.EvaluatorCallback( eval_tensors=[ loss_e, predictions_e, transcript_e, transcript_len_e, ], user_iter_callback=partial(process_evaluation_batch, labels=vocab), user_epochs_done_callback=partial(process_evaluation_epoch, tag=tagname), eval_step=args.eval_freq, tb_writer=neural_factory.tb_writer, ) callbacks.append(eval_callback) return loss_t, callbacks, steps_per_epoch
from ruamel.yaml import YAML yaml = YAML() files = ('../remoteid/augmented.yaml', '../remoteid/canonical.yaml') for fname in files: try: with open(fname, 'r') as f: api = yaml.load(f) with open(fname, 'w') as f: yaml.dump(api, f) except IOError: pass
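# Because YAML() defaults to round-trip mode, the load/dump loop above normalizes each
# spec file while preserving comments, key order and anchors. A minimal sketch of tuning
# the output style before dumping; the indent and width values are assumptions, not
# settings taken from the original script.
from ruamel.yaml import YAML

yaml = YAML()
yaml.indent(mapping=2, sequence=4, offset=2)  # block-style indentation
yaml.width = 100                              # wrap long lines at ~100 columns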
LOG_DIR = LOG_ROOT + TASK_NAME + '/' + time_str + '/' os.makedirs(LOG_DIR, exist_ok=True) LOGFILE = LOG_DIR + 'msgs.log' TRAIN_LOG = LOG_DIR + 'training.log' TEST_LOG = LOG_DIR + 'test.log' MODEL_WTS = LOG_DIR + 'model_weights.hdf5' LOG_YAML = 'logger_config.yaml' def logfile(): return logging.FileHandler(LOGFILE) with open(LOG_YAML, 'rt') as f: yaml = YAML(typ='safe', pure=True) config = yaml.load(f.read()) logging.config.dictConfig(config) logger = logging.getLogger(TASK_NAME) ex.logger = logger @ex.capture def build_train(N_train, train_batch_size, train_min_num_seq, train_max_num_seq, train_avg_len): ntm = build_ntm(N=N_train) data_gen = build_data_gen(ntm, train_batch_size, train_min_num_seq, train_max_num_seq, train_avg_len) return ntm, data_gen @ex.capture def build_test(N_test, test_batch_size, test_min_num_seq, test_max_num_seq, test_avg_len):
def loadyaml(yamlp: YAML, source: str) -> Any: """Load YAML.""" return yamlp.load(source)
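# Hypothetical usage of the loadyaml() helper above; the parser configuration and the
# inline document are assumptions for illustration.
from ruamel.yaml import YAML

parser = YAML(typ='safe')
settings = loadyaml(parser, "retries: 3\ntimeout: 30\n")
assert settings == {'retries': 3, 'timeout': 30}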
def load_yaml(path): with open(path, 'r') as yfile: yml = YAML() return yml.load(yfile)
3. if same exit, if not, continue 4. modify bitrise.yml (update stack value) ''' largest_semver = largest_version() tmp_file = 'tmp.yml' with open(BITRISE_YML, 'r') as infile: obj_yaml = YAML() # prevents re-formatting of yml file obj_yaml.preserve_quotes = True obj_yaml.width = 4096 y = obj_yaml.load(infile) current_semver = y['workflows'][WORKFLOW]['meta']['bitrise.io'][ 'stack'] # remove pattern prefix from current_semver to compare with largest current_semver = current_semver.split(pattern)[1] if current_semver == largest_semver: print('Xcode version unchanged! aborting.') else: print('New Xcode version available: {0} ... updating bitrise.yml!'. format(largest_semver)) # add prefix pattern back to be recognizable by bitrise # as a valid stack value y['workflows'][WORKFLOW]['meta']['bitrise.io'][
def read_file(self, filename):
    yaml = YAML(typ='safe')
    yaml.default_flow_style = False
    # use a context manager so the file handle is closed after parsing
    with open(filename, "r") as mydoc:
        return yaml, yaml.load(mydoc)
def read_yaml(file_path): with open(file_path, 'r') as stream: yaml = YAML() return yaml.load(stream.read())
You are receiving this email because you indicated that you are interested in {}. To unsubscribe, use [this link](%mailing_list_unsubscribe_url%) """.format MEETING_MESSAGE_FOOTER = """ --- You are receiving this email because you registered for a VSF Zoom meeting with ID {}. """.format if __name__ == "__main__": yaml = YAML() repo = common.vsf_repo() issue = repo.get_issue(int(os.getenv("ISSUE_NUMBER"))) data = issue.body.replace('\r', '') header, body = data.split('---', maxsplit=1) header = yaml.load(header) if header["to"] in ("vsf_announce", "speakers_corner"): to = header["to"] + "@mail.virtualscienceforum.org" body += MAILING_LIST_FOOTER(MAILING_LIST_DESCRIPTIONS[header["to"]]) response = common.api_query(requests.post, common.MAILGUN_BASE_URL + "messages", data={ "from": header["from"], "to": to, "subject": header["subject"], "text": common.markdown_to_plain(body), "html": common.markdown_to_email(body), }) else: meeting_id = int(header["to"]) # We are sending an email to zoom meeting participants
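# For reference, the issue body parsed above is expected to be YAML front matter followed
# by the markdown message body, e.g. (illustrative values only):
#
#   to: speakers_corner
#   from: VSF team <team@virtualscienceforum.org>
#   subject: Speakers' Corner announcement
#   ---
#   Dear subscribers, ...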
def _is_new_config(filename, tmp_filename): with open(filename, "r") as old_file, open(tmp_filename, "r") as new_file: yaml = YAML() old_config = yaml.load(old_file) new_config = yaml.load(new_file) return old_config != new_config
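# Hypothetical caller for _is_new_config() above: write the candidate configuration to a
# temporary file first and only replace the live file when the parsed contents differ.
# The file names are assumptions for illustration.
import os
import shutil

if _is_new_config("config.yaml", "config.yaml.tmp"):
    shutil.move("config.yaml.tmp", "config.yaml")
else:
    os.remove("config.yaml.tmp")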
from ruamel.yaml import YAML yaml = YAML(typ='safe') with open('config.yaml', 'r') as f: config = yaml.load(f)
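# For comparison with the snippet above: YAML(typ='safe') returns plain Python dicts and
# lists, while the default round-trip mode returns comment-preserving containers that can
# be dumped back without losing formatting. 'config.yaml' is reused here only as an
# illustrative assumption.
from ruamel.yaml import YAML

rt_yaml = YAML()  # round-trip mode (the default)
with open('config.yaml', 'r') as f:
    config = rt_yaml.load(f)

with open('config.yaml', 'w') as f:
    rt_yaml.dump(config, f)  # comments and key order are preserved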
def _fix_token(config_file=None, force=False, verify=True): from ruamel.yaml import YAML yaml = YAML() config_file = config_file or TRAVIS_CONFIG_FILE with open(config_file, "r") as _file: try: travis_config = yaml.load(_file) except Exception: raise ValueError( "Failed to parse the travis configuration. " "Make sure the config only contains valid YAML and keys as specified by travis." ) # Get the generated token from the top level deploy config added by the travis cli try: real_token = travis_config["deploy"]["api_key"]["secure"] except (TypeError, KeyError): raise AssertionError("Can't find any top level deployment tokens") try: # Find the build stage that deploys to releases releases_stages = [ stage for stage in travis_config["jobs"]["include"] if stage.get("deploy", dict()).get("provider") == "releases" ] assert ( len(releases_stages) > 0 ), "Can't set the new token because there are no stages deploying to releases" assert ( len(releases_stages) < 2 ), "Can't set the new token because there are multiple stages deploying to releases" except (TypeError, KeyError): raise AssertionError( "Can't set the new token because there are no deployment stages") try: is_mock_token = releases_stages[0]["deploy"]["token"]["secure"] == "REPLACE_ME" is_same_token = releases_stages[0]["deploy"]["token"]["secure"] == real_token unmodified = is_mock_token or is_same_token except (TypeError, KeyError): unmodified = False # Set the new generated token as the stages deploy token _create(releases_stages[0], "deploy", "token", "secure") releases_stages[0]["deploy"]["token"]["secure"] = real_token # Make sure it is fine to overwrite the config file assert unmodified or force, ( 'The secure token in the "{}" stage has already been changed. ' "Retry with --force if you are sure about replacing it.".format( releases_stages[0].get("stage", "releases deployment") ) ) # Remove the top level deploy config added by the travis cli travis_config.pop("deploy") if not unmodified and verify: pprint.pprint(travis_config) if ( not input("Do you want to save this configuration? (y/n) ") .strip() .lower() == "y" ): return # Save the new travis config assert travis_config with open(config_file, "w") as _file: yaml.dump(travis_config, _file) print("Fixed!")
import tensorflow as tf import tensorflow.contrib.slim as slim from input_ops import create_input_ops from model import Model from ruamel.yaml import YAML from util import log try: import better_exceptions except ImportError: pass yaml_path = Path('config.yaml') yaml = YAML(typ='safe') config = yaml.load(yaml_path) paths = config['paths'] h5py_dir = paths['h5py_dir'] logs_dir = paths['logs_dir'] class Trainer(object): def __init__(self, config, dataset, dataset_test): self.config = config hyper_parameter_str = config.model + '-is_' + str(config.img_size) + '-bs_' + str(config.batch_size) + \ '-lr_' + "{:.2E}".format(config.learning_rate) + '-ur_' + str(config.update_rate) self.train_dir = logs_dir + '/%s-%s/train_dir/' % ( hyper_parameter_str, time.strftime("%Y%m%d_%H%M%S")) self.test_dir = logs_dir + '/%s-%s/test_dir/' % (
def main(): parser = argparse.ArgumentParser(description='Jasper') # model params parser.add_argument("--model_config", type=str, required=True) parser.add_argument("--eval_datasets", type=str, required=True) parser.add_argument("--load_dir", type=str, required=True) # run params parser.add_argument("--local_rank", default=None, type=int) parser.add_argument("--batch_size", default=64, type=int) parser.add_argument("--amp_opt_level", default="O1", type=str) # store results parser.add_argument("--save_logprob", default=None, type=str) # lm inference parameters parser.add_argument("--lm_path", default=None, type=str) parser.add_argument('--alpha', default=2.0, type=float, help='value of LM weight', required=False) parser.add_argument( '--alpha_max', type=float, help='maximum value of LM weight (for a grid search in \'eval\' mode)', required=False, ) parser.add_argument( '--alpha_step', type=float, help='step for LM weight\'s tuning in \'eval\' mode', required=False, default=0.1 ) parser.add_argument('--beta', default=1.5, type=float, help='value of word count weight', required=False) parser.add_argument( '--beta_max', type=float, help='maximum value of word count weight (for a grid search in \ \'eval\' mode', required=False, ) parser.add_argument( '--beta_step', type=float, help='step for word count weight\'s tuning in \'eval\' mode', required=False, default=0.1, ) parser.add_argument("--beam_width", default=128, type=int) args = parser.parse_args() batch_size = args.batch_size load_dir = args.load_dir if args.local_rank is not None: if args.lm_path: raise NotImplementedError( "Beam search decoder with LM does not currently support evaluation on multi-gpu." ) device = nemo.core.DeviceType.AllGpu else: device = nemo.core.DeviceType.GPU # Instantiate Neural Factory with supported backend neural_factory = nemo.core.NeuralModuleFactory( local_rank=args.local_rank, optimization_level=args.amp_opt_level, placement=device, ) if args.local_rank is not None: logging.info('Doing ALL GPU') yaml = YAML(typ="safe") with open(args.model_config) as f: jasper_params = yaml.load(f) try: vocab = jasper_params['labels'] sample_rate = jasper_params['sample_rate'] except KeyError: logging.error("Please make sure you are using older config format (the ones with -old suffix)") exit(1) eval_datasets = args.eval_datasets eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"]) eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"]) del eval_dl_params["train"] del eval_dl_params["eval"] data_layer = nemo_asr.AudioToTextDataLayer( manifest_filepath=eval_datasets, sample_rate=sample_rate, labels=vocab, batch_size=batch_size, **eval_dl_params, ) N = len(data_layer) logging.info('Evaluating {0} examples'.format(N)) data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( sample_rate=sample_rate, **jasper_params["AudioToMelSpectrogramPreprocessor"] ) jasper_encoder = nemo_asr.JasperEncoder( feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"], **jasper_params["JasperEncoder"] ) jasper_decoder = nemo_asr.JasperDecoderForCTC( feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], num_classes=len(vocab) ) greedy_decoder = nemo_asr.GreedyCTCDecoder() logging.info('================================') logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") logging.info(f"Total number of parameters in model: " f"{jasper_decoder.num_weights + 
jasper_encoder.num_weights}") logging.info('================================') # Define inference DAG audio_signal_e1, a_sig_length_e1, transcript_e1, transcript_len_e1 = data_layer() processed_signal_e1, p_length_e1 = data_preprocessor(input_signal=audio_signal_e1, length=a_sig_length_e1) encoded_e1, encoded_len_e1 = jasper_encoder(audio_signal=processed_signal_e1, length=p_length_e1) log_probs_e1 = jasper_decoder(encoder_output=encoded_e1) predictions_e1 = greedy_decoder(log_probs=log_probs_e1) eval_tensors = [log_probs_e1, predictions_e1, transcript_e1, transcript_len_e1, encoded_len_e1] # inference evaluated_tensors = neural_factory.infer(tensors=eval_tensors, checkpoint_dir=load_dir) greedy_hypotheses = post_process_predictions(evaluated_tensors[1], vocab) references = post_process_transcripts(evaluated_tensors[2], evaluated_tensors[3], vocab) wer = word_error_rate(hypotheses=greedy_hypotheses, references=references) logging.info("Greedy WER {:.2f}%".format(wer * 100)) # Convert logits to list of numpy arrays logprob = [] for i, batch in enumerate(evaluated_tensors[0]): for j in range(batch.shape[0]): logprob.append(batch[j][: evaluated_tensors[4][i][j], :].cpu().numpy()) if args.save_logprob: with open(args.save_logprob, 'wb') as f: pickle.dump(logprob, f, protocol=pickle.HIGHEST_PROTOCOL) # language model if args.lm_path: if args.alpha_max is None: args.alpha_max = args.alpha # include alpha_max in tuning range args.alpha_max += args.alpha_step / 10.0 if args.beta_max is None: args.beta_max = args.beta # include beta_max in tuning range args.beta_max += args.beta_step / 10.0 beam_wers = [] logprobexp = [np.exp(p) for p in logprob] for alpha in np.arange(args.alpha, args.alpha_max, args.alpha_step): for beta in np.arange(args.beta, args.beta_max, args.beta_step): logging.info('================================') logging.info(f'Infering with (alpha, beta): ({alpha}, {beta})') beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM( vocab=vocab, beam_width=args.beam_width, alpha=alpha, beta=beta, lm_path=args.lm_path, num_cpus=max(os.cpu_count(), 1), input_tensor=False, ) beam_predictions = beam_search_with_lm(log_probs=logprobexp, log_probs_length=None, force_pt=True) beam_predictions = [b[0][1] for b in beam_predictions[0]] lm_wer = word_error_rate(hypotheses=beam_predictions, references=references) logging.info("Beam WER {:.2f}%".format(lm_wer * 100)) beam_wers.append(((alpha, beta), lm_wer * 100)) logging.info('Beam WER for (alpha, beta)') logging.info('================================') logging.info('\n' + '\n'.join([str(e) for e in beam_wers])) logging.info('================================') best_beam_wer = min(beam_wers, key=lambda x: x[1]) logging.info('Best (alpha, beta): ' f'{best_beam_wer[0]}, ' f'WER: {best_beam_wer[1]:.2f}%')
class Config(object): def __init__(self, configFile: str): self.configFile = configFile self._configData = {} self.yaml = YAML() self._inBaseConfig = [] def loadConfig(self) -> None: configData = self._readConfig(self.configFile) self._validate(configData) self._configData = configData def _readConfig(self, fileName: str) -> Dict: try: with open(fileName, mode='r') as config: configData = self.yaml.load(config) if not configData: configData = {} # if this is the base server config, store what keys we loaded if fileName == self.configFile: self._inBaseConfig = list(configData.keys()) except Exception as e: raise ConfigError(fileName, e) if 'import' not in configData: return configData for fname in configData['import']: includeConfig = self._readConfig('{}/{}.yaml' .format(os.path.dirname(os.path.abspath(fileName)), fname)) for key, val in includeConfig.items(): # not present in base config, just assign it if key not in configData: configData[key] = val continue # skip non-collection types that are already set if isinstance(configData[key], (str, int)): continue if isinstance(val, str): raise ConfigError(fname, 'The included config file tried ' 'to merge a non-string with a ' 'string') try: iter(configData[key]) iter(val) except TypeError: # not a collection, so just don't merge them pass else: try: # merge with + operator configData[key] += val except TypeError: # dicts can't merge with + try: for subKey, subVal in val.items(): if subKey not in configData[key]: configData[key][subKey] = subVal except (AttributeError, TypeError): # if either of these, they weren't both dicts. raise ConfigError(fname, 'The variable {!r} could ' 'not be successfully ' 'merged'.format(key)) return configData def writeConfig(self) -> None: # filter the configData to only those keys # that were present in the base server config, # or have been modified at runtime configData = copy.deepcopy(self._configData) to_delete = set(configData.keys()).difference(self._inBaseConfig) for key in to_delete: del configData[key] # write the filtered configData try: with open(self.configFile, mode='w') as config: self.yaml.dump(configData, config) except Exception as e: raise ConfigError(self.configFile, e) def getWithDefault(self, key: str, default=None) -> Any: if key in self._configData: return self._configData[key] return default def _validate(self, configData) -> None: for key in _required: if key not in configData: raise ConfigError(self.configFile, 'Required item {!r} was not found in the config.'.format(key)) def __len__(self): return len(self._configData) def __iter__(self): return iter(self._configData) def __getitem__(self, key): return self._configData[key] def __setitem__(self, key, value): # mark this key to be saved in the server config if key not in self._inBaseConfig: self._inBaseConfig.append(key) self._configData[key] = value def __contains__(self, key): return key in self._configData
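# Hypothetical usage of the Config class above; the file name, keys and values are
# assumptions for illustration only.
config = Config('server.yaml')
config.loadConfig()                        # raises ConfigError if the file is invalid
port = config.getWithDefault('port', 8080)
config['last_started'] = '2020-01-01'      # runtime changes are kept by writeConfig()
config.writeConfig()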
def create_all_dags(args, neural_factory): """Create Directed Acyclic Graph (DAG) for training and evaluation """ logger = neural_factory.logger yaml = YAML(typ="safe") with open(args.model_config) as f: jasper_params = yaml.load(f) vocab = jasper_params['labels'] sample_rate = jasper_params['sample_rate'] # Calculate num_workers for dataloader total_cpus = os.cpu_count() cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1) # train params # perturb_config = jasper_params.get('perturb', None) train_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"]) train_dl_params.update(jasper_params["AudioToTextDataLayer"]["train"]) del train_dl_params["train"] del train_dl_params["eval"] # del train_dl_params["normalize_transcripts"] data_layer = nemo_asr.AudioToTextDataLayer( manifest_filepath=args.train_dataset, sample_rate=sample_rate, labels=vocab, batch_size=args.batch_size, num_workers=cpu_per_traindl, **train_dl_params, # normalize_transcripts=False ) N = len(data_layer) steps_per_epoch = int(N / (args.batch_size * args.num_gpus)) logger.info('Have {0} examples to train on.'.format(N)) data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( sample_rate=sample_rate, **jasper_params["AudioToMelSpectrogramPreprocessor"]) multiply_batch_config = jasper_params.get('MultiplyBatch', None) if multiply_batch_config: multiply_batch = nemo_asr.MultiplyBatch(**multiply_batch_config) spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None) if spectr_augment_config: data_spectr_augmentation = nemo_asr.SpectrogramAugmentation( **spectr_augment_config) eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"]) eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"]) del eval_dl_params["train"] del eval_dl_params["eval"] data_layers_eval = [] if args.eval_datasets: for eval_datasets in args.eval_datasets: data_layer_eval = nemo_asr.AudioToTextDataLayer( manifest_filepath=eval_datasets, sample_rate=sample_rate, labels=vocab, batch_size=args.eval_batch_size, num_workers=cpu_per_traindl, **eval_dl_params, ) data_layers_eval.append(data_layer_eval) else: neural_factory.logger.info("There were no val datasets passed") # set encoder and decoders jasper_encoder = nemo_asr.JasperEncoder( feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"], **jasper_params["JasperEncoder"]) jasper_decoder = nemo_asr.JasperDecoderForCTC( feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], num_classes=len(vocab), factory=neural_factory) ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab)) greedy_decoder = nemo_asr.GreedyCTCDecoder() # finetuning - restore encoder and decoder if args.finetune: logger.info("Finetuning") jasper_encoder.restore_from(args.load_encoder) logger.info("Loaded encoder: {}".format(args.load_encoder)) if args.load_decoder != "": jasper_decoder.restore_from(args.load_decoder) logger.info("Loaded decoder: {}".format(args.load_decoder)) logger.info('================================') logger.info( f"Number of parameters in encoder: {jasper_encoder.num_weights}") logger.info( f"Number of parameters in decoder: {jasper_decoder.num_weights}") logger.info(f"Total number of parameters: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}") logger.info('================================') # Train DAG audio_signal_t, a_sig_length_t, \ transcript_t, transcript_len_t = data_layer() processed_signal_t, p_length_t = data_preprocessor( input_signal=audio_signal_t, length=a_sig_length_t) if multiply_batch_config: 
processed_signal_t, p_length_t, transcript_t, transcript_len_t = \ multiply_batch( in_x=processed_signal_t, in_x_len=p_length_t, in_y=transcript_t, in_y_len=transcript_len_t) if spectr_augment_config: processed_signal_t = data_spectr_augmentation( input_spec=processed_signal_t) encoded_t, encoded_len_t = jasper_encoder(audio_signal=processed_signal_t, length=p_length_t) log_probs_t = jasper_decoder(encoder_output=encoded_t) predictions_t = greedy_decoder(log_probs=log_probs_t) loss_t = ctc_loss(log_probs=log_probs_t, targets=transcript_t, input_length=encoded_len_t, target_length=transcript_len_t) # Callbacks needed to print info to console and Tensorboard train_callback = nemo.core.SimpleLossLoggerCallback( tensors=[loss_t, predictions_t, transcript_t, transcript_len_t], print_func=partial(monitor_asr_train_progress, labels=vocab, logger=logger), get_tb_values=lambda x: [("loss", x[0])], tb_writer=neural_factory.tb_writer, ) chpt_callback = nemo.core.CheckpointCallback( folder=neural_factory.checkpoint_dir, step_freq=args.checkpoint_save_freq) callbacks = [train_callback, chpt_callback] # assemble eval DAGs for i, eval_dl in enumerate(data_layers_eval): audio_signal_e, a_sig_length_e, transcript_e, transcript_len_e = \ eval_dl() processed_signal_e, p_length_e = data_preprocessor( input_signal=audio_signal_e, length=a_sig_length_e) encoded_e, encoded_len_e = jasper_encoder( audio_signal=processed_signal_e, length=p_length_e) log_probs_e = jasper_decoder(encoder_output=encoded_e) predictions_e = greedy_decoder(log_probs=log_probs_e) loss_e = ctc_loss(log_probs=log_probs_e, targets=transcript_e, input_length=encoded_len_e, target_length=transcript_len_e) # create corresponding eval callback tagname = os.path.basename(args.eval_datasets[i]).split(".")[0] eval_callback = nemo.core.EvaluatorCallback( eval_tensors=[ loss_e, predictions_e, transcript_e, transcript_len_e ], user_iter_callback=partial(process_evaluation_batch, labels=vocab), user_epochs_done_callback=partial(process_evaluation_epoch, tag=tagname, logger=logger), eval_step=args.eval_freq, tb_writer=neural_factory.tb_writer) callbacks.append(eval_callback) return loss_t, callbacks, steps_per_epoch