Example No. 1
async def test_lovelace_update_view(hass, hass_ws_client):
    """Test update_view command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')
    origyaml = yaml.load(TEST_YAML_A)

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=origyaml), \
        patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/update',
            'view_id': 'example',
            'view_config': 'id: example2\ntitle: New title\n',
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    orig_view = origyaml.mlget(['views', 0], list_ok=True)
    new_view = result.mlget(['views', 0], list_ok=True)
    assert new_view['title'] == 'New title'
    assert new_view['cards'] == orig_view['cards']
    assert 'theme' not in new_view
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
Example No. 2
async def test_lovelace_move_card_view_position(hass, hass_ws_client):
    """Test move_card to view with position command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
        patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/move',
            'card_id': 'test',
            'new_view_id': 'example',
            'new_position': 1,
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    assert result.mlget(['views', 0, 'cards', 1, 'title'],
                        list_ok=True) == 'Test card'
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
Example No. 3
    def parse(self, input):
        """parse the given file or file source string"""
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            src = input.read()
            input.close()
            input = src
        if isinstance(input, bytes):
            input = input.decode('utf-8')
        yaml = YAML()
        try:
            self._file = yaml.load(input)
        except YAMLError as e:
            message = e.problem if hasattr(e, 'problem') else e.message
            if hasattr(e, 'problem_mark'):
                message += ' {0}'.format(e.problem_mark)
            raise base.ParseError(message)

        self._file = self.preprocess(self._file)

        for k, data in self._flatten(self._file):
            unit = self.UnitClass(data)
            unit.setid(k)
            self.addunit(unit)
Example No. 4
async def test_lovelace_get_view(hass, hass_ws_client):
    """Test get_view command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/get',
            'view_id': 'example',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
    assert "".join(msg['result'].split()) == "".join('title: Example\n # \
                             Optional unique id for direct\
                             access /lovelace/${id}\nid: example\n # Optional\
                             background (overwrites the global background).\n\
                             background: radial-gradient(crimson, skyblue)\n\
                             # Each view can have a different theme applied.\n\
                             theme: dark-mode\n'.split())
Example No. 5
    def testToYAMLs(self, tests, rootFolder='./testsNewBuild/'):
        """Writes a batch of tests to file in the yaml format, grouping them by team and name

        :param tests: list of tests to write to file
        :type tests: list
        :param rootFolder: destination folder, defaults to './testsNewBuild/'
        :type rootFolder: str, optional
        """

        # extract unique test names
        uniqueTestNames = set([c.name for c in tests])
        # group by test names to put them in same files
        for name in uniqueTestNames:
            yaml = YAML()
            yaml.default_flow_style = False
            testDict = None
            for t in tests:
                if t.name == name:
                    f = open(os.path.join(
                        rootFolder, t.team, name + '.yaml'), "w+")
                    if testDict is None:
                        testDict = t.toDict()
                    else:
                        key = 'metric' + str(len(testDict['metrics'])+1)
                        testDict['metrics'][key] = t.toDict()[
                            'metrics']['metric1']
            yaml.dump(testDict, f)
Example No. 6
    def handle(self, *args, **options):
        def flatten(l):
            return [item for sublist in l for item in sublist]

        yaml = YAML()
        with open(options['yaml']) as yamlfile:
            data = yaml.load(yamlfile)

        for attribute in flatten(data['attributes'].values()):
            SuomiFiUserAttribute.objects.update_or_create(
                friendly_name=attribute['friendly_name'],
                uri=attribute['uri'],
                name=attribute['name'],
                description=attribute['description']
            )

        for level, details in data['access_levels'].items():
            access_level, created = SuomiFiAccessLevel.objects.update_or_create(shorthand=level)
            for language, name in details['name'].items():
                access_level.set_current_language(language)
                access_level.name = name
            for language, description in details['description'].items():
                access_level.set_current_language(language)
                access_level.description = description
            for attribute in flatten(details['fields']):
                access_level.attributes.add(SuomiFiUserAttribute.objects.get(friendly_name=attribute['friendly_name']))
            access_level.save()
Example No. 7
def mocked_config_file_path(
    fake_temp_data_pocketsphinx_dic, fake_temp_data_pocketsphinx_lm, tmpdir_factory
):
    path_to_pocketsphix_dic = os.path.join(
        str(fake_temp_data_pocketsphinx_dic), "fake.dic"
    )
    path_to_pocketsphix_lm = os.path.join(
        str(fake_temp_data_pocketsphinx_lm), "fake.lm"
    )
    # config part
    base = tempfile.mkdtemp()
    config_file = os.path.join(base, "config.yaml")

    yaml = YAML()

    m_cfg = yaml.load(COMMON_MOCKED_CONFIG)
    m_cfg["pocketsphinx"]["dic"] = path_to_pocketsphix_dic
    m_cfg["pocketsphinx"]["lm"] = path_to_pocketsphix_lm

    with open(config_file, "w", encoding="utf-8") as fp:
        yaml.dump(m_cfg, fp)

    yield config_file

    shutil.rmtree(base)
Example No. 8
File: misc.py  Project: mjirik/io3d
def obj_from_file(filename='annotation.yaml', filetype='auto'):
    ''' Read object from file '''

    if filetype == 'auto':
        _, ext = os.path.splitext(filename)
        filetype = ext[1:]

    if filetype in ('yaml', 'yml'):
        from ruamel.yaml import YAML
        yaml = YAML(typ="unsafe")
        with open(filename, encoding="utf-8") as f:
            obj = yaml.load(f)
        if obj is None:
            obj = {}
        # import yaml
        # with open(filename, encoding="utf-8") as f:
        #     intext = f.read()
        #     obj = yaml.load(intext)
    elif filetype in ('pickle', 'pkl', 'pklz', 'picklezip'):
        fcontent = read_pkl_and_pklz(filename)
        # import pickle
        if sys.version_info[0] < 3:
            import cPickle as pickle
        else:
            import _pickle as pickle
        # import sPickle as pickle
        if sys.version_info.major == 2:
            obj = pickle.loads(fcontent)
        else:
            obj = pickle.loads(fcontent, encoding="latin1")
    else:
        logger.error('Unknown filetype ' + filetype)
    return obj
Example No. 9
    def dump(self, data, stream=None, **kw):
        inefficient = False
        if stream is None:
            inefficient = True
            stream = StringIO()
        YAML.dump(self, data, stream, **kw)
        if inefficient:
            return stream.getvalue()
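Example No. 9 matches the string-returning dump wrapper shown in the ruamel.yaml documentation: YAML.dump always writes to a stream, so when none is supplied the method buffers into a StringIO and returns its contents. A minimal, self-contained sketch of how that method sits inside a YAML subclass (the MyYAML name and the sample data are only illustrative):

from io import StringIO

from ruamel.yaml import YAML


class MyYAML(YAML):
    def dump(self, data, stream=None, **kw):
        # Buffer into a StringIO only when the caller did not pass a stream.
        inefficient = False
        if stream is None:
            inefficient = True
            stream = StringIO()
        YAML.dump(self, data, stream, **kw)
        if inefficient:
            return stream.getvalue()


yaml = MyYAML()
print(yaml.dump({'answer': 42, 'items': ['a', 'b']}))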
Example No. 10
    def test_to_file(self):
        filename = "ff_test.yaml"
        b = self.benzene
        b.to_file(filename=filename)
        yaml = YAML(typ="safe")
        with open(filename, "r") as f:
            d = yaml.load(f)
        self.assertListEqual(d["mass_info"], [list(m) for m in b.mass_info])
        self.assertListEqual(d["pair_coeffs"], b.pair_coeffs)
Example No. 11
    def to_yaml(self, filename):
        if self.stats is None:
            logger.error("Run .skeleton_analysis() before .to_yaml()")
            return

        from ruamel.yaml import YAML
        yaml = YAML(typ="unsafe")
        with open(filename, 'wt', encoding="utf-8") as f:
            yaml.dump(self.stats, f)
Example No. 12
    def test_to_file(self):
        filename = "ff_test.yaml"
        v = self.virus
        v.to_file(filename=filename)
        yaml = YAML(typ="safe")
        with open(filename, "r") as f:
            d = yaml.load(f)
        self.assertListEqual(d["mass_info"], [list(m) for m in v.mass_info])
        self.assertListEqual(d["nonbond_coeffs"], v.nonbond_coeffs)
Example No. 13
def test_id_not_changed():
    """Test if id is not changed if already exists."""
    yaml = YAML(typ='rt')

    fname = "dummy.yaml"
    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_B)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        migrate_config(fname)
    assert save_yaml_mock.call_count == 0
Example No. 14
def test_save_yaml_model(tmpdir, mini_model):
    """Test the writing of YAML model."""
    jsonschema = pytest.importorskip("jsonschema")
    output_file = tmpdir.join("mini.yml")
    cio.save_yaml_model(mini_model, output_file.strpath, sort=True)
    # validate against schema
    yaml = YAML(typ="unsafe")
    with open(output_file.strpath, "r") as infile:
        yaml_to_dict = yaml.load(infile)
    dict_to_json = json.dumps(yaml_to_dict)
    loaded = json.loads(dict_to_json)
    assert jsonschema.validate(loaded, cio.json.json_schema)
Example No. 15
    def from_file(cls, filename):
        """
        Constructor that reads in a file in YAML format.

        Args:
            filename (str): Filename.

        """
        yaml = YAML(typ="safe")
        with open(filename, "r") as f:
            d = yaml.load(f)
        return cls.from_dict(d)
Example No. 16
    def to_file(self, filename):
        """
        Saves object to a file in YAML format.

        Args:
            filename (str): File name.

        """
        d = {"mass_info": self.mass_info, "pair_coeffs": self.pair_coeffs,
             "mol_coeffs": self.mol_coeffs}
        yaml = YAML(typ="safe")
        with open(filename, "w") as f:
            yaml.dump(d, f)
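Examples No. 15 and No. 16 are the two halves of a safe-mode round trip: to_file dumps a plain dict of lists and from_file loads it back to rebuild the object. A standalone sketch of that round trip, where the key names come from the examples above and the numeric values are made up for illustration:

from ruamel.yaml import YAML

d = {"mass_info": [["C", 12.011], ["H", 1.008]],
     "pair_coeffs": [[0.068, 3.407], [0.03, 2.42]]}

yaml = YAML(typ="safe")
with open("ff_test.yaml", "w") as f:
    yaml.dump(d, f)          # plain dicts/lists survive a safe dump unchanged

with open("ff_test.yaml", "r") as f:
    loaded = yaml.load(f)

assert loaded == d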
Example No. 17
def test_add_id():
    """Test if id is added."""
    yaml = YAML(typ='rt')

    fname = "dummy.yaml"
    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        migrate_config(fname)

    result = save_yaml_mock.call_args_list[0][0][1]
    assert 'id' in result['views'][0]['cards'][0]
    assert 'id' in result['views'][1]
Example No. 18
def vt2esofspy(vesseltree, outputfilename="tracer.txt", axisorder=[0, 1, 2]):
    """
    exports vesseltree to esofspy format

    :param vesseltree: filename or vesseltree dictionary structure
    :param outputfilename: output file name
    :param axisorder: order of axis can be specified with this option
    :return:
    """

    if (type(vesseltree) == str) and os.path.isfile(vesseltree):
        from ruamel.yaml import YAML
        yaml = YAML(typ="unsafe")
        with open(vesseltree, encoding="utf-8") as f:
            vt = yaml.load(f)
    else:
        vt = vesseltree
    logger.debug(str(vt['general']))
    logger.debug(str(vt.keys()))
    vtgm = vt['graph']['microstructure']
    lines = []
    vs = vt['general']['voxel_size_mm']
    sh = vt['general']['shape_px']

    # switch axis
    ax = axisorder

    lines.append("#Tracer+\n")
    lines.append("#voxelsize mm %f %f %f\n" % (vs[ax[0]], vs[ax[1]], vs[ax[2]]))
    lines.append("#shape %i %i %i\n" % (sh[ax[0]], sh[ax[1]], sh[ax[2]]))
    lines.append(str(len(vtgm) * 2)+"\n")

    i = 1
    for id in vtgm:
        try:
            nda = vtgm[id]['nodeA_ZYX']
            ndb = vtgm[id]['nodeB_ZYX']
            lines.append("%i\t%i\t%i\t%i\n" % (nda[ax[0]], nda[ax[1]], nda[ax[2]], i))
            lines.append("%i\t%i\t%i\t%i\n" % (ndb[ax[0]], ndb[ax[1]], ndb[ax[2]], i))
            i += 1
        except:
            pass


    lines.append("%i\t%i\t%i\t%i" % (0, 0, 0, 0))
    lines[3] = str(i - 1) + "\n"
    from builtins import str as text
    with open(outputfilename, 'wt') as f:
        for line in lines:
            f.write(text(line))
Example No. 19
def edit_tmpvault(filename):
    '''Update the yaml config by changing any key whose value is CHANGE_AND_REKEY.

    Prompts for a master password and uses pbkdf2 to derive a master key from
    which all of the new keys are generated.
    '''
    yaml = YAML()
    with open(filename) as fobj:
        vault_dict = yaml.load(fobj)
    master_pass = getpass.getpass("Enter master key to generate values: ").encode('utf-8')
    master_key = hashlib.pbkdf2_hmac('sha256', master_pass, os.urandom(16), 100000)
    change_values(vault_dict, 'CHANGE_AND_REKEY', master_key)
    with open(filename, 'w') as fobj:
        yaml.dump(vault_dict, fobj)
Example No. 20
def main():
    yaml = YAML()
    file_name = sys.argv[1]
    file_in = open(file_name).read()
    docs = yaml.load_all(file_in)
    i = 0
    for doc in docs:

        if i == 0:
            code_old = doc
        else:
            code_new = doc
        i = i + 1
    delta_map = dict()
    follow_keys = list()

    add = list()
    update = list()
    traversal(code_old, code_new, follow_keys, delta_map, update, add)
    yaml.dump(code_old, sys.stdout)

    split = '------love----you------choerodon----'

    print(split)
    yaml.dump(delta_map, sys.stdout)

    print(split)
    change_key_map = dict()

    change_key_map["add"] = add
    change_key_map["update"] = update
    yaml.dump(change_key_map, sys.stdout)
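load_all in Example No. 20 yields one document per '---' separated section of the input, which is why the loop above ends up with the old and the new config as two separate objects. A minimal sketch of load_all and dump on a two-document string (the data is illustrative):

import sys

from ruamel.yaml import YAML

yaml = YAML()
docs = list(yaml.load_all("a: 1\nb: 2\n---\na: 1\nb: 3\nc: 4\n"))
old, new = docs          # one Python object per YAML document

yaml.dump(old, sys.stdout)
print('---')
yaml.dump(new, sys.stdout)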
Example No. 21
    def loadtestDictsFromFilePaths(self, testFilePaths):
        """Parses yaml files from given filepaths

        :param testFilePaths: file names to parse
        :type testFilePaths: list of strings
        :return: list of dict parsed from the yaml
        :rtype: list of dicts
        """

        testDicts = []
        yaml = YAML()
        for testFile in testFilePaths:
            with open(testFile) as f:
                testDict = yaml.load(f)
            testDicts.append(dict(testDict))
        return testDicts
Example No. 22
def dumpyaml(
    yamlp: YAML, data: Any, stream: Any = None, **kw: Any
) -> Optional[str]:
    """Dump YAML to string."""
    inefficient = False
    if stream is None:
        inefficient = True
        stream = StringIO()
    # overriding here to get dumping to
    # not sort keys.
    yamlp = YAML()
    yamlp.indent(mapping=4, sequence=6, offset=3)
    # yamlp.compact(seq_seq=False, seq_map=False)
    yamlp.dump(data, stream, **kw)
    if inefficient:
        return cast(str, stream.getvalue())
    return None
Example No. 23
def get_default_opttask_kwargs():
    """
    Get the default configuration kwargs for OptTask.

    Args:
        None

    Returns:
        conf_dict (dict): The default kwargs for OptTask

    """
    cwd = os.path.dirname(os.path.realpath(__file__))
    fname = os.path.join(cwd, "defaults.yaml")
    with open(fname, 'r') as config_raw:
        yaml = YAML()
        conf_dict = dict(yaml.load(config_raw))
    return conf_dict
Example No. 24
    def list_products_yaml(self, hostname, system):
        from ruamel.yaml import YAML
        yml = YAML(typ='safe', pure=False)
        yml.default_flow_style = False
        yml.explicit_end = True
        yml.explicit_start = True
        yml.indent(mapping=4, sequence=4, offset=2)
        data = system.to_refhost_dict()
        data["name"] = str(hostname)
        yml.dump(data, self.output)
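Example No. 24 configures the emitter on the YAML instance itself: default_flow_style selects block style, explicit_start/explicit_end add the '---' and '...' document markers, and indent() sets the mapping/sequence indentation. A standalone sketch of those knobs with illustrative data:

import sys

from ruamel.yaml import YAML

yml = YAML(typ='safe', pure=False)
yml.default_flow_style = False               # block style instead of {...}/[...]
yml.explicit_start = True                    # emit the leading '---'
yml.explicit_end = True                      # emit the trailing '...'
yml.indent(mapping=4, sequence=4, offset=2)

data = {"name": "refhost1", "products": ["sles", "sle-ha"]}
yml.dump(data, sys.stdout)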
Example No. 25
def main():
    """Main application entry point."""
    if len(sys.argv) != 3:
        print("Usage: yc-calc <input-file> <output-file>")
        sys.exit(1)

    infile = sys.argv[1]
    outfile = sys.argv[2]

    mapping_tag = yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG
    sequence_tag = yaml.resolver.BaseResolver.DEFAULT_SEQUENCE_TAG

    yaml.add_constructor(mapping_tag, dict_constructor,
                         Loader=RoundTripConstructor)
    yaml.add_constructor(sequence_tag, list_constructor,
                         Loader=RoundTripConstructor)

    yaml.add_representer(CalcDict, dict_representer,
                         Dumper=RoundTripRepresenter)
    yaml.add_representer(CalcList, list_representer,
                         Dumper=RoundTripRepresenter)

    try:
        with open(infile) as infp:
            top = YAML().load(infp)

            if not isinstance(top, CalcDict):
                type_name = type(top).__name__
                err("Top level element should be dict not {0}".format(type_name))

            defs = {}
            defs_str = top.get("DEFS", "")

            try:
                exec(defs_str, defs)
            except Exception as exc:
                err("Error executing DEFS: {0}".format(exc))

            CalcContainer.set_top(defs, top)
            write(top, outfile)
    except IOError as exc:
        err("Error opening file: {0}".format(exc))
    except yaml.YAMLError as exc:
        err("Error parsing input: {0}".format(exc))
Example No. 26
async def test_lovelace_get_card(hass, hass_ws_client):
    """Test get_card command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/get',
            'card_id': 'test',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
    assert msg['result'] == 'id: test\ntype: entities\ntitle: Test card\n'
Example No. 27
async def test_lovelace_get_view_not_found(hass, hass_ws_client):
    """Test get_card command cannot find card."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/get',
            'view_id': 'not_found',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success'] is False
    assert msg['error']['code'] == 'view_not_found'
Example No. 28
class TestYAML(unittest.TestCase):
    """Test lovelace.yaml save and load."""

    def setUp(self):
        """Set up for tests."""
        self.tmp_dir = mkdtemp()
        self.yaml = YAML(typ='rt')

    def tearDown(self):
        """Clean up after tests."""
        for fname in os.listdir(self.tmp_dir):
            os.remove(os.path.join(self.tmp_dir, fname))
        os.rmdir(self.tmp_dir)

    def _path_for(self, leaf_name):
        return os.path.join(self.tmp_dir, leaf_name+".yaml")

    def test_save_and_load(self):
        """Test saving and loading back."""
        fname = self._path_for("test1")
        open(fname, "w+")
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_A))
        data = util_yaml.load_yaml(fname, True)
        assert data == self.yaml.load(TEST_YAML_A)

    def test_overwrite_and_reload(self):
        """Test that we can overwrite an existing file and read back."""
        fname = self._path_for("test2")
        open(fname, "w+")
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_A))
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_B))
        data = util_yaml.load_yaml(fname, True)
        assert data == self.yaml.load(TEST_YAML_B)

    def test_load_bad_data(self):
        """Test error from trying to load unserialisable data."""
        fname = self._path_for("test3")
        with open(fname, "w") as fh:
            fh.write(TEST_BAD_YAML)
        with pytest.raises(HomeAssistantError):
            util_yaml.load_yaml(fname, True)
Example No. 29
    def _readConfig(self, fileName):
        try:
            with open(fileName, "r") as config:
                yaml = YAML()
                configData = yaml.load(config)
                if not configData:
                    configData = {}
        except Exception as e:
            raise ConfigError(fileName, e)

        if "include" in configData:
            for fileName in configData["include"]:
                includeConfig = self._readConfig(fileName)
                for key, val in includeConfig.iteritems():
                    if key not in configData:
                        configData[key] = val
                    elif not isinstance(configData[key], basestring): # Let's try to merge them if they're collections
                        if isinstance(val, basestring):
                            raise ConfigError(fileName, "The included configuration file tried to merge a non-string "
                                                        "with a string.")
                        try: # Make sure both things we're merging are still iterable types (not numbers or whatever)
                            iter(configData[key])
                            iter(val)
                        except TypeError:
                            pass # Just don't merge them if they're not
                        else:
                            try:
                                configData[key] += val # Merge with the + operator
                            except TypeError: # Except that some collections (dicts) can't
                                try:
                                    for subkey, subval in val.iteritems(): # So merge them manually
                                        if subkey not in configData[key]:
                                            configData[key][subkey] = subval
                                except (AttributeError, TypeError):
                                    # If either of these, they weren't both dicts (but were still iterable);
                                    # requires user to resolve
                                    raise ConfigError(fileName, "The variable {} could not be successfully merged "
                                                                "across files.".format(key))
            del configData["include"]
        return configData
Example No. 30
    def _to_text(self, filename=None, is_json=False):
        """Serialize to a json/yaml file"""
        extra_data = {} if self.extra_data is None else self.extra_data

        def cell_value(a_cell):
            if a_cell.formula and a_cell.formula.python_code:
                return '=' + a_cell.formula.python_code
            else:
                return a_cell.value

        extra_data.update(dict(
            excel_hash=self._excel_file_md5_digest,
            cell_map=dict(sorted(
                ((addr, cell_value(cell))
                 for addr, cell in self.cell_map.items() if cell.serialize),
                key=lambda x: AddressRange(x[0]).sort_key
            )),
        ))
        if not filename:
            filename = self.filename + ('.json' if is_json else '.yml')

        # hash the current file to see if this function makes any changes
        existing_hash = (self._compute_file_md5_digest(filename)
                         if os.path.exists(filename) else None)

        if not is_json:
            with open(filename, 'w') as f:
                ymlo = YAML()
                ymlo.width = 120
                ymlo.dump(extra_data, f)
        else:
            with open(filename, 'w') as f:
                json.dump(extra_data, f, indent=4)

        del extra_data['cell_map']

        # hash the newfile, return True if it changed, this is only reliable
        # on pythons which have ordered dict (CPython 3.6 & python 3.7+)
        return (existing_hash is None or
                existing_hash != self._compute_file_md5_digest(filename))
Example No. 31
def main():
    args = parse_args()
    name = construct_name(
        args.exp_name,
        args.lr,
        args.batch_size,
        args.max_steps,
        args.num_epochs,
        args.weight_decay,
        args.optimizer,
        args.iter_per_step,
    )

    # time stamp
    date_time = datetime.now().strftime("%m-%d-%Y -- %H-%M-%S")

    log_dir = name
    if args.work_dir:
        log_dir = os.path.join(args.work_dir, name)

    if args.tensorboard_dir is None:
        tensorboard_dir = os.path.join(name, 'tensorboard', date_time)
    else:
        tensorboard_dir = args.tensorboard_dir

    if args.checkpoint_dir is None:
        checkpoint_dir = os.path.join(name, date_time)
    else:
        base_checkpoint_dir = args.checkpoint_dir
        if len(glob.glob(os.path.join(base_checkpoint_dir, '*.pt'))) > 0:
            checkpoint_dir = base_checkpoint_dir
        else:
            checkpoint_dir = os.path.join(args.checkpoint_dir, date_time)

    # instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=log_dir,
        checkpoint_dir=checkpoint_dir,
        create_tb_writer=args.create_tb_writer,
        files_to_copy=[args.model_config, __file__],
        cudnn_benchmark=args.cudnn_benchmark,
        tensorboard_dir=tensorboard_dir,
    )
    args.num_gpus = neural_factory.world_size

    if args.local_rank is not None:
        logging.info('Doing ALL GPU')

    # build dags
    train_loss, callbacks, steps_per_epoch = create_all_dags(
        args, neural_factory)

    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)

    lr_schedule = jasper_params.get('lr_schedule', 'CosineAnnealing')

    if lr_schedule == 'CosineAnnealing':
        lr_policy = CosineAnnealing(
            total_steps=args.max_steps if args.max_steps is not None else
            args.num_epochs * steps_per_epoch,
            warmup_ratio=args.warmup_ratio,
            min_lr=args.min_lr,
        )
    elif lr_schedule == 'PolynomialDecayAnnealing':
        lr_policy = PolynomialDecayAnnealing(
            total_steps=args.max_steps if args.max_steps is not None else
            args.num_epochs * steps_per_epoch,
            warmup_ratio=args.warmup_ratio,
            min_lr=args.min_lr,
            power=2.0,
        )
    elif lr_schedule == 'PolynomialHoldDecayAnnealing':
        lr_policy = PolynomialHoldDecayAnnealing(
            total_steps=args.max_steps if args.max_steps is not None else
            args.num_epochs * steps_per_epoch,
            warmup_ratio=args.warmup_ratio,
            hold_ratio=args.hold_ratio,
            min_lr=args.min_lr,
            power=2.0,
        )
    else:
        raise ValueError("LR schedule is invalid !")

    logging.info(f"Using `{lr_policy}` Learning Rate Scheduler")

    # train model
    neural_factory.train(
        tensors_to_optimize=[train_loss],
        callbacks=callbacks,
        lr_policy=lr_policy,
        optimizer=args.optimizer,
        optimization_params={
            "num_epochs": args.num_epochs,
            "max_steps": args.max_steps,
            "lr": args.lr,
            "momentum": 0.95,
            "betas": (args.beta1, args.beta2),
            "weight_decay": args.weight_decay,
            "grad_norm_clip": None,
        },
        batches_per_step=args.iter_per_step,
    )
Example No. 32
'''
	Model-Based Actor-Critic Script: MBPO
	Do not modify.
'''
# pylint: disable=E0401
import sys
from ruamel.yaml import YAML
from src.mbpo import MBPO

if __name__ == "__main__":
    # load the yaml config file
    yaml = YAML()
    v = yaml.load(open(sys.argv[1]))

    # initialize the main class
    agent = MBPO(train_kwargs=v["train_kwargs"],
                 model_kwargs=v["model_kwargs"],
                 TD3_kwargs=v["TD3_kwargs"])
    # run the training routine
    agent.train()
Example No. 33
    win_wshshl.SendKeys('{F13}')


def say_nihao(systray):
    syncSpeak('你好')


def make_beep(systray):
    winsound.Beep(1000, 1000)


# ━━━ Read/write configuration ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

from ruamel.yaml import YAML

yaml = YAML()
from bin.common import count_file


def get_count():  # Read the count value directly from the config file
    with open(count_file, encoding='utf-8') as f:
        return yaml.load(f)['count']


def set_count(count):  # Write the count value directly to the config file
    with open(count_file, 'w', encoding='utf-8') as f:
        yaml.dump({'count': count}, f)


# ■■■ Core functionality ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
Example No. 34
class TestASRPytorch(NeMoUnitTest):
    labels = [
        " ",
        "a",
        "b",
        "c",
        "d",
        "e",
        "f",
        "g",
        "h",
        "i",
        "j",
        "k",
        "l",
        "m",
        "n",
        "o",
        "p",
        "q",
        "r",
        "s",
        "t",
        "u",
        "v",
        "w",
        "x",
        "y",
        "z",
        "'",
    ]
    manifest_filepath = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json"))
    featurizer_config = {
        'window': 'hann',
        'dither': 1e-05,
        'normalize': 'per_feature',
        'frame_splicing': 1,
        'int_values': False,
        'window_stride': 0.01,
        'sample_rate': freq,
        'features': 64,
        'n_fft': 512,
        'window_size': 0.02,
    }
    yaml = YAML(typ="safe")

    @classmethod
    def setUpClass(cls) -> None:
        super().setUpClass()
        data_folder = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "../data/"))
        logging.info("Looking up for test ASR data")
        if not os.path.exists(os.path.join(data_folder, "asr")):
            logging.info("Extracting ASR data to: {0}".format(
                os.path.join(data_folder, "asr")))
            tar = tarfile.open(os.path.join(data_folder, "asr.tar.gz"), "r:gz")
            tar.extractall(path=data_folder)
            tar.close()
        else:
            logging.info("ASR data found in: {0}".format(
                os.path.join(data_folder, "asr")))

    @classmethod
    def tearDownClass(cls) -> None:
        super().tearDownClass()
        data_folder = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "../data/"))
        logging.info("Looking up for test ASR data")
        if os.path.exists(os.path.join(data_folder, "asr")):
            shutil.rmtree(os.path.join(data_folder, "asr"))

    def test_transcript_normalizers(self):
        # Create test json
        test_strings = [
            "TEST CAPITALIZATION",
            '!\\"#$%&\'()*+,-./:;<=>?@[\\\\]^_`{|}~',
            "3+3=10",
            "3 + 3 = 10",
            "why     is \\t whitepsace\\tsuch a problem   why indeed",
            "\\\"Can you handle quotes?,\\\" says the boy",
            "I Jump!!!!With joy?Now.",
            "Maybe I want to learn periods.",
            "$10 10.90 1-800-000-0000",
            "18000000000 one thousand 2020",
            "1 10 100 1000 10000 100000 1000000",
            "Î  ĻƠvɆȩȅĘ ÀÁÃ Ą ÇĊňńŤŧș",
            "‘’“”❛❜❝❞「 」 〈 〉 《 》 【 】 〔 〕 ⦗ ⦘ 😙  👀 🔨",
            "It only costs $1 000 000! Cheap right?",
            "2500, 3000 are separate but 200, 125 is not",
            "1",
            "1 2",
            "1 2 3",
            "10:00pm is 10:00 pm is 22:00 but not 10: 00 pm",
            "10:00 10:01pm 10:10am 10:90pm",
            "Mr. Expand me!",
            "Mr Don't Expand me!",
        ]
        normalized_strings = [
            "test capitalization",
            'percent and \' plus',
            "three plus three ten",
            "three plus three ten",
            "why is whitepsace such a problem why indeed",
            "can you handle quotes says the boy",
            "i jump with joy now",
            "maybe i want to learn periods",
            "ten dollars ten point nine zero one eight hundred zero zero",
            "eighteen billion one thousand two thousand and twenty",
            # Two line string below
            "one ten thousand one hundred one thousand ten thousand one "
            "hundred thousand one million",
            "i loveeee aaa a ccnntts",
            "''",
            "it only costs one million dollars cheap right",
            # Two line string below
            "two thousand five hundred three thousand are separate but two "
            "hundred thousand one hundred and twenty five is not",
            "one",
            "one two",
            "one two three",
            "ten pm is ten pm is twenty two but not ten zero pm",
            "ten ten one pm ten ten am ten ninety pm",
            "mister expand me",
            "mr don't expand me",
        ]
        manifest_paths = os.path.abspath(
            os.path.join(os.path.dirname(__file__),
                         "../data/asr/manifest_test.json"))

        def remove_test_json():
            os.remove(manifest_paths)

        self.addCleanup(remove_test_json)

        with open(manifest_paths, "w") as f:
            for s in test_strings:
                f.write('{"audio_filepath": "", "duration": 1.0, "text": '
                        f'"{s}"}}\n')
        parser = parsers.make_parser(self.labels, 'en')
        manifest = collections.ASRAudioText(
            manifests_files=[manifest_paths],
            parser=parser,
        )

        for i, s in enumerate(normalized_strings):
            self.assertTrue(manifest[i].text_tokens == parser(s))

    def test_pytorch_audio_dataset(self):
        featurizer = WaveformFeaturizer.from_config(self.featurizer_config)
        ds = AudioDataset(
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            featurizer=featurizer,
        )

        for i in range(len(ds)):
            if i == 5:
                logging.info(ds[i])
            # logging.info(ds[i][0].shape)
            # self.assertEqual(freq, ds[i][0].shape[0])

    def test_dataloader(self):
        batch_size = 4
        dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=batch_size,
            # placement=DeviceType.GPU,
            drop_last=True,
        )
        for ind, data in enumerate(dl.data_iterator):
            # With num_workers update, this is no longer true
            # Moving to GPU is handled by AudioPreprocessor
            # data is on GPU
            # self.assertTrue(data[0].is_cuda)
            # self.assertTrue(data[1].is_cuda)
            # self.assertTrue(data[2].is_cuda)
            # self.assertTrue(data[3].is_cuda)
            # first dimension is batch
            self.assertTrue(data[0].size(0) == batch_size)
            self.assertTrue(data[1].size(0) == batch_size)
            self.assertTrue(data[2].size(0) == batch_size)
            self.assertTrue(data[3].size(0) == batch_size)

    def test_preprocessor_errors(self):
        def create_broken_preprocessor_1():
            nemo_asr.AudioToMelSpectrogramPreprocessor(window_size=2,
                                                       n_window_size=2)

        def create_broken_preprocessor_2():
            nemo_asr.AudioToMelSpectrogramPreprocessor(window_stride=2,
                                                       n_window_stride=2)

        def create_broken_preprocessor_3():
            nemo_asr.AudioToMelSpectrogramPreprocessor(n_window_stride=2)

        def create_good_preprocessor_1():
            nemo_asr.AudioToMelSpectrogramPreprocessor(window_size=0.02,
                                                       window_stride=0.01)

        def create_good_preprocessor_2():
            nemo_asr.AudioToMelSpectrogramPreprocessor(
                window_size=None,
                window_stride=None,
                n_window_size=256,
                n_window_stride=32,
            )

        self.assertRaises(ValueError, create_broken_preprocessor_1)
        self.assertRaises(ValueError, create_broken_preprocessor_2)
        self.assertRaises(ValueError, create_broken_preprocessor_3)
        create_good_preprocessor_1()
        create_good_preprocessor_2()

    def test_kaldi_dataloader(self):
        batch_size = 4
        dl = nemo_asr.KaldiFeatureDataLayer(
            kaldi_dir=os.path.abspath(
                os.path.join(os.path.dirname(__file__),
                             '../data/asr/kaldi_an4/')),
            labels=self.labels,
            batch_size=batch_size,
        )
        for data in dl.data_iterator:
            self.assertTrue(data[0].size(0) == batch_size)

        dl_test_min = nemo_asr.KaldiFeatureDataLayer(
            kaldi_dir=os.path.abspath(
                os.path.join(os.path.dirname(__file__),
                             '../data/asr/kaldi_an4/')),
            labels=self.labels,
            batch_size=batch_size,
            min_duration=1.0,
        )
        self.assertTrue(len(dl_test_min) == 18)

        dl_test_max = nemo_asr.KaldiFeatureDataLayer(
            kaldi_dir=os.path.abspath(
                os.path.join(os.path.dirname(__file__),
                             '../data/asr/kaldi_an4/')),
            labels=self.labels,
            batch_size=batch_size,
            max_duration=5.0,
        )
        self.assertTrue(len(dl_test_max) == 19)

    def test_trim_silence(self):
        batch_size = 4
        normal_dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=batch_size,
            # placement=DeviceType.GPU,
            drop_last=True,
            shuffle=False,
        )
        trimmed_dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            trim_silence=True,
            labels=self.labels,
            batch_size=batch_size,
            # placement=DeviceType.GPU,
            drop_last=True,
            shuffle=False,
        )
        for norm, trim in zip(normal_dl.data_iterator,
                              trimmed_dl.data_iterator):
            for point in range(batch_size):
                self.assertTrue(norm[1][point].data >= trim[1][point].data)

    def test_audio_preprocessors(self):
        batch_size = 5
        dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=batch_size,
            # placement=DeviceType.GPU,
            drop_last=True,
            shuffle=False,
        )

        installed_torchaudio = True
        try:
            import torchaudio
        except ModuleNotFoundError:
            installed_torchaudio = False
            with self.assertRaises(ModuleNotFoundError):
                to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor(
                    n_fft=400, window=None)
            with self.assertRaises(ModuleNotFoundError):
                to_mfcc = nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15)

        if installed_torchaudio:
            to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor(
                n_fft=400, window=None)
            to_mfcc = nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15)

        to_melspec = nemo_asr.AudioToMelSpectrogramPreprocessor(features=50)

        for batch in dl.data_iterator:
            input_signals, seq_lengths, _, _ = batch
            input_signals = input_signals.to(to_melspec._device)
            seq_lengths = seq_lengths.to(to_melspec._device)

            melspec = to_melspec.forward(input_signals, seq_lengths)

            if installed_torchaudio:
                spec = to_spectrogram.forward(input_signals, seq_lengths)
                mfcc = to_mfcc.forward(input_signals, seq_lengths)

            # Check that number of features is what we expect
            self.assertTrue(melspec[0].shape[1] == 50)

            if installed_torchaudio:
                self.assertTrue(spec[0].shape[1] == 201)  # n_fft // 2 + 1 bins
                self.assertTrue(mfcc[0].shape[1] == 15)

    # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid")
    def test_jasper_training(self):
        with open(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__),
                                 "../data/jasper_smaller.yaml"))) as file:
            jasper_model_definition = self.yaml.load(file)
        dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        jasper_encoder = nemo_asr.JasperEncoder(
            feat_in=jasper_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **jasper_model_definition['JasperEncoder'],
        )
        jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024,
                                                      num_classes=len(
                                                          self.labels))
        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))

        # DAG
        audio_signal, a_sig_length, transcript, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)

        encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
                                              length=p_length)
        # logging.info(jasper_encoder)
        log_probs = jasper_decoder(encoder_output=encoded)
        loss = ctc_loss(
            log_probs=log_probs,
            targets=transcript,
            input_length=encoded_len,
            target_length=transcript_len,
        )

        callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'
                                              ),
        )
        # Instantiate an optimizer to perform `train` action
        optimizer = self.nf.get_trainer()
        optimizer.train(
            [loss],
            callbacks=[callback],
            optimizer="sgd",
            optimization_params={
                "num_epochs": 10,
                "lr": 0.0003
            },
        )

    # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid")
    def test_double_jasper_training(self):
        with open(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__),
                                 "../data/jasper_smaller.yaml"))) as file:
            jasper_model_definition = self.yaml.load(file)
        dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        jasper_encoder1 = nemo_asr.JasperEncoder(
            feat_in=jasper_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **jasper_model_definition['JasperEncoder'],
        )
        jasper_encoder2 = nemo_asr.JasperEncoder(
            feat_in=jasper_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **jasper_model_definition['JasperEncoder'],
        )
        mx_max1 = nemo.backends.pytorch.common.SimpleCombiner(mode="max")
        mx_max2 = nemo.backends.pytorch.common.SimpleCombiner(mode="max")
        jasper_decoder1 = nemo_asr.JasperDecoderForCTC(feat_in=1024,
                                                       num_classes=len(
                                                           self.labels))
        jasper_decoder2 = nemo_asr.JasperDecoderForCTC(feat_in=1024,
                                                       num_classes=len(
                                                           self.labels))

        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))

        # DAG
        audio_signal, a_sig_length, transcript, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)

        encoded1, encoded_len1 = jasper_encoder1(audio_signal=processed_signal,
                                                 length=p_length)
        encoded2, encoded_len2 = jasper_encoder2(audio_signal=processed_signal,
                                                 length=p_length)
        log_probs1 = jasper_decoder1(encoder_output=encoded1)
        log_probs2 = jasper_decoder2(encoder_output=encoded2)
        log_probs = mx_max1(x1=log_probs1, x2=log_probs2)
        encoded_len = mx_max2(x1=encoded_len1, x2=encoded_len2)
        loss = ctc_loss(
            log_probs=log_probs,
            targets=transcript,
            input_length=encoded_len,
            target_length=transcript_len,
        )

        callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: logging.info(str(x[0].item())))
        # Instantiate an optimizer to perform `train` action
        optimizer = self.nf.get_trainer()
        optimizer.train(
            [loss],
            callbacks=[callback],
            optimizer="sgd",
            optimization_params={
                "num_epochs": 10,
                "lr": 0.0003
            },
        )

    # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid")
    def test_quartznet_training(self):
        with open(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__),
                                 "../data/quartznet_test.yaml"))) as f:
            quartz_model_definition = self.yaml.load(f)
        dl = nemo_asr.AudioToTextDataLayer(
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        jasper_encoder = nemo_asr.JasperEncoder(
            feat_in=quartz_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **quartz_model_definition['JasperEncoder'],
        )
        jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024,
                                                      num_classes=len(
                                                          self.labels))
        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))

        # DAG
        audio_signal, a_sig_length, transcript, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)

        encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
                                              length=p_length)
        log_probs = jasper_decoder(encoder_output=encoded)
        loss = ctc_loss(
            log_probs=log_probs,
            targets=transcript,
            input_length=encoded_len,
            target_length=transcript_len,
        )

        callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'
                                              ),
        )
        # Instantiate an optimizer to perform `train` action
        optimizer = self.nf.get_trainer()
        optimizer.train(
            [loss],
            callbacks=[callback],
            optimizer="sgd",
            optimization_params={
                "num_epochs": 10,
                "lr": 0.0003
            },
        )

    def test_stft_conv(self):
        with open(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__),
                                 "../data/jasper_smaller.yaml"))) as file:
            jasper_model_definition = self.yaml.load(file)
        dl = nemo_asr.AudioToTextDataLayer(
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
            'stft_conv': True,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        jasper_encoder = nemo_asr.JasperEncoder(
            feat_in=jasper_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **jasper_model_definition['JasperEncoder'],
        )
        jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024,
                                                      num_classes=len(
                                                          self.labels))

        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))

        # DAG
        audio_signal, a_sig_length, transcript, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)

        encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
                                              length=p_length)
        # logging.info(jasper_encoder)
        log_probs = jasper_decoder(encoder_output=encoded)
        loss = ctc_loss(
            log_probs=log_probs,
            targets=transcript,
            input_length=encoded_len,
            target_length=transcript_len,
        )

        callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: logging.info(str(x[0].item())))
        # Instantiate an optimizer to perform `train` action
        optimizer = self.nf.get_trainer()
        optimizer.train(
            [loss],
            callbacks=[callback],
            optimizer="sgd",
            optimization_params={
                "num_epochs": 10,
                "lr": 0.0003
            },
        )

    def test_clas(self):
        with open('examples/asr/experimental/configs/garnet_an4.yaml') as file:
            cfg = self.yaml.load(file)
        dl = nemo_asr.AudioToTextDataLayer(
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
            'stft_conv': True,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        encoder = nemo_asr.JasperEncoder(
            jasper=cfg['encoder']['jasper'],
            activation=cfg['encoder']['activation'],
            feat_in=cfg['input']['train']['features'],
        )
        connector = nemo_asr.JasperRNNConnector(
            in_channels=cfg['encoder']['jasper'][-1]['filters'],
            out_channels=cfg['decoder']['hidden_size'],
        )
        decoder = nemo.backends.pytorch.common.DecoderRNN(
            voc_size=len(self.labels),
            bos_id=0,
            hidden_size=cfg['decoder']['hidden_size'],
            attention_method=cfg['decoder']['attention_method'],
            attention_type=cfg['decoder']['attention_type'],
            in_dropout=cfg['decoder']['in_dropout'],
            gru_dropout=cfg['decoder']['gru_dropout'],
            attn_dropout=cfg['decoder']['attn_dropout'],
            teacher_forcing=cfg['decoder']['teacher_forcing'],
            curriculum_learning=cfg['decoder']['curriculum_learning'],
            rnn_type=cfg['decoder']['rnn_type'],
            n_layers=cfg['decoder']['n_layers'],
            tie_emb_out_weights=cfg['decoder']['tie_emb_out_weights'],
        )
        loss = nemo.backends.pytorch.common.SequenceLoss()

        # DAG
        audio_signal, a_sig_length, transcripts, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)
        encoded, encoded_len = encoder(audio_signal=processed_signal,
                                       length=p_length)
        encoded = connector(tensor=encoded)
        log_probs, _ = decoder(targets=transcripts, encoder_outputs=encoded)
        loss = loss(log_probs=log_probs, targets=transcripts)

        # Train
        callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: logging.info(str(x[0].item())))
        # Instantiate an optimizer to perform `train` action
        optimizer = self.nf.get_trainer()
        optimizer.train(
            [loss],
            callbacks=[callback],
            optimizer="sgd",
            optimization_params={
                "num_epochs": 10,
                "lr": 0.0003
            },
        )

    def test_jasper_eval(self):
        with open(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__),
                                 "../data/jasper_smaller.yaml"))) as file:
            jasper_model_definition = self.yaml.load(file)
        dl = nemo_asr.AudioToTextDataLayer(
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        jasper_encoder = nemo_asr.JasperEncoder(
            feat_in=jasper_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **jasper_model_definition['JasperEncoder'],
        )
        jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024,
                                                      num_classes=len(
                                                          self.labels))
        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))
        greedy_decoder = nemo_asr.GreedyCTCDecoder()
        # DAG
        audio_signal, a_sig_length, transcript, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)

        encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
                                              length=p_length)
        # logging.info(jasper_encoder)
        log_probs = jasper_decoder(encoder_output=encoded)
        loss = ctc_loss(
            log_probs=log_probs,
            targets=transcript,
            input_length=encoded_len,
            target_length=transcript_len,
        )
        predictions = greedy_decoder(log_probs=log_probs)

        from nemo.collections.asr.helpers import (
            process_evaluation_batch,
            process_evaluation_epoch,
        )

        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[loss, predictions, transcript, transcript_len],
            user_iter_callback=lambda x, y: process_evaluation_batch(
                x, y, labels=self.labels),
            user_epochs_done_callback=process_evaluation_epoch,
        )
        # Run evaluation with the configured callback
        self.nf.eval(callbacks=[eval_callback])
Ejemplo n.º 35
0
    def load(self):
        """Load the data about Stylesheet Assets and the new CSS content."""
        yaml = YAML(typ="safe")
        yaml.register_class(StylesheetData)
        yaml.register_class(StylesheetImageList)
        yaml.register_class(LocalStylesheetImage)
        yaml.register_class(StoredStylesheetImage)
        yaml.register_class(RemoteStylesheetImage)

        logger.debug("Loading serialized StylesheetData class from: "
                     "'{}'".format(self.config["data_file"], ))
        try:
            with open(self.config["data_file"], "r") as yaml_stream:
                self.stylesheet_data = yaml.load(yaml_stream)
        except OSError as error:
            raise FileReadingException(
                error,
                "the Stylesheet Data file",
            ) from error

        logger.debug("Loading CSS content from: '{}'".format(
            self.stylesheet_data.css_file, ))
        try:
            with open(self.stylesheet_data.css_file, "r", encoding="utf-8") \
                    as css_stream:
                self.css_content = css_stream.read()
        except OSError as error:
            raise FileReadingException(
                error,
                "the CSS file",
            ) from error
Ejemplo n.º 36
0
def create_all_dags(args, neural_factory):
    logger = neural_factory.logger
    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)
    vocab = jasper_params['labels']
    sample_rate = jasper_params['sample_rate']

    # Calculate num_workers for dataloader
    total_cpus = os.cpu_count()
    cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1)

    # perturb_config = jasper_params.get('perturb', None)
    train_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    train_dl_params.update(jasper_params["AudioToTextDataLayer"]["train"])
    del train_dl_params["train"]
    del train_dl_params["eval"]
    # del train_dl_params["normalize_transcripts"]

    data_layer = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=args.train_dataset,
        sample_rate=sample_rate,
        labels=vocab,
        batch_size=args.batch_size,
        num_workers=cpu_per_traindl,
        **train_dl_params,
        # normalize_transcripts=False
    )

    N = len(data_layer)
    steps_per_epoch = int(N / (args.batch_size * args.num_gpus))
    logger.info('Have {0} examples to train on.'.format(N))

    data_preprocessor = nemo_asr.AudioPreprocessing(
        sample_rate=sample_rate, **jasper_params["AudioPreprocessing"])

    multiply_batch_config = jasper_params.get('MultiplyBatch', None)
    if multiply_batch_config:
        multiply_batch = nemo_asr.MultiplyBatch(**multiply_batch_config)

    spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None)
    if spectr_augment_config:
        data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(
            **spectr_augment_config)

    eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"])
    del eval_dl_params["train"]
    del eval_dl_params["eval"]
    data_layers_eval = []

    if args.eval_datasets:
        for eval_datasets in args.eval_datasets:
            data_layer_eval = nemo_asr.AudioToTextDataLayer(
                manifest_filepath=eval_datasets,
                sample_rate=sample_rate,
                labels=vocab,
                batch_size=args.eval_batch_size,
                num_workers=cpu_per_traindl,
                **eval_dl_params,
            )

            data_layers_eval.append(data_layer_eval)
    else:
        neural_factory.logger.info("There were no val datasets passed")

    jasper_encoder = nemo_asr.JasperEncoder(
        feat_in=jasper_params["AudioPreprocessing"]["features"],
        **jasper_params["JasperEncoder"])

    jasper_decoder = nemo_asr.JasperDecoderForCTC(
        feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"],
        num_classes=len(vocab),
        factory=neural_factory)

    ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab))

    greedy_decoder = nemo_asr.GreedyCTCDecoder()

    logger.info('================================')
    logger.info(
        f"Number of parameters in encoder: {jasper_encoder.num_weights}")
    logger.info(
        f"Number of parameters in decoder: {jasper_decoder.num_weights}")
    logger.info(f"Total number of parameters in decoder: "
                f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
    logger.info('================================')

    # Train DAG
    audio_signal_t, a_sig_length_t, \
        transcript_t, transcript_len_t = data_layer()
    processed_signal_t, p_length_t = data_preprocessor(
        input_signal=audio_signal_t, length=a_sig_length_t)

    if multiply_batch_config:
        processed_signal_t, p_length_t, transcript_t, transcript_len_t = \
            multiply_batch(
                in_x=processed_signal_t, in_x_len=p_length_t,
                in_y=transcript_t,
                in_y_len=transcript_len_t)

    if spectr_augment_config:
        processed_signal_t = data_spectr_augmentation(
            input_spec=processed_signal_t)

    encoded_t, encoded_len_t = jasper_encoder(audio_signal=processed_signal_t,
                                              length=p_length_t)
    log_probs_t = jasper_decoder(encoder_output=encoded_t)
    predictions_t = greedy_decoder(log_probs=log_probs_t)
    loss_t = ctc_loss(log_probs=log_probs_t,
                      targets=transcript_t,
                      input_length=encoded_len_t,
                      target_length=transcript_len_t)

    # Callbacks needed to print info to console and Tensorboard
    train_callback = nemo.core.SimpleLossLoggerCallback(
        tensors=[loss_t, predictions_t, transcript_t, transcript_len_t],
        print_func=partial(monitor_asr_train_progress,
                           labels=vocab,
                           logger=logger),
        get_tb_values=lambda x: [("loss", x[0])],
        tb_writer=neural_factory.tb_writer,
    )

    chpt_callback = nemo.core.CheckpointCallback(
        folder=neural_factory.checkpoint_dir,
        step_freq=args.checkpoint_save_freq)

    callbacks = [train_callback, chpt_callback]

    # assemble eval DAGs
    for i, eval_dl in enumerate(data_layers_eval):
        audio_signal_e, a_sig_length_e, transcript_e, transcript_len_e = \
            eval_dl()
        processed_signal_e, p_length_e = data_preprocessor(
            input_signal=audio_signal_e, length=a_sig_length_e)
        encoded_e, encoded_len_e = jasper_encoder(
            audio_signal=processed_signal_e, length=p_length_e)
        log_probs_e = jasper_decoder(encoder_output=encoded_e)
        predictions_e = greedy_decoder(log_probs=log_probs_e)
        loss_e = ctc_loss(log_probs=log_probs_e,
                          targets=transcript_e,
                          input_length=encoded_len_e,
                          target_length=transcript_len_e)

        # create corresponding eval callback
        tagname = os.path.basename(args.eval_datasets[i]).split(".")[0]
        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[
                loss_e, predictions_e, transcript_e, transcript_len_e
            ],
            user_iter_callback=partial(process_evaluation_batch, labels=vocab),
            user_epochs_done_callback=partial(process_evaluation_epoch,
                                              tag=tagname,
                                              logger=logger),
            eval_step=args.eval_freq,
            tb_writer=neural_factory.tb_writer)

        callbacks.append(eval_callback)
    return loss_t, callbacks, steps_per_epoch
Ejemplo n.º 37
0
def get_yaml(path: str) -> CommentedMap:
    bytes_data = get_data(path)
    # Replace CRLF or yaml loader will load extra lines
    string_data = bytes_data.decode('utf8').replace('\r\n', '\n')
    ret = YAML().load(string_data)
    return ret
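
# A minimal sketch (not part of the original snippet) showing why the CRLF
# normalization in get_yaml matters: the bytes below simulate a file saved with
# Windows line endings, and the data parses cleanly after '\r\n' is replaced.
# The keys and values are illustrative only.
from ruamel.yaml import YAML

raw = b"name: demo\r\nitems:\r\n- 1\r\n- 2\r\n"
text = raw.decode('utf8').replace('\r\n', '\n')
data = YAML().load(text)
print(data['name'], list(data['items']))  # demo [1, 2]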
Ejemplo n.º 38
0
from datetime import datetime
import os
import pathlib
from subprocess import check_call

from ruamel.yaml import YAML

MINICONDA_VERSION = '4.3.27'

HERE = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))

ENV_FILE = 'environment.yml'
FROZEN_FILE = 'environment.frozen.yml'

yaml = YAML(typ='rt')


def fixup(frozen_file):
    """Fixup a frozen environment file

    Conda export has a bug!
    https://github.com/conda/conda/pull/6391
    """
    with open(frozen_file) as f:
        env = yaml.load(f)

    # scrub spurious pip dependencies
    # due to conda #6391

    # note: this scrubs *all* pip dependencies,
Ejemplo n.º 39
0
                newkey = ''

        lineNum += 1
    return ret_val

with open(os.path.join(ISTIO_IO_DIR, CONFIG_INDEX_DIR), 'r') as f:
    endReached = False

    data = f.read().split('\n')
    for d in data:
        print(d)
        if "<!-- AUTO-GENERATED-START -->" in d:
            break

    # transform values.yaml into an encoded string dictionary
    yaml = YAML()
    yaml.explicit_start = True
    yaml.dump('', sys.stdout, transform=decode_helm_yaml)

    # Order the encoded string dictionary
    od = collections.OrderedDict(sorted(prdict.items(), key=lambda t: t[0]))

    # Print encoded string dictionary
    for k, v in od.items():
        print("## `%s` options\n" % k)
        print('| Key | Default Value | Description |')
        print('| --- | --- | --- |')
        for value in v:
            print('%s' % (value))
        print('')
Ejemplo n.º 40
0
def pin_dependencies_in_conda_env_file_from_version_spec(
        filepath, versions_to_pin, dry_run=False):
    '''
    Pin package versions to a given spec

    Parameters
    ----------
    filepath : str
        Conda environment yml file to be pinned
    versions_to_pin : dict
        Dictionary of package specs, with keys package sources (e.g. ``conda``,
        ``pip``), and values dictionaries of package names and pinned versions.
    dry_run : bool
        Print the updated environment files, rather than overwriting them. Default
        False.
    '''

    indent_config = dict(mapping=2, sequence=2, offset=2)
    
    yaml = YAML(typ='rt')
    yaml.indent(**indent_config)
    yaml.default_flow_style = False

    with open(filepath, 'r') as f:
        file_spec = yaml.load(f)

    for di, dep in enumerate(file_spec['dependencies']):
        if isinstance(dep, dict):
            for k, v in dep.items():
                for si, subdep in enumerate(v):
                    pinned, comment = _determine_pinned_version(
                        subdep, versions_to_pin[k])

                    file_spec['dependencies'][di][k][si] = pinned
                    if comment is not None:
                        file_spec['dependencies'][di][k].yaml_add_eol_comment(
                            comment, si)
        else:
            pinned, comment = _determine_pinned_version(dep, versions_to_pin['conda'])
            file_spec['dependencies'][di] = pinned
            
            if comment is not None:
                file_spec['dependencies'].yaml_add_eol_comment(
                    comment, di)

    if dry_run:
        sys.stdout.write("filename: {}\n{}\n".format(filepath, '-'*50))
        with YAML(output=sys.stdout) as yaml:
            yaml.indent(**indent_config)
            yaml.dump(file_spec)
        sys.stdout.write("\n")
    else:
        with open(filepath, 'w+') as f:
            yaml.dump(file_spec, f)
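
# A hedged usage sketch for pin_dependencies_in_conda_env_file_from_version_spec
# above. The package names, versions, and the 'environment.yml' path are
# hypothetical; the call is commented out because it rewrites the file in place
# unless dry_run=True.
versions_to_pin = {
    'conda': {'numpy': '1.21.0', 'xarray': '0.19.0'},
    'pip': {'requests': '2.26.0'},
}
# pin_dependencies_in_conda_env_file_from_version_spec(
#     'environment.yml', versions_to_pin, dry_run=True)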
Ejemplo n.º 41
0
        "meta", attrs=dict(name="csrf-token"))["content"]
    result = requests.post(
        "https://hackmd.io/new",
        data={"content": source},
        headers={
            "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
            "X-XSRF-Token": token,
            "User-Agent": "HackMD Python client",
        },
        cookies=response.cookies,
    )
    return result.url


if __name__ == "__main__":
    yaml = YAML()
    g = Github(os.getenv("VSF_BOT_TOKEN"))
    with open("../.github/workflows/open_org_issue.yml") as f:
        data = yaml.load(f)
    minutes, hours_utc, *_ = data["on"]["schedule"][0]["cron"].split()

    # Format meeting time for the *upcoming* meeting
    meeting_time = datetime.now() + timedelta(days=7)
    meeting_time = meeting_time.replace(hour=int(hours_utc),
                                        minute=int(minutes))
    formatted_time = (
        f"{meeting_time + timedelta(hours=2):%-H:%M} European "
        f"/ {meeting_time - timedelta(hours=4):%-I:%M %p} Eastern")

    # *Today's* meeting will be next week's previous meeting
    previous_meeting_time = datetime.now().replace(hour=int(hours_utc),
Ejemplo n.º 42
0
def unpin_dependencies_in_conda_env_file(filepath, dry_run=False):
    '''
    Un-pin dependencies in conda environment file
    
    If it encounters dependencies with ``# pinkeep: pkg=vers`` directives, these are
    preserved verbatim in the final spec.

    Parameters
    ----------
    filepath : str
        Path to the environment file to unpin
    dry_run : bool, optional
        Print rather than modify the environment file
    '''
    
    indent_config = dict(mapping=2, sequence=2, offset=2)
    
    yaml = YAML(typ='rt')
    yaml.indent(**indent_config)
    yaml.default_flow_style = False

    with open(filepath, 'r') as f:
        file_spec = yaml.load(f)

    for di, dep in enumerate(file_spec['dependencies']):
        if isinstance(dep, dict):
            for k, v in dep.items():
                for si, subdep in enumerate(v):
                    file_spec['dependencies'][di][k][si] = _unpin_dependency(
                        file_spec['dependencies'][di][k], si)

        else:
            file_spec['dependencies'][di] = _unpin_dependency(
                file_spec['dependencies'], di)

    if dry_run:
        sys.stdout.write("filename: {}\n{}\n".format(filepath, '-'*50))
        with YAML(output=sys.stdout) as yaml:
            yaml.indent(**indent_config)
            yaml.dump(file_spec)
        sys.stdout.write("\n")
    else:
        with open(filepath, 'w+') as f:
            yaml.dump(file_spec, f)
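
# A hedged illustration of the '# pinkeep: pkg=vers' directive mentioned in the
# docstring above. The file content is hypothetical; per the docstring, an entry
# carrying a pinkeep comment keeps that exact pin after unpinning, while the
# other entries are relaxed.
example_env = """\
name: demo
dependencies:
  - numpy=1.21.0  # pinkeep: numpy=1.21.0
  - pandas=1.3.5
  - pip:
      - requests==2.26.0
"""
# unpin_dependencies_in_conda_env_file('environment.yml', dry_run=True)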
Ejemplo n.º 43
0
import subprocess
import sys
import tempfile
import pytest

from contextlib import redirect_stderr, redirect_stdout
from pathlib import Path
from textwrap import dedent
from ruamel.yaml import YAML

from auth import KeyProvider
from utils import print_colour
from file_acquisition import get_decrypted_file, get_decrypted_files

# Without `pure=True`, I get an exception about str / byte issues
yaml = YAML(typ="safe", pure=True)
helm_charts_dir = Path(__file__).parent.parent.joinpath("helm-charts")


class Hub:
    """
    A single, deployable JupyterHub
    """
    def __init__(self, cluster, spec):
        self.cluster = cluster
        self.spec = spec

    def get_generated_config(self, auth_provider: KeyProvider, secret_key):
        """
        Generate config automatically for each hub
Ejemplo n.º 44
0
from ipam.models import Role
from ruamel.yaml import YAML
from pathlib import Path
import sys

file = Path('/opt/netbox/initializers/prefix_vlan_roles.yml')
if not file.is_file():
  sys.exit()

with file.open('r') as stream:
  yaml = YAML(typ='safe')
  roles = yaml.load(stream)

  if roles is not None:
    for params in roles:
      role, created = Role.objects.get_or_create(**params)

      if created:
        print("⛹️‍ Created Prefix/VLAN Role", role.name)
Ejemplo n.º 45
0
from ruamel.yaml import YAML

if len(sys.argv) != 3:
    print("Script call: <name> <switch_item_name> <switch_item_status>")
    sys.exit(1)

switch_item_name = sys.argv[1]
switch_item_status = sys.argv[2]

data_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = os.path.abspath(os.path.dirname(data_dir +
                                           "/../../broadlink-data/"))
#print (data_dir)

content = open(data_dir + "/system.yml", "r").read()
yaml = YAML()
system_dictionary = yaml.load(content)

content = open(data_dir + "/irb.yml", "r").read()
yaml = YAML()
mapping_dictionary = yaml.load(content)

code = mapping_dictionary["mapping_dictionary"][switch_item_name][
    switch_item_status]

operations = ""

if (code != ""):
    operations += "start"

    try:
Ejemplo n.º 46
0
from great_expectations.data_context.util import file_relative_path
from great_expectations.util import lint_code
from great_expectations.validation_operators.types.validation_operator_result import (
    ValidationOperatorResult, )

try:
    from sqlalchemy.exc import SQLAlchemyError
except ImportError:
    SQLAlchemyError = RuntimeError

yaml = YAML()
yaml.indent(mapping=2, sequence=4, offset=2)
"""
--ge-feature-maturity-info--

    id: checkpoint_command_line
    title: LegacyCheckpoint - Command Line
    icon:
    short_description: Run a configured legacy checkpoint from a command line.
    description: Run a configured legacy checkpoint from a command line in a Terminal shell.
    how_to_guide_url: https://docs.greatexpectations.io/en/latest/guides/how_to_guides/validation/how_to_run_a_checkpoint_in_terminal.html
    maturity: Experimental
    maturity_details:
        api_stability: Unstable (expect changes to batch request; no checkpoint store)
        implementation_completeness: Complete
        unit_test_coverage: Complete
Ejemplo n.º 47
0
from scripts import mkimage, createWorkflowYaml, createFat32BootYaml

from workflow.mainConfig import MainConfig

logPath = "./"
fileName = "Disk"
logging.basicConfig(
    level=logging.INFO,
    format=
    "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s",
    handlers=[
        logging.FileHandler("{0}/{1}.log".format(logPath, fileName)),
        logging.StreamHandler()
    ])

yaml = YAML()
yaml.register_class(MainConfig)

mainConfig = None
cwd = os.getcwd()


def readConfig(path):
    global mainConfig

    # deserialize fat config file
    with open(path, 'r') as inp:
        mainConfig = yaml.load(inp)


def prepareDisk():
Ejemplo n.º 48
0
def update_lastmod(verbose):
    count = 0
    yaml = YAML()

    for post in glob.glob(os.path.join(POSTS_PATH, "*.md")):

        git_log_count = subprocess.getoutput(
            "git log --pretty=%ad {} | wc -l".format(post))

        if git_log_count == "1":
            continue

        git_lastmod = subprocess.getoutput(
            "git log -1 --pretty=%ad --date=iso " + post)

        if not git_lastmod:
            continue

        latest_commit = subprocess.getoutput("git log -1 --pretty=%B " + post)

        if "[Automation]" in latest_commit and "Lastmod" in latest_commit:
            continue

        frontmatter, line_num = get_yaml(post)
        meta = yaml.load(frontmatter)

        if 'seo' in meta:
            if ('date_modified' in meta['seo']
                    and meta['seo']['date_modified'] == git_lastmod):
                continue
            else:
                meta['seo']['date_modified'] = git_lastmod
        else:
            meta.insert(line_num, 'seo', dict(date_modified=git_lastmod))

        output = 'new.md'
        if os.path.isfile(output):
            os.remove(output)

        with open(output, 'w') as new, open(post, 'r') as old:
            new.write("---\n")
            yaml.dump(meta, new)
            new.write("---\n")
            line_num += 2

            lines = old.readlines()

            for line in lines:
                if line_num > 0:
                    line_num -= 1
                    continue
                else:
                    new.write(line)

        shutil.move(output, post)
        count += 1

        if verbose:
            print("[INFO] update 'lastmod' for:" + post)

    if count > 0:
        print("[INFO] Success to update lastmod for {} post(s).".format(count))
Ejemplo n.º 49
0
def _get_yaml():
    y = YAML(typ='safe')
    y.default_flow_style = False
    return y
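
# A small usage sketch for _get_yaml above: with default_flow_style disabled, the
# dumper emits nested structures in block style. The mapping below is illustrative
# and the commented output is approximate.
import sys

y = _get_yaml()
y.dump({'service': {'ports': [80, 443], 'replicas': 3}}, sys.stdout)
# service:
#   ports:
#   - 80
#   - 443
#   replicas: 3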
Ejemplo n.º 50
0
from great_expectations.data_context.templates import CONFIG_VARIABLES_TEMPLATE
from great_expectations.exceptions import ConfigNotFoundError

try:
    from unittest import mock
except ImportError:
    import mock

from six import PY2

from great_expectations.cli import cli
from great_expectations.util import gen_directory_tree_str
from great_expectations import __version__ as ge_version
from .test_utils import assertDeepAlmostEqual
yaml = YAML()
yaml.default_flow_style = False


def test_cli_command_entrance():
    runner = CliRunner()

    result = runner.invoke(cli)
    assert result.exit_code == 0
    assert result.output == """Usage: cli [OPTIONS] COMMAND [ARGS]...

  great_expectations command-line interface

Options:
  --version      Show the version and exit.
  -v, --verbose  Set great_expectations to use verbose output.
Ejemplo n.º 51
0
from django.contrib.auth.models import Group, User
from ruamel.yaml import YAML

with open('/opt/netbox/initializers/groups.yml', 'r') as stream:
    yaml = YAML(typ='safe')
    groups = yaml.load(stream)

    if groups is not None:
        for groupname, group_details in groups.items():
            group, created = Group.objects.get_or_create(name=groupname)

            if created:
                print("👥 Created group", groupname)

            for username in group_details['users']:
                user = User.objects.get(username=username)

                if user:
                    user.groups.add(group)
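
# A hedged sketch of the groups.yml structure the loop above expects; the group
# and user names are hypothetical.
#
#   readers:
#     users:
#       - alice
#       - bob
#   editors:
#     users:
#       - carol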
Ejemplo n.º 52
0
# coding=utf-8

import os, json, io
from flask import Flask, jsonify, request
from flask_cors import CORS

from ruamel.yaml import YAML
yaml_parser = YAML()  #typ="safe"
app = Flask(__name__)
CORS(app)


def get_YAML_string(obj):
    strngio = io.StringIO()
    yaml_parser.dump(obj, strngio)
    strngio.seek(0)
    yamlstr = strngio.read()
    strngio.close()
    return yamlstr


@app.route('/questions/<int:number>')
def get_question(number: int):
    # Number is base 1
    jsonpath = os.path.join(
        os.path.split(os.path.split(__file__)[0])[0], 'static',
        'Questions.json')
    with open(jsonpath, "rt") as opf:
        jsonstring = opf.read()
        qdct = json.loads(jsonstring)
        questionobj = qdct[number - 1]
Ejemplo n.º 53
0
def read_yaml_file(file_path: str) -> dict:
    with open(file_path, 'r', encoding='utf-8') as yaml_file:
        yaml = YAML(typ='safe')
        return yaml.load(yaml_file)
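
# A brief usage sketch for read_yaml_file above. With typ='safe' the result is
# built from plain dicts and lists (no round-trip metadata), which suits read-only
# configuration. 'settings.yaml' and its keys are hypothetical.
# settings = read_yaml_file('settings.yaml')
# timeout = settings.get('timeout', 30)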
Ejemplo n.º 54
0
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This module handles string related IO.
"""

from io import StringIO

from ruamel.yaml import YAML

yaml = YAML(typ='unsafe')


def read_yaml_str(content: str) -> object:
    """Parse the given yaml str and return the python object."""
    return yaml.load(content)


def to_yaml_str(obj: object) -> str:
    """Converts the given python object into a YAML string."""
    stream = StringIO()
    yaml.dump(obj, stream)
    return stream.getvalue()
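
# A small round-trip sketch using the helpers above. Note that this module
# configures YAML(typ='unsafe'), which can construct arbitrary Python objects from
# tagged nodes, so it should only be fed trusted input. The data below is
# illustrative only.
original = {'name': 'demo', 'thresholds': [0.1, 0.5, 0.9]}
text = to_yaml_str(original)
restored = read_yaml_str(text)
assert restored == original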
Ejemplo n.º 55
0
#
# You should have received a copy of the GNU General Public License
# along with Exhibition.  If not, see <https://www.gnu.org/licenses/>.
#
##

from collections import OrderedDict
from importlib import import_module
import hashlib
import pathlib

from ruamel.yaml import YAML

from .config import Config

yaml_parser = YAML(typ="safe")

DATA_EXTRACTORS = {
    ".yaml": yaml_parser.load,
    ".json": yaml_parser.load,
}

DEFAULT_STRIP_EXTS = [".html"]
DEFAULT_INDEX_FILE = "index.html"


class Node:
    """
    A node represents a file or directory
    """
    _meta_names = ["meta.yaml", "meta.yml"]
Ejemplo n.º 56
0
def create_all_dags(args, neural_factory):
    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)

    labels = jasper_params['labels']  # Vocab of tokens
    sample_rate = jasper_params['sample_rate']

    # Calculate num_workers for dataloader
    total_cpus = os.cpu_count()
    cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1)

    # perturb_config = jasper_params.get('perturb', None)
    train_dl_params = copy.deepcopy(
        jasper_params["AudioToSpeechLabelDataLayer"])
    train_dl_params.update(
        jasper_params["AudioToSpeechLabelDataLayer"]["train"])
    del train_dl_params["train"]
    del train_dl_params["eval"]
    # del train_dl_params["normalize_transcripts"]

    # Look for augmentations
    audio_augmentor = jasper_params.get('AudioAugmentor', None)

    data_layer = nemo_asr.AudioToSpeechLabelDataLayer(
        manifest_filepath=args.train_dataset,
        labels=labels,
        sample_rate=sample_rate,
        batch_size=args.batch_size,
        num_workers=cpu_per_traindl,
        augmentor=audio_augmentor,
        **train_dl_params,
    )

    crop_pad_augmentation = nemo_asr.CropOrPadSpectrogramAugmentation(
        audio_length=128)

    N = len(data_layer)
    steps_per_epoch = math.ceil(
        N / (args.batch_size * args.iter_per_step * args.num_gpus))
    logging.info('Steps per epoch : {0}'.format(steps_per_epoch))
    logging.info('Have {0} examples to train on.'.format(N))

    data_preprocessor = nemo_asr.AudioToMFCCPreprocessor(
        sample_rate=sample_rate,
        **jasper_params["AudioToMFCCPreprocessor"],
    )

    spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None)
    if spectr_augment_config:
        data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(
            **spectr_augment_config)

    eval_dl_params = copy.deepcopy(
        jasper_params["AudioToSpeechLabelDataLayer"])
    eval_dl_params.update(jasper_params["AudioToSpeechLabelDataLayer"]["eval"])
    del eval_dl_params["train"]
    del eval_dl_params["eval"]
    data_layers_eval = []

    if args.eval_datasets:
        for eval_datasets in args.eval_datasets:
            data_layer_eval = nemo_asr.AudioToSpeechLabelDataLayer(
                manifest_filepath=eval_datasets,
                sample_rate=sample_rate,
                labels=labels,
                batch_size=args.eval_batch_size,
                num_workers=cpu_per_traindl,
                **eval_dl_params,
            )

            data_layers_eval.append(data_layer_eval)
    else:
        logging.warning("There were no val datasets passed")

    jasper_encoder = nemo_asr.JasperEncoder(**jasper_params["JasperEncoder"], )

    jasper_decoder = nemo_asr.JasperDecoderForClassification(
        feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"],
        num_classes=len(labels),
        **jasper_params['JasperDecoderForClassification'],
    )

    ce_loss = nemo_asr.CrossEntropyLossNM()

    logging.info('================================')
    logging.info(
        f"Number of parameters in encoder: {jasper_encoder.num_weights}")
    logging.info(
        f"Number of parameters in decoder: {jasper_decoder.num_weights}")
    logging.info(f"Total number of parameters in model: "
                 f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
    logging.info('================================')

    # Train DAG
    # --- Assemble Training DAG --- #
    audio_signal, audio_signal_len, commands, command_len = data_layer()

    processed_signal, processed_signal_len = data_preprocessor(
        input_signal=audio_signal, length=audio_signal_len)

    processed_signal, processed_signal_len = crop_pad_augmentation(
        input_signal=processed_signal, length=audio_signal_len)

    if spectr_augment_config:
        processed_signal = data_spectr_augmentation(
            input_spec=processed_signal)

    encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
                                          length=processed_signal_len)

    decoded = jasper_decoder(encoder_output=encoded)

    loss = ce_loss(logits=decoded, labels=commands)

    # Callbacks needed to print info to console and Tensorboard
    train_callback = nemo.core.SimpleLossLoggerCallback(
        # Notice that we pass in loss, predictions, and the labels (commands).
        # Of course we would like to see our training loss, but we need the
        # other arguments to calculate the accuracy.
        tensors=[loss, decoded, commands],
        # The print_func defines what gets printed.
        print_func=partial(monitor_classification_training_progress,
                           eval_metric=None),
        get_tb_values=lambda x: [("loss", x[0])],
        tb_writer=neural_factory.tb_writer,
    )

    chpt_callback = nemo.core.CheckpointCallback(
        folder=neural_factory.checkpoint_dir,
        load_from_folder=args.load_dir,
        step_freq=args.checkpoint_save_freq,
    )

    callbacks = [train_callback, chpt_callback]

    # assemble eval DAGs
    for i, eval_dl in enumerate(data_layers_eval):
        # --- Assemble Validation DAG --- #
        test_audio_signal, test_audio_signal_len, test_commands, \
            test_command_len = eval_dl()

        test_processed_signal, test_processed_signal_len = data_preprocessor(
            input_signal=test_audio_signal, length=test_audio_signal_len)

        test_processed_signal, test_processed_signal_len = crop_pad_augmentation(
            input_signal=test_processed_signal,
            length=test_processed_signal_len)

        test_encoded, test_encoded_len = jasper_encoder(
            audio_signal=test_processed_signal,
            length=test_processed_signal_len)

        test_decoded = jasper_decoder(encoder_output=test_encoded)

        test_loss = ce_loss(logits=test_decoded, labels=test_commands)

        # create corresponding eval callback
        tagname = os.path.basename(args.eval_datasets[i]).split(".")[0]
        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[test_loss, test_decoded, test_commands],
            user_iter_callback=partial(process_classification_evaluation_batch,
                                       top_k=1),
            user_epochs_done_callback=partial(
                process_classification_evaluation_epoch,
                eval_metric=1,
                tag=tagname),
            # How often we evaluate the model on the test set
            eval_step=args.eval_freq,
            tb_writer=neural_factory.tb_writer,
        )

        callbacks.append(eval_callback)
    return loss, callbacks, steps_per_epoch
Ejemplo n.º 57
0
    def _make_split_nuscenes(self):
        """
        Use generated <self.voxel_version> output to build split.
        """

        assert self._input_format == "nuscenes"

        self.sample_id_template = "nuscenes_lidarseg_{seq:04d}_{frame:04d}"

        self._seq_format = lambda x: "{:04d}".format(x)
        self._frame_format = lambda x: "{:05d}".format(x)
        self._label_format = lambda x: "{:05d}".format(x)
        self._voxel_format = lambda x: "{:06d}".format(x)

        # Todo: no test split option for now
        assert self.testset_flag is False
        valid_splits = ["train", "valid"]
        map_split_names = {"train": "train", "valid": "val", "test": "test"}
        # read config
        with open(str(self.config_semantic), "r") as file_conf_sem:
            yaml = YAML()
            data = yaml.load(file_conf_sem)
            self._config_data = {k: dict(v) for k, v in data.items()}

        data_splits = {
            map_split_names[k]: v
            for k, v in self._config_data["split"].items()
            if k in valid_splits
        }
        self._split = {
            "name": "nuscenes_voxels_{}".format(
                "default" if not self.testset_flag else "test"
            ),
            "data": {k: [] for k in data_splits.keys()},
        }

        self._samples_to_generate = []

        def parse_sequence_folder_name(x):
            try:
                return int(x)
            except ValueError:
                return -1

        voxel_sequences = {
            parse_sequence_folder_name(x.name): x
            for x in self.semantic_kitti_voxels_root.iterdir()
        }

        for split_name, sequences in data_splits.items():
            split_data = self._split["data"][split_name]
            for sequence_index in sequences:
                if not self.testset_flag:

                    if sequence_index not in voxel_sequences:
                        logger.warning(
                            "Sequence "
                            + self._seq_format(sequence_index)
                            + " not available. Skipping."
                        )
                        continue

                    voxel_dir = voxel_sequences[sequence_index] / self.voxel_version
                    if not voxel_dir.is_dir():
                        logger.warning(
                            "Voxels not available in sequence "
                            + self._seq_format(sequence_index)
                            + ". Skipping."
                        )
                        continue

                    self._voxel_data_cache[sequence_index] = {
                        int(x.stem[:6]): x
                        for x in (
                            voxel_sequences[sequence_index] / self.voxel_version
                        ).iterdir()
                        if x.suffix == ".tfrecord"
                    }

                    split_data.extend(
                        [
                            self.sample_id_template.format(seq=sequence_index, frame=x)
                            for x in sorted(
                                list(self._voxel_data_cache[sequence_index].keys())
                            )
                        ]
                    )
                    self._samples_to_generate.extend(
                        [
                            (sequence_index, x)
                            for x in sorted(
                                list(self._voxel_data_cache[sequence_index].keys())
                            )
                        ]
                    )
                else:
                    raise NotImplementedError()

        self._label_mapping: dict = self._config_data["learning_map"]
        # make 255 the 'unlabeled' label and shift all others down (-1) accordingly
        self._label_mapping = {
            k: v - 1 if v != 0 else 255 for k, v in self._label_mapping.items()
        }
        self._label_mapping_voxels = self._label_mapping.copy()
        # map unlabeled to extra entry 254 when voxelizing
        # Todo(risteon): Is this better?
        # -> Map noise to 254; this will get removed when parsing
        # -> Map unlabeled to 255 to keep for geometry training
        unlabeled_index_nuscenes = 32
        self._label_mapping[unlabeled_index_nuscenes] = 255
        self._label_mapping_voxels[0] = 254
        self._label_mapping_voxels[unlabeled_index_nuscenes] = 255

        assert all(x <= 255 for x in self._label_mapping.values())
        assert all(x <= 255 for x in self._label_mapping_voxels.values())

        self._label_mapping = np.vectorize(self._label_mapping.get, otypes=[np.int64])
        self._label_mapping_voxels = np.vectorize(
            self._label_mapping_voxels.get, otypes=[np.int64]
        )
Ejemplo n.º 58
0
    def _run_core_command(
        self,
        patterns_json: List[Any],
        patterns: List[Pattern],
        targets: List[Path],
        language: Language,
        rule: Rule,
        rules_file_flag: str,
        cache_dir: str,
    ) -> dict:
        with tempfile.NamedTemporaryFile(
            "w"
        ) as pattern_file, tempfile.NamedTemporaryFile(
            "w"
        ) as target_file, tempfile.NamedTemporaryFile(
            "w"
        ) as equiv_file:
            yaml = YAML()
            yaml.dump({"rules": patterns_json}, pattern_file)
            pattern_file.flush()
            target_file.write("\n".join(str(t) for t in targets))
            target_file.flush()

            cmd = [SEMGREP_PATH] + [
                "-lang",
                language,
                rules_file_flag,
                pattern_file.name,
                "-j",
                str(self._jobs),
                "-target_file",
                target_file.name,
                "-use_parsing_cache",
                cache_dir,
                "-timeout",
                str(self._timeout),
                "-max_memory",
                str(self._max_memory),
            ]

            equivalences = rule.equivalences
            if equivalences:
                self._write_equivalences_file(equiv_file, equivalences)
                cmd += ["-equivalences", equiv_file.name]

            core_run = sub_run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.debug(core_run.stderr.decode("utf-8", "replace"))

            if core_run.returncode != 0:
                output_json = self._parse_core_output(core_run.stdout)

                if "error" in output_json:
                    self._raise_semgrep_error_from_json(output_json, patterns)
                else:
                    raise SemgrepError(
                        f"unexpected json output while invoking semgrep-core:\n{PLEASE_FILE_ISSUE_TEXT}"
                    )

            output_json = self._parse_core_output(core_run.stdout)

            return output_json
Ejemplo n.º 59
0
class Representer(RoundTripRepresenter):
    pass


Representer.add_representer(OrderedDict, Representer.represent_dict)


def wrap_yaml_string(s, width=100):
    ss = (l.rstrip() for l in s.splitlines())
    ss = (l for l in ss if l)
    #ss = textwrap.wrap('\n'.join(ss), width=width, drop_whitespace=False, tabsize=2)
    return PreservedScalarString('\n'.join(ss))


yaml = YAML(typ='rt')
yaml.Representer = Representer
yaml.compact()
yaml.default_flow_style = False


def yaml_dumps(document):
    stream = StringIO()
    yaml.dump(document, stream)
    return stream.getvalue()


def write_yaml(dir_, fn, data):
    if not os.path.exists(dir_):
        os.makedirs(dir_)
    with open(os.path.join(dir_, fn), 'w') as f:
Ejemplo n.º 60
0
def write(model_name, data, output_dir):
    """Write data structure to YAML and csv
    """
    project_data, intervals, interventions, units, model_data, extra = data

    yaml = YAML()

    # project
    with open(project_yaml_file(output_dir), 'w',
              encoding='utf-8') as project_file:
        yaml.dump(project_data, project_file)

    # intervals
    intervals_filename = os.path.join(output_dir, 'data',
                                      'interval_definitions',
                                      '{}_intervals.csv'.format(model_name))
    with open(intervals_filename, 'w', encoding='utf-8',
              newline='') as intervals_file:
        fieldnames = ('id', 'start_hour', 'end_hour')
        writer = csv.DictWriter(intervals_file, fieldnames)
        writer.writeheader()
        writer.writerows(intervals)

    # interventions
    interventions_filename = os.path.join(
        output_dir, 'data', 'interventions',
        '{}_interventions.yml'.format(model_name))
    with open(interventions_filename, 'w',
              encoding='utf-8') as interventions_file:
        yaml.dump(interventions, interventions_file)

    # units
    units_filename = os.path.join(output_dir, 'data',
                                  '{}_units.txt'.format(model_name))
    with open(units_filename, 'w', encoding='utf-8', newline='') as units_file:
        fieldnames = ('unit_name', 'description')
        writer = csv.DictWriter(units_file, fieldnames, delimiter='=')
        writer.writeheader()
        writer.writerows(units)

    # model
    model_filename = os.path.join(output_dir, 'config', 'sector_models',
                                  '{}.yml'.format(model_name))
    with open(model_filename, 'w', encoding='utf-8') as model_file:
        yaml.dump(model_data, model_file)

    # wrapper
    wrapper_parameters = ''
    for parameter in model_data['parameters']:
        identifier = clean('parameter_' + str(parameter['name']))
        wrapper_parameters += '{0} = data.get_parameter(\'{1}\')\n\t\t'.format(
            identifier, parameter['name'])
        wrapper_parameters += 'self.logger.info(\'Parameter {1}: %s\', {0})\n\t\t'.format(
            identifier,
            str(parameter['name']).replace("_", " ").capitalize())

    wrapper_inputs = ''
    for input in model_data['inputs']:
        identifier = clean('input_' + str(input['name']))
        wrapper_inputs += '{0} = data.get_data("{1}")\n\t\t'.format(
            identifier, input['name'])
        wrapper_inputs += 'self.logger.info(\'Input {1}: %s\', {0})\n\t\t'.format(
            identifier,
            str(input['name']).replace("_", " ").capitalize())

    wrapper_outputs = ''
    for output in model_data['outputs']:
        wrapper_outputs += 'data.set_results("{0}", None)\n\t\t'.format(
            output['name'])

    # ensure models dir exists
    try:
        os.mkdir(os.path.join(output_dir, 'models'))
    except FileExistsError:
        pass

    with open(WRAPPER_TEMPLATE, 'r') as source, open(
            os.path.join(output_dir, 'models', '{}.py'.format(model_name)),
            'w') as sink:
        for line in source.readlines():
            sink.write(
                line.format(model_name=model_name,
                            model_name_rm_=model_name.replace("_", " "),
                            model_name_cap=model_name.replace(
                                "_", " ").capitalize(),
                            model_parameters=wrapper_parameters,
                            model_inputs=wrapper_inputs,
                            model_outputs=wrapper_outputs))

    # extras
    for sheet_name, data in extra.items():
        filename = os.path.join(output_dir,
                                '{}__{}.yml'.format(model_name, sheet_name))
        with open(filename, 'w', encoding='utf-8') as file_handle:
            yaml.dump(data, file_handle)