Example No. 1
async def test_lovelace_update_view(hass, hass_ws_client):
    """Test update_view command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')
    origyaml = yaml.load(TEST_YAML_A)

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=origyaml), \
        patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/update',
            'view_id': 'example',
            'view_config': 'id: example2\ntitle: New title\n',
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    orig_view = origyaml.mlget(['views', 0], list_ok=True)
    new_view = result.mlget(['views', 0], list_ok=True)
    assert new_view['title'] == 'New title'
    assert new_view['cards'] == orig_view['cards']
    assert 'theme' not in new_view
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
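
The assertions above use ruamel.yaml's round-trip objects: YAML(typ='rt') returns a CommentedMap, and its mlget() walks a nested key path, with list_ok=True allowing integer indices into sequences. A minimal standalone sketch (the YAML snippet is illustrative, not the TEST_YAML_A fixture):

from ruamel.yaml import YAML

yaml = YAML(typ='rt')
doc = yaml.load(
    "views:\n"
    "  - title: Example\n"
    "    cards:\n"
    "      - title: Test card\n"
)
# list_ok=True lets integer elements of the key path index into sequences
assert doc.mlget(['views', 0, 'cards', 0, 'title'], list_ok=True) == 'Test card'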
Example No. 2
async def test_lovelace_move_card_view_position(hass, hass_ws_client):
    """Test move_card to view with position command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
        patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/move',
            'card_id': 'test',
            'new_view_id': 'example',
            'new_position': 1,
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    assert result.mlget(['views', 0, 'cards', 1, 'title'],
                        list_ok=True) == 'Test card'
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
Example No. 3
    def parse(self, input):
        """parse the given file or file source string"""
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            src = input.read()
            input.close()
            input = src
        if isinstance(input, bytes):
            input = input.decode('utf-8')
        yaml = YAML()
        try:
            self._file = yaml.load(input)
        except YAMLError as e:
            message = e.problem if hasattr(e, 'problem') else e.message
            if hasattr(e, 'problem_mark'):
                message += ' {0}'.format(e.problem_mark)
            raise base.ParseError(message)

        self._file = self.preprocess(self._file)

        for k, data in self._flatten(self._file):
            unit = self.UnitClass(data)
            unit.setid(k)
            self.addunit(unit)
Example No. 4
    def handle(self, *args, **options):
        def flatten(l):
            return [item for sublist in l for item in sublist]

        yaml = YAML()
        with open(options['yaml']) as yamlfile:
            data = yaml.load(yamlfile)

        for attribute in flatten(data['attributes'].values()):
            SuomiFiUserAttribute.objects.update_or_create(
                friendly_name=attribute['friendly_name'],
                uri=attribute['uri'],
                name=attribute['name'],
                description=attribute['description']
            )

        for level, details in data['access_levels'].items():
            access_level, created = SuomiFiAccessLevel.objects.update_or_create(shorthand=level)
            for language, name in details['name'].items():
                access_level.set_current_language(language)
                access_level.name = name
            for language, description in details['description'].items():
                access_level.set_current_language(language)
                access_level.description = description
            for attribute in flatten(details['fields']):
                access_level.attributes.add(SuomiFiUserAttribute.objects.get(friendly_name=attribute['friendly_name']))
            access_level.save()
Example No. 5
def mocked_config_file_path(
    fake_temp_data_pocketsphinx_dic, fake_temp_data_pocketsphinx_lm, tmpdir_factory
):
    path_to_pocketsphix_dic = os.path.join(
        str(fake_temp_data_pocketsphinx_dic), "fake.dic"
    )
    path_to_pocketsphix_lm = os.path.join(
        str(fake_temp_data_pocketsphinx_lm), "fake.lm"
    )
    # config part
    base = tempfile.mkdtemp()
    config_file = os.path.join(base, "config.yaml")

    yaml = YAML()

    m_cfg = yaml.load(COMMON_MOCKED_CONFIG)
    m_cfg["pocketsphinx"]["dic"] = path_to_pocketsphix_dic
    m_cfg["pocketsphinx"]["lm"] = path_to_pocketsphix_lm

    with open(config_file, "w", encoding="utf-8") as fp:
        yaml.dump(m_cfg, fp)

    yield config_file

    shutil.rmtree(base)
Example No. 6
File: misc.py Project: mjirik/io3d
def obj_from_file(filename='annotation.yaml', filetype='auto'):
    ''' Read object from file '''

    if filetype == 'auto':
        _, ext = os.path.splitext(filename)
        filetype = ext[1:]

    if filetype in ('yaml', 'yml'):
        from ruamel.yaml import YAML
        yaml = YAML(typ="unsafe")
        with open(filename, encoding="utf-8") as f:
            obj = yaml.load(f)
        if obj is None:
            obj = {}
        # import yaml
        # with open(filename, encoding="utf-8") as f:
        #     intext = f.read()
        #     obj = yaml.load(intext)
    elif filetype in ('pickle', 'pkl', 'pklz', 'picklezip'):
        fcontent = read_pkl_and_pklz(filename)
        # import pickle
        if sys.version_info[0] < 3:
            import cPickle as pickle
        else:
            import _pickle as pickle
        # import sPickle as pickle
        if sys.version_info.major == 2:
            obj = pickle.loads(fcontent)
        else:
            obj = pickle.loads(fcontent, encoding="latin1")
    else:
        logger.error('Unknown filetype ' + filetype)
        obj = None
    return obj
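
Here typ="unsafe" reproduces the old permissive yaml.load behaviour: a document can instruct the loader to build arbitrary Python objects, so it is only appropriate for trusted files such as these locally written annotations. A small sketch of the difference (the tagged document is illustrative):

from ruamel.yaml import YAML

# The unsafe loader reconstructs python/* tags into real Python objects;
# the safe loader would reject this document with a ConstructorError.
unsafe_yaml = YAML(typ="unsafe")
obj = unsafe_yaml.load("!!python/tuple [1, 2]\n")
assert obj == (1, 2)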
Example No. 7
async def test_lovelace_get_view(hass, hass_ws_client):
    """Test get_view command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/get',
            'view_id': 'example',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
    assert "".join(msg['result'].split()) == "".join(
        'title: Example\n'
        '# Optional unique id for direct access /lovelace/${id}\n'
        'id: example\n'
        '# Optional background (overwrites the global background).\n'
        'background: radial-gradient(crimson, skyblue)\n'
        '# Each view can have a different theme applied.\n'
        'theme: dark-mode\n'.split())
Example No. 8
    def test_to_file(self):
        filename = "ff_test.yaml"
        b = self.benzene
        b.to_file(filename=filename)
        yaml = YAML(typ="safe")
        with open(filename, "r") as f:
            d = yaml.load(f)
        self.assertListEqual(d["mass_info"], [list(m) for m in b.mass_info])
        self.assertListEqual(d["pair_coeffs"], b.pair_coeffs)
Example No. 9
    def test_to_file(self):
        filename = "ff_test.yaml"
        v = self.virus
        v.to_file(filename=filename)
        yaml = YAML(typ="safe")
        with open(filename, "r") as f:
            d = yaml.load(f)
        self.assertListEqual(d["mass_info"], [list(m) for m in v.mass_info])
        self.assertListEqual(d["nonbond_coeffs"], v.nonbond_coeffs)
Example No. 10
def test_id_not_changed():
    """Test if id is not changed if already exists."""
    yaml = YAML(typ='rt')

    fname = "dummy.yaml"
    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_B)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        migrate_config(fname)
    assert save_yaml_mock.call_count == 0
Example No. 11
class TestYAML(unittest.TestCase):
    """Test lovelace.yaml save and load."""

    def setUp(self):
        """Set up for tests."""
        self.tmp_dir = mkdtemp()
        self.yaml = YAML(typ='rt')

    def tearDown(self):
        """Clean up after tests."""
        for fname in os.listdir(self.tmp_dir):
            os.remove(os.path.join(self.tmp_dir, fname))
        os.rmdir(self.tmp_dir)

    def _path_for(self, leaf_name):
        return os.path.join(self.tmp_dir, leaf_name+".yaml")

    def test_save_and_load(self):
        """Test saving and loading back."""
        fname = self._path_for("test1")
        open(fname, "w+")
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_A))
        data = util_yaml.load_yaml(fname, True)
        assert data == self.yaml.load(TEST_YAML_A)

    def test_overwrite_and_reload(self):
        """Test that we can overwrite an existing file and read back."""
        fname = self._path_for("test2")
        open(fname, "w+")
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_A))
        util_yaml.save_yaml(fname, self.yaml.load(TEST_YAML_B))
        data = util_yaml.load_yaml(fname, True)
        assert data == self.yaml.load(TEST_YAML_B)

    def test_load_bad_data(self):
        """Test error from trying to load unserialisable data."""
        fname = self._path_for("test3")
        with open(fname, "w") as fh:
            fh.write(TEST_BAD_YAML)
        with pytest.raises(HomeAssistantError):
            util_yaml.load_yaml(fname, True)
Example No. 12
def test_save_yaml_model(tmpdir, mini_model):
    """Test the writing of YAML model."""
    jsonschema = pytest.importorskip("jsonschema")
    output_file = tmpdir.join("mini.yml")
    cio.save_yaml_model(mini_model, output_file.strpath, sort=True)
    # validate against schema
    yaml = YAML(typ="unsafe")
    with open(output_file.strpath, "r") as infile:
        yaml_to_dict = yaml.load(infile)
    dict_to_json = json.dumps(yaml_to_dict)
    loaded = json.loads(dict_to_json)
    # jsonschema.validate returns None on success and raises ValidationError
    # on failure, so it should not be wrapped in an assert.
    jsonschema.validate(loaded, cio.json.json_schema)
Example No. 13
    def from_file(cls, filename):
        """
        Constructor that reads in a file in YAML format.

        Args:
            filename (str): Filename.

        """
        yaml = YAML(typ="safe")
        with open(filename, "r") as f:
            d = yaml.load(f)
        return cls.from_dict(d)
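
This constructor uses typ="safe", which builds plain Python containers and discards comments; the round-trip loader used in several other examples returns comment-preserving types instead. A quick illustration (only the public ruamel.yaml API is assumed):

from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap

text = "a: 1  # a comment\n"
safe_doc = YAML(typ="safe").load(text)  # plain dict, comment discarded
rt_doc = YAML(typ="rt").load(text)      # CommentedMap, comment kept for dumping
assert not isinstance(safe_doc, CommentedMap)
assert isinstance(rt_doc, CommentedMap)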
Example No. 14
def edit_tmpvault(filename):
    '''Update yaml config and by changing any key with the value CHANGE_AND_REKEY

    requests a master password and uses pbkdf2 to get a master key to base all
    of the new keys off of
    '''
    yaml = YAML()
    with open(filename) as fobj:
        vault_dict = yaml.load(fobj)
    master_pass = getpass.getpass("Enter master key to generate values: ").encode('utf-8')
    master_key = hashlib.pbkdf2_hmac('sha256', master_pass, os.urandom(16), 100000)
    change_values(vault_dict, 'CHANGE_AND_REKEY', master_key)
    with open(filename, 'w') as fobj:
        yaml.dump(vault_dict, fobj)
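
The key derivation above is standard-library hashlib: PBKDF2-HMAC-SHA256 over the typed master password. A minimal sketch with hypothetical values (note that, as in the function above, a freshly random salt means the derived key differs on every run unless the salt is stored):

import hashlib
import os

master_pass = b"example-master-pass"   # hypothetical input
salt = os.urandom(16)
master_key = hashlib.pbkdf2_hmac('sha256', master_pass, salt, 100000)
assert len(master_key) == 32           # SHA-256 digest size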
Example No. 15
def vt2esofspy(vesseltree, outputfilename="tracer.txt", axisorder=[0, 1, 2]):
    """
    exports vesseltree to esofspy format

    :param vesseltree: filename or vesseltree dictionary structure
    :param outputfilename: output file name
    :param axisorder: order of axis can be specified with this option
    :return:
    """

    if (type(vesseltree) == str) and os.path.isfile(vesseltree):
        from ruamel.yaml import YAML
        yaml = YAML(typ="unsafe")
        with open(vesseltree, encoding="utf-8") as f:
            vt = yaml.load(f)
    else:
        vt = vesseltree
    logger.debug(str(vt['general']))
    logger.debug(str(vt.keys()))
    vtgm = vt['graph']['microstructure']
    lines = []
    vs = vt['general']['voxel_size_mm']
    sh = vt['general']['shape_px']

    # switch axis
    ax = axisorder

    lines.append("#Tracer+\n")
    lines.append("#voxelsize mm %f %f %f\n" % (vs[ax[0]], vs[ax[1]], vs[ax[2]]))
    lines.append("#shape %i %i %i\n" % (sh[ax[0]], sh[ax[1]], sh[ax[2]]))
    lines.append(str(len(vtgm) * 2)+"\n")

    i = 1
    for id in vtgm:
        try:
            nda = vtgm[id]['nodeA_ZYX']
            ndb = vtgm[id]['nodeB_ZYX']
            lines.append("%i\t%i\t%i\t%i\n" % (nda[ax[0]], nda[ax[1]], nda[ax[2]], i))
            lines.append("%i\t%i\t%i\t%i\n" % (ndb[ax[0]], ndb[ax[1]], ndb[ax[2]], i))
            i += 1
        except:
            pass


    lines.append("%i\t%i\t%i\t%i" % (0, 0, 0, 0))
    lines[3] = str(i - 1) + "\n"
    from builtins import str as text
    with open(outputfilename, 'wt') as f:
        for line in lines:
            f.write(text(line))
Example No. 16
def test_add_id():
    """Test if id is added."""
    yaml = YAML(typ='rt')

    fname = "dummy.yaml"
    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        migrate_config(fname)

    result = save_yaml_mock.call_args_list[0][0][1]
    assert 'id' in result['views'][0]['cards'][0]
    assert 'id' in result['views'][1]
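
The Home Assistant tests in this file share one mocking pattern: patch load_yaml to feed in a pre-parsed fixture and patch save_yaml to capture what would have been written. A generic sketch of that pattern (run_and_capture and its arguments are hypothetical; the patch targets are the ones used above):

from unittest.mock import patch

def run_and_capture(migrate, fixture_doc, fname="dummy.yaml"):
    # `migrate` stands in for migrate_config or a similar entry point;
    # `fixture_doc` is a document already parsed with YAML(typ='rt').
    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=fixture_doc), \
            patch('homeassistant.util.ruamel_yaml.save_yaml') as save_yaml_mock:
        migrate(fname)
    # Each recorded call is (args, kwargs); args[1] is the document that
    # would have been written by save_yaml(fname, data).
    return [call[0][1] for call in save_yaml_mock.call_args_list]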
Example No. 17
    def loadtestDictsFromFilePaths(self, testFilePaths):
        """Parses yaml files from given filepaths

        :param testFilePaths: file names to parse
        :type testFilePaths: list of strings
        :return: list of dict parsed from the yaml
        :rtype: list of dicts
        """

        testDicts = []
        yaml = YAML()
        for testFile in testFilePaths:
            with open(testFile) as f:
                testDict = yaml.load(f)
            testDicts.append(dict(testDict))
        return testDicts
Example No. 18
def get_default_opttask_kwargs():
    """
    Get the default configuration kwargs for OptTask.

    Args:
        None

    Returns:
        conf_dict (dict): The default kwargs for OptTask

    """
    cwd = os.path.dirname(os.path.realpath(__file__))
    fname = os.path.join(cwd, "defaults.yaml")
    with open(fname, 'r') as config_raw:
        yaml = YAML()
        conf_dict = dict(yaml.load(config_raw))
    return conf_dict
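
Wrapping the loaded document in dict() as above only converts the outermost mapping; nested mappings and sequences remain ruamel round-trip types. A small sketch, not taken from the source:

from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap

conf_dict = dict(YAML().load("outer:\n  inner: 1\n"))
assert isinstance(conf_dict, dict)
assert isinstance(conf_dict["outer"], CommentedMap)  # nested level is untouched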
Example No. 19
async def test_lovelace_get_card(hass, hass_ws_client):
    """Test get_card command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/get',
            'card_id': 'test',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
    assert msg['result'] == 'id: test\ntype: entities\ntitle: Test card\n'
Example No. 20
async def test_lovelace_get_view_not_found(hass, hass_ws_client):
    """Test get_card command cannot find card."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/get',
            'view_id': 'not_found',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success'] is False
    assert msg['error']['code'] == 'view_not_found'
Example No. 21
    def _readConfig(self, fileName):
        try:
            with open(fileName, "r") as config:
                yaml = YAML()
                configData = yaml.load(config)
                if not configData:
                    configData = {}
        except Exception as e:
            raise ConfigError(fileName, e)

        if "include" in configData:
            for fileName in configData["include"]:
                includeConfig = self._readConfig(fileName)
                for key, val in includeConfig.iteritems():
                    if key not in configData:
                        configData[key] = val
                    elif not isinstance(configData[key], basestring): # Let's try to merge them if they're collections
                        if isinstance(val, basestring):
                            raise ConfigError(fileName, "The included configuration file tried to merge a non-string "
                                                        "with a string.")
                        try: # Make sure both things we're merging are still iterable types (not numbers or whatever)
                            iter(configData[key])
                            iter(val)
                        except TypeError:
                            pass # Just don't merge them if they're not
                        else:
                            try:
                                configData[key] += val # Merge with the + operator
                            except TypeError: # Except that some collections (dicts) can't
                                try:
                                    for subkey, subval in val.iteritems(): # So merge them manually
                                        if subkey not in configData[key]:
                                            configData[key][subkey] = subval
                                except (AttributeError, TypeError):
                                    # If either of these, they weren't both dicts (but were still iterable);
                                    # requires user to resolve
                                    raise ConfigError(fileName, "The variable {} could not be successfully merged "
                                                                "across files.".format(key))
            del configData["include"]
        return configData
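
Stripped of the Python 2 idioms (iteritems, basestring), the include handling above amounts to a simple merge policy: keys from an included file never override existing scalars, sequences are concatenated, and dicts are merged one level deep. A simplified standalone sketch of that policy:

def merge_included(config, included):
    # Simplified re-statement of the merge rules in _readConfig above.
    for key, val in included.items():
        if key not in config:
            config[key] = val                   # new key: just take it
        elif isinstance(config[key], list) and isinstance(val, list):
            config[key] += val                  # sequences are concatenated
        elif isinstance(config[key], dict) and isinstance(val, dict):
            for subkey, subval in val.items():  # dicts merge one level deep
                config[key].setdefault(subkey, subval)
    return config

merge_included({'a': 1, 'b': [1]}, {'a': 2, 'b': [2], 'c': 3})
# -> {'a': 1, 'b': [1, 2], 'c': 3}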
Example No. 22
async def test_lovelace_update_card_bad_yaml(hass, hass_ws_client):
    """Test update_card command bad yaml."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
        patch('homeassistant.util.ruamel_yaml.yaml_to_object',
              side_effect=HomeAssistantError):
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/update',
            'card_id': 'test',
            'card_config': 'id: test\ntype: glance\n',
        })
        msg = await client.receive_json()

    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success'] is False
    assert msg['error']['code'] == 'error'
Example No. 23
async def test_lovelace_add_view(hass, hass_ws_client):
    """Test add_view command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
        patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/add',
            'view_config': 'id: test\ntitle: added\n',
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    assert result.mlget(['views', 2, 'title'],
                        list_ok=True) == 'added'
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
Example No. 24
async def test_lovelace_update_card(hass, hass_ws_client):
    """Test update_card command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
        patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/card/update',
            'card_id': 'test',
            'card_config': 'id: test\ntype: glance\n',
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    assert result.mlget(['views', 1, 'cards', 0, 'type'],
                        list_ok=True) == 'glance'
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
Example No. 25
async def test_lovelace_delete_view(hass, hass_ws_client):
    """Test delete_card command."""
    await async_setup_component(hass, 'lovelace')
    client = await hass_ws_client(hass)
    yaml = YAML(typ='rt')

    with patch('homeassistant.util.ruamel_yaml.load_yaml',
               return_value=yaml.load(TEST_YAML_A)), \
        patch('homeassistant.util.ruamel_yaml.save_yaml') \
            as save_yaml_mock:
        await client.send_json({
            'id': 5,
            'type': 'lovelace/config/view/delete',
            'view_id': 'example',
        })
        msg = await client.receive_json()

    result = save_yaml_mock.call_args_list[0][0][1]
    views = result.get('views', [])
    assert len(views) == 1
    assert views[0]['title'] == 'Second view'
    assert msg['id'] == 5
    assert msg['type'] == TYPE_RESULT
    assert msg['success']
Example No. 26
    def parser_from_buffer(cls, fp):
        """Construct YamlParser from a file pointer."""
        yaml = YAML(typ="safe")
        return cls(yaml.load(fp))
Example No. 27
    mask = connected_components == largest_component_label
    return mask.astype(float)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Data processing')
    parser.add_argument('-c',
                        '--config',
                        default='config.json',
                        type=str,
                        help='Path to the config file (default: config.json)')
    args = parser.parse_args()

    yaml = YAML(typ='safe')
    with open('config.yaml', encoding='utf-8') as file:
        config = yaml.load(file)  # loaded as a list type
    save_pickle(
        config["path"] + "/DRIVE/training",
        *data_process(config["path"],
                      name="DRIVE",
                      mode="training",
                      **config["data_process"]))
    save_pickle(
        config["path"] + "/DRIVE/test",
        *data_process(config["path"],
                      name="DRIVE",
                      mode="test",
                      **config["data_process"]))
    save_pickle(
        config["path"] + "/CHASEDB1",
        *data_process(config["path"],
Example No. 28
from dcim.models import Site
from ipam.models import VLAN, VLANGroup, Role
from ipam.constants import VLAN_STATUS_CHOICES
from tenancy.models import Tenant, TenantGroup
from extras.models import CustomField, CustomFieldValue
from ruamel.yaml import YAML

from pathlib import Path
import sys

file = Path('/opt/netbox/initializers/vlans.yml')
if not file.is_file():
    sys.exit()

with file.open('r') as stream:
    yaml = YAML(typ='safe')
    vlans = yaml.load(stream)

    optional_assocs = {
        'site': (Site, 'name'),
        'tenant': (Tenant, 'name'),
        'tenant_group': (TenantGroup, 'name'),
        'group': (VLANGroup, 'name'),
        'role': (Role, 'name')
    }

    if vlans is not None:
        for params in vlans:
            custom_fields = params.pop('custom_fields', None)

            for assoc, details in optional_assocs.items():
                if assoc in params:
Example No. 29
def getconfig():
    with open('config.yaml', 'r') as f:
        dados = f.read()
    yaml = YAML()
    data = yaml.load(dados)
    cdata = json.loads(json.dumps(data))
    return cdata
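
The json.dumps/json.loads round trip above is a common trick for turning ruamel's CommentedMap/CommentedSeq tree into plain dicts and lists, dropping comments and any non-JSON types along the way. A short sketch, not from the source:

import json
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap

data = YAML().load("a: 1  # note\nb: [1, 2]\n")
assert isinstance(data, CommentedMap)
cdata = json.loads(json.dumps(data))
assert type(cdata) is dict and type(cdata["b"]) is list

Example No. 30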
                    "%s has values (%s) not found in owl entity dictionaries t (%s): "
                    % (field.full_path, str(
                        quoted.difference(owl_entities)), str(owl_entities)))
                stat = False
    else:
        warnings.warn("Pattern has no text fields")
    return stat


schema_url = 'https://raw.githubusercontent.com/dosumis/dead_simple_owl_design_patterns/master/spec/DOSDP_schema_full.yaml'

dosdp_full_text = requests.get(schema_url)

ryaml = YAML(typ='safe')

dosdp = ryaml.load(dosdp_full_text.text)
# TODO - Add better parsing for ryaml exceptions.

v = Draft7Validator(dosdp)

pattern_docs = glob.glob(sys.argv[1] + "*.yaml")
pattern_docs.extend(glob.glob(sys.argv[1] + "*.yml"))
stat = True
for pattern_doc in pattern_docs:
    warnings.warn("Checking %s" % pattern_doc)
    with open(pattern_doc, "r") as file:
        pattern = ryaml.load(file.read())
    if not test_jschema(v, pattern): stat = False
    if not test_vars(pattern): stat = False
    if not test_text_fields(pattern): stat = False
Example No. 31
                # Get the path to the definition file.
                path_to_file = os.path.join(dirpath, def_file)
                # Get the type of HTTP method from the file name by removing the ".yaml" file
                # extension from the string.
                http_method = def_file[:-len(".yaml")]
                # File names for our resource definitions should be in the form of:
                # "get.yaml"
                # "post.yaml"
                # or some other RESTful HTTP method that we support in our API. If the current
                # file being processed isn't like that, then we skip over that file since it's
                # not a resource definition.
                if http_method not in ALLOWED_HTTP_METHODS:
                    continue
                # Load the yaml data from the current file being processed
                with open(path_to_file, "r") as yaml_file:
                    definition = yaml.load(yaml_file)
                    resource_url_path = RESOURCE_PATH_LOOKUP[resource_name]
                    # Set the value of the resource path in the base dictionary to the
                    # definition we loaded from the yaml file. It will look something like this
                    # in the paths dictionary:
                    #
                    # paths:
                    #   /:
                    #     get:
                    #       ...
                    openapi_definition['paths'][resource_url_path][
                        http_method] = definition

    # Get the date and time that the script was run, then generate a name for the definition
    # file.
    # We generate the file name with the date and time so that we can keep different versions
Example No. 32
class SavedBundleConfig(object):
    def __init__(self, bento_service=None, kind="BentoService"):
        self.kind = kind
        self._yaml = YAML()
        self._yaml.default_flow_style = False
        self.config = self._yaml.load(
            BENTOML_CONFIG_YAML_TEPMLATE.format(
                kind=self.kind,
                bentoml_version=get_bentoml_deploy_version(),
                created_at=str(datetime.utcnow()),
            )
        )

        if bento_service is not None:
            self.config["metadata"].update(
                {
                    "service_name": bento_service.name,
                    "service_version": bento_service.version,
                }
            )
            self.config["env"] = bento_service.env.to_dict()
            self.config['apis'] = _get_apis_list(bento_service)
            self.config['artifacts'] = _get_artifacts_list(bento_service)

    def write_to_path(self, path, filename="bentoml.yml"):
        return self._yaml.dump(self.config, Path(os.path.join(path, filename)))

    @classmethod
    def load(cls, filepath):
        conf = cls()
        with open(filepath, "rb") as config_file:
            yml_content = config_file.read()
        conf.config = conf._yaml.load(yml_content)
        ver = str(conf["version"])

        if ver != BENTOML_VERSION:
            msg = (
                "Saved BentoService bundle version mismatch: loading BentoService "
                "bundle created with BentoML version {}, but loading from BentoML "
                "version {}".format(conf["version"], BENTOML_VERSION)
            )

            # If major version is different, then there could be incompatible API
            # changes. Raise error in this case.
            if ver.split(".")[0] != BENTOML_VERSION.split(".")[0]:
                if not BENTOML_VERSION.startswith('0+untagged'):
                    raise BentoMLConfigException(msg)
                else:
                    logger.warning(msg)
            else:  # Otherwise just show a warning.
                logger.warning(msg)

        return conf

    def get_bento_service_metadata_pb(self):
        bento_service_metadata = BentoServiceMetadata()
        bento_service_metadata.name = self.config["metadata"]["service_name"]
        bento_service_metadata.version = self.config["metadata"]["service_version"]
        bento_service_metadata.created_at.FromDatetime(
            self.config["metadata"]["created_at"]
        )

        if "env" in self.config:
            if "setup_sh" in self.config["env"]:
                bento_service_metadata.env.setup_sh = self.config["env"]["setup_sh"]

            if "conda_env" in self.config["env"]:
                bento_service_metadata.env.conda_env = dump_to_yaml_str(
                    self.config["env"]["conda_env"]
                )

            if "pip_dependencies" in self.config["env"]:
                bento_service_metadata.env.pip_dependencies = "\n".join(
                    self.config["env"]["pip_dependencies"]
                )
            if "python_version" in self.config["env"]:
                bento_service_metadata.env.python_version = self.config["env"][
                    "python_version"
                ]
            if "docker_base_image" in self.config["env"]:
                bento_service_metadata.env.docker_base_image = self.config["env"][
                    "docker_base_image"
                ]

        if "apis" in self.config:
            for api_config in self.config["apis"]:
                if 'handler_type' in api_config:
                    # Convert handler type to input type for saved bundle created
                    # before version 0.8.0
                    input_type = api_config.get('handler_type')
                elif 'input_type' in api_config:
                    input_type = api_config.get('input_type')
                else:
                    input_type = "unknown"

                if 'output_type' in api_config:
                    output_type = api_config.get('output_type')
                else:
                    output_type = "DefaultOutput"

                api_metadata = BentoServiceMetadata.BentoServiceApi(
                    name=api_config["name"],
                    docs=api_config["docs"],
                    input_type=input_type,
                    output_type=output_type,
                )
                if "handler_config" in api_config:
                    # Supports viewing API input config info for saved bundle created
                    # before version 0.8.0
                    for k, v in api_config["handler_config"].items():
                        if k in {'mb_max_latency', 'mb_max_batch_size'}:
                            setattr(api_metadata, k, v)
                        else:
                            api_metadata.input_config[k] = v
                else:
                    if 'mb_max_latency' in api_config:
                        api_metadata.mb_max_latency = api_config["mb_max_latency"]
                    else:
                        api_metadata.mb_max_latency = DEFAULT_MAX_LATENCY

                    if 'mb_max_batch_size' in api_config:
                        api_metadata.mb_max_batch_size = api_config["mb_max_batch_size"]
                    else:
                        api_metadata.mb_max_batch_size = DEFAULT_MAX_BATCH_SIZE

                if "input_config" in api_config:
                    for k, v in api_config["input_config"].items():
                        api_metadata.input_config[k] = v

                if "output_config" in api_config:
                    for k, v in api_config["output_config"].items():
                        api_metadata.output_config[k] = v
                bento_service_metadata.apis.extend([api_metadata])

        if "artifacts" in self.config:
            for artifact_config in self.config["artifacts"]:
                artifact_metadata = BentoServiceMetadata.BentoArtifact()
                if "name" in artifact_config:
                    artifact_metadata.name = artifact_config["name"]
                if "artifact_type" in artifact_config:
                    artifact_metadata.artifact_type = artifact_config["artifact_type"]
                bento_service_metadata.artifacts.extend([artifact_metadata])

        return bento_service_metadata

    def __getitem__(self, item):
        return self.config[item]

    def __setitem__(self, key, value):
        self.config[key] = value

    def __contains__(self, item):
        return item in self.config
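
write_to_path above hands a pathlib.Path directly to YAML.dump, which ruamel.yaml accepts in place of an open stream. A tiny sketch with a hypothetical target path and illustrative data:

import tempfile
from pathlib import Path
from ruamel.yaml import YAML

yaml = YAML()
yaml.default_flow_style = False
target = Path(tempfile.mkdtemp()) / "bentoml.yml"   # hypothetical location
yaml.dump({"kind": "BentoService", "metadata": {"service_name": "demo"}}, target)
print(target.read_text())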
Example No. 33
def offline_inference(config, encoder, decoder, audio_file):
  MODEL_YAML = config
  CHECKPOINT_ENCODER = encoder
  CHECKPOINT_DECODER = decoder
  sample_rate, signal = wave.read(audio_file)

  # get labels (vocab)
  yaml = YAML(typ="safe")
  with open(MODEL_YAML) as f:
    jasper_model_definition = yaml.load(f)
  labels = jasper_model_definition['labels']

  # build neural factory and neural modules
  neural_factory = nemo.core.NeuralModuleFactory(
    placement=nemo.core.DeviceType.GPU,
    backend=nemo.core.Backend.PyTorch)
  data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
    factory=neural_factory,
    **jasper_model_definition["AudioToMelSpectrogramPreprocessor"])

  jasper_encoder = nemo_asr.JasperEncoder(
    feat_in=jasper_model_definition["AudioToMelSpectrogramPreprocessor"]["features"],
    **jasper_model_definition["JasperEncoder"])

  jasper_decoder = nemo_asr.JasperDecoderForCTC(
    feat_in=jasper_model_definition["JasperEncoder"]["jasper"][-1]["filters"],
    num_classes=len(labels))

  greedy_decoder = nemo_asr.GreedyCTCDecoder()

  # load model
  jasper_encoder.restore_from(CHECKPOINT_ENCODER)
  jasper_decoder.restore_from(CHECKPOINT_DECODER)

  # AudioDataLayer
  class AudioDataLayer(DataLayerNM):
    @staticmethod
    def create_ports():
      input_ports = {}
      output_ports = {
        "audio_signal": NeuralType({0: AxisType(BatchTag),
                                    1: AxisType(TimeTag)}),

        "a_sig_length": NeuralType({0: AxisType(BatchTag)}),
      }
      return input_ports, output_ports

    def __init__(self, **kwargs):
      DataLayerNM.__init__(self, **kwargs)
      self.output_enable = False

    def __iter__(self):
      return self

    def __next__(self):
      if not self.output_enable:
        raise StopIteration
      self.output_enable = False
      return torch.as_tensor(self.signal, dtype=torch.float32), \
            torch.as_tensor(self.signal_shape, dtype=torch.int64)

    def set_signal(self, signal):
      self.signal = np.reshape(signal.astype(np.float32)/32768., [1, -1])
      self.signal_shape = np.expand_dims(self.signal.size, 0).astype(np.int64)
      self.output_enable = True

    def __len__(self):
      return 1

    @property
    def dataset(self):
      return None

    @property
    def data_iterator(self):
      return self

  # Instantiate necessary neural modules
  data_layer = AudioDataLayer()

  # Define inference DAG
  audio_signal, audio_signal_len = data_layer()
  processed_signal, processed_signal_len = data_preprocessor(
    input_signal=audio_signal,
    length=audio_signal_len)
  encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
                                        length=processed_signal_len)
  log_probs = jasper_decoder(encoder_output=encoded)
  predictions = greedy_decoder(log_probs=log_probs)

  # audio inference
  data_layer.set_signal(signal)

  tensors = neural_factory.infer([
    audio_signal,
    processed_signal,
    encoded,
    log_probs,
    predictions], verbose=False)

  # results
  audio = tensors[0][0][0].cpu().numpy()
  features = tensors[1][0][0].cpu().numpy()
  encoded_features = tensors[2][0][0].cpu().numpy()
  probs = tensors[3][0][0].cpu().numpy()
  preds = tensors[4][0]
  transcript = post_process_predictions([preds], labels)

  return transcript, audio, features, encoded_features, probs, preds
Example No. 34
def callback(data):
    yaml = YAML()
    return yaml.load(data)
Example No. 35
def main(config=None):
    print(banner)

    parser = argparse.ArgumentParser(
        description='Boa, the fast build tool for conda packages.')
    subparsers = parser.add_subparsers(help='sub-command help', dest='command')
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument('recipe_dir', type=str)

    render_parser = subparsers.add_parser('render',
                                          parents=[parent_parser],
                                          help='render a recipe')
    build_parser = subparsers.add_parser('build',
                                         parents=[parent_parser],
                                         help='build a recipe')
    args = parser.parse_args()

    command = args.command

    folder = args.recipe_dir
    config = get_or_merge_config(None, {})
    config_files = find_config_files(folder)
    parsed_cfg = collections.OrderedDict()
    for f in config_files:
        parsed_cfg[f] = parse_config_file(f, config)
        normalized = {}
        for k in parsed_cfg[f].keys():
            if "_" in k:
                n = k.replace("_", "-")
                normalized[n] = parsed_cfg[f][k]
        parsed_cfg[f].update(normalized)

    # TODO just using latest config here, should merge!
    if len(config_files):
        cbc = parsed_cfg[config_files[-1]]
    else:
        cbc = {}

    update_index(os.path.dirname(config.output_folder),
                 verbose=config.debug,
                 threads=1)

    recipe_path = os.path.join(folder, "recipe.yaml")

    # step 1: parse YAML
    with open(recipe_path) as fi:
        loader = YAML(typ="safe")
        ydoc = loader.load(fi)

    # step 2: fill out context dict
    context_dict = ydoc.get("context") or {}
    jenv = jinja2.Environment()
    for key, value in context_dict.items():
        if isinstance(value, str):
            tmpl = jenv.from_string(value)
            context_dict[key] = tmpl.render(context_dict)

    if ydoc.get("context"):
        del ydoc["context"]

    # step 3: recursively loop over the entire recipe and render jinja with context
    jenv.globals.update(jinja_functions(config, context_dict))
    for key in ydoc:
        render_recursive(ydoc[key], context_dict, jenv)

    flatten_selectors(ydoc, ns_cfg(config))

    # We need to assemble the variants for each output

    variants = {}
    # if we have an outputs section, use it to order the outputs
    if ydoc.get("outputs"):

        # if ydoc.get("build"):
        #     raise InvalidRecipeError("You can either declare outputs, or build?")
        for o in ydoc["outputs"]:

            # inherit from global package
            pkg_meta = {}
            pkg_meta.update(ydoc["package"])
            pkg_meta.update(o["package"])
            o["package"] = pkg_meta

            build_meta = {}
            build_meta.update(ydoc.get("build"))
            build_meta.update(o.get("build") or {})
            o["build"] = build_meta
            variants[o["package"]["name"]] = get_dependency_variants(
                o["requirements"], cbc, config)
    else:
        # we only have one output
        variants[ydoc["package"]["name"]] = get_dependency_variants(
            ydoc["requirements"], cbc, config)

    # this takes in all variants and outputs, builds a dependency tree and returns
    # the final metadata
    sorted_outputs = to_build_tree(ydoc, variants, config)

    # then we need to solve and build from the bottom up
    # we can't first solve all packages without finalizing everything

    # - solve the package
    #   - solv build, add weak run exports to
    # - add run exports from deps!

    if command == 'render':
        for o in sorted_outputs:
            print(o)
        exit()

    solver = MambaSolver(["conda-forge"], "linux-64")
    for o in sorted_outputs:
        solver.replace_channels()
        o.finalize_solve(sorted_outputs, solver)
        print(o)

        o.config.compute_build_id(o.name)

        print(o.config.host_prefix)

        if 'build' in o.transactions:
            mkdir_p(o.config.build_prefix)
            print(o.transactions)
            o.transactions['build'].execute(
                PrefixData(o.config.build_prefix),
                PackageCacheData.first_writable().pkgs_dir)
        if 'host' in o.transactions:
            mkdir_p(o.config.host_prefix)
            print(o.transactions)
            o.transactions['host'].execute(
                PrefixData(o.config.host_prefix),
                PackageCacheData.first_writable().pkgs_dir)
        print(o.sections)
        stats = {}

        print("Final variant config")
        print(config.variant)
        print(o.variant)
        build(MetaData(recipe_path, o), None)

    # sorted_outputs
    # print(sorted_outputs[0].config.host_prefix)
    exit()

    for o in sorted_outputs:
        print("\n")
        print(o)
Example No. 36
    """
    Enables recursive dot notation for ``dict``.
    """

    return json.loads(json.dumps(inpt),
                      object_hook=lambda x: Bunch(**{
                          **Bunch(),
                          **x
                      }))


# Read the BIDS schema data
with (BIDSCOIN_SCHEMA_DIR.joinpath('objects/datatypes.yaml')
      ).open('r') as _stream:
    bidsdatatypesdef = yaml.load(
        _stream
    )  # The valid BIDS datatypes, along with their full names and descriptions
bidsdatatypes = {}
for _datatype in bidsdatatypesdef:  # The entities that can/should be present for each BIDS datatype
    with (BIDSCOIN_SCHEMA_DIR.joinpath(f'rules/datatypes/{_datatype}')
          ).with_suffix('.yaml').open('r') as _stream:
        bidsdatatypes[_datatype] = yaml.load(_stream)
with (BIDSCOIN_SCHEMA_DIR.joinpath('objects/suffixes.yaml')
      ).open('r') as _stream:
    suffixes = yaml.load(
        _stream)  # The descriptions of the valid BIDS file suffixes
with (BIDSCOIN_SCHEMA_DIR.joinpath('objects/entities.yaml')
      ).open('r') as _stream:
    entities = yaml.load(
        _stream)  # The descriptions of the entities present in BIDS filenames
with (BIDSCOIN_SCHEMA_DIR.joinpath('rules/entities.yaml')
Example No. 37
                        'conda', 'smithy', 'register-feedstock-token',
                        '--feedstock_directory', feedstock_dir
                    ] + owner_info)

                write_token('anaconda', os.environ['STAGING_BINSTAR_TOKEN'])
                subprocess.check_call([
                    'conda', 'smithy', 'rotate-binstar-token',
                    '--without-appveyor', '--token_name',
                    'STAGING_BINSTAR_TOKEN'
                ],
                                      cwd=feedstock_dir)

                yaml = YAML()
                with open(os.path.join(feedstock_dir, "conda-forge.yml"),
                          "r") as fp:
                    _cfg = yaml.load(fp.read())
                _cfg["conda_forge_output_validation"] = True
                with open(os.path.join(feedstock_dir, "conda-forge.yml"),
                          "w") as fp:
                    yaml.dump(_cfg, fp)
                subprocess.check_call(["git", "add", "conda-forge.yml"],
                                      cwd=feedstock_dir)
                subprocess.check_call(['conda', 'smithy', 'rerender'],
                                      cwd=feedstock_dir)
            except subprocess.CalledProcessError:
                exit_code = 0
                traceback.print_exception(*sys.exc_info())
                continue

            print("making a commit and pushing...")
            subprocess.check_call([
Example No. 38
from ruamel.yaml import YAML
import requests
"""
This script prints all the tools that do not have a corresponding biotools accession or doi.
"""

yaml = YAML()
yaml_recipe = YAML(typ="rt")  # pylint: disable=invalid-name
with open('../annotations.yaml', 'r') as read_file:
    file_annotations = yaml.load(read_file)

tools = {}
not_biotools = []


def search_tool(key):
    count = 0
    url = 'https://www.ebi.ac.uk/europepmc/webservices/rest/search?query=' + key + '&format=json&pageSize=1000'
    try:
        page = requests.get(url).json()
        if 'resultList' in page:
            for publication in page['resultList']['result']:

                common_name = key + ":"
                # if common_name in publication['title'].lower() and (
                #         'nmeth.' in publication['doi'] or 'bioinformatics' in publication['doi'] or 'nar\/' in publication['doi'] or 'gigascience' in publication['doi'] or 'nbt.' in publication['doi']):
                #     print(key + ' ---- ' + publication['title'] + ' --- ' + publication['doi'])
                if common_name in publication['title'].lower():
                    print(key + ' ---- ' + publication['title'] + ' --- ' +
                          '  -' + '   doi:' + publication['doi'])
Example No. 39
#!/usr/bin/python3

# Changes namespace, name, and version in Galaxy metadata.
# Useful for releasing to Automation Hub, where Collections live
# in namespaces separated from Ansible Galaxy.

import sys
from ruamel.yaml import YAML

filepath = "galaxy.yml"

buf = open(filepath).read()

yaml = YAML(typ="rt")
yaml.default_flow_style = False
yaml.preserve_quotes = True
yaml.indent(mapping=2, sequence=4, offset=2)

code = yaml.load(buf)

code["namespace"] = sys.argv[1]
code["name"] = sys.argv[2]
code["version"] = sys.argv[3]
yaml.dump(code, sys.stdout)
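
With preserve_quotes and the indent() settings above, a load/dump cycle keeps comments and the quoting of scalars that are not touched, so only the reassigned keys change in galaxy.yml. A small round-trip sketch on an illustrative document (not the real galaxy.yml):

import sys
from ruamel.yaml import YAML

yaml = YAML(typ="rt")
yaml.preserve_quotes = True
yaml.indent(mapping=2, sequence=4, offset=2)

code = yaml.load('namespace: "demo"  # quoted on purpose\nversion: 1.0.0\n')
code["version"] = "1.0.1"
yaml.dump(code, sys.stdout)
# Expected output: the comment and the quotes around "demo" survive,
# while version is rewritten to 1.0.1.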
Example No. 40
class Unifier:
    def __init__(self,
                 indir: str,
                 dir_name=INTEGRATIONS_DIR,
                 outdir='',
                 image_prefix=DEFAULT_IMAGE_PREFIX):

        directory_name = ""
        for optional_dir_name in DIR_TO_PREFIX:
            if optional_dir_name in indir:
                directory_name = optional_dir_name

        if not directory_name:
            print_error(
                'You have failed to provide a legal file path, a legal file path '
                'should contain either Integrations or Scripts directories')

        self.image_prefix = image_prefix
        self.package_path = indir
        if self.package_path.endswith(os.sep):
            self.package_path = self.package_path.rstrip(os.sep)

        self.dest_path = outdir

        yml_paths, self.yml_path = get_yml_paths_in_dir(
            self.package_path, Errors.no_yml_file(self.package_path))
        for path in yml_paths:
            # The plugin creates a unified YML file for the package.
            # In case this script runs locally and there is a unified YML file in the package we need to ignore it.
            # Also,
            # we don't take the unified file by default because
            # there might be packages that were not created by the plugin.
            if 'unified' not in path:
                self.yml_path = path
                break

        self.ryaml = YAML()
        self.ryaml.preserve_quotes = True
        self.ryaml.width = 400  # make sure long lines will not break (relevant for code section)
        if self.yml_path:
            with open(self.yml_path, 'r') as yml_file:
                self.yml_data = self.ryaml.load(yml_file)
        else:
            self.yml_data = {}
            print_error(f'No yml found in path: {self.package_path}')

        # script key for scripts is a string.
        # script key for integrations is a dictionary.
        self.is_script_package = isinstance(self.yml_data.get('script'), str)
        self.dir_name = SCRIPTS_DIR if self.is_script_package else dir_name

    def write_yaml_with_docker(self, yml_unified, yml_data, script_obj):
        """Write out the yaml file taking into account the dockerimage45 tag.
        If it is present will create 2 integration files
        One for 4.5 and below and one for 5.0.

        Arguments:
            output_path {str} -- output path
            yml_unified {dict} -- unified yml dict
            yml_data {dict} -- yml object
            script_obj {dict} -- script object

        Returns:
            dict -- dictionary mapping output path to unified data
        """
        output_map = {self.dest_path: yml_unified}
        if 'dockerimage45' in script_obj:
            # we need to split into two files 45 and 50. Current one will be from version 5.0
            if self.is_script_package:  # scripts
                del yml_unified['dockerimage45']
            else:  # integrations
                del yml_unified['script']['dockerimage45']

            yml_unified45 = copy.deepcopy(yml_unified)

            # validate that this is a script/integration which targets both 4.5 and 5.0+.
            if server_version_compare(yml_data.get('fromversion', '0.0.0'),
                                      '5.0.0') >= 0:
                raise ValueError(
                    f'Failed: {self.dest_path}. dockerimage45 set for 5.0 and later only'
                )

            yml_unified['fromversion'] = '5.0.0'

            # validate that this is a script/integration which targets both 4.5 and 5.0+.
            if server_version_compare(yml_data.get('toversion', '99.99.99'),
                                      '5.0.0') < 0:
                raise ValueError(
                    f'Failed: {self.dest_path}. dockerimage45 set for 4.5 and earlier only'
                )

            yml_unified45['toversion'] = '4.5.9'

            if script_obj.get(
                    'dockerimage45'
            ):  # we have a value for dockerimage45 set it as dockerimage
                if self.is_script_package:  # scripts
                    yml_unified45['dockerimage'] = script_obj.get(
                        'dockerimage45')
                else:  # integrations
                    yml_unified45['script']['dockerimage'] = script_obj.get(
                        'dockerimage45')

            else:  # no value for dockerimage45 remove the dockerimage entry
                del yml_unified45['dockerimage']

            output_path45 = re.sub(r'\.yml$', '_45.yml', self.dest_path)
            output_map = {
                self.dest_path: yml_unified,
                output_path45: yml_unified45,
            }

        for file_path, file_data in output_map.items():
            if os.path.isfile(file_path):
                raise ValueError(
                    f'Output file already exists: {self.dest_path}.'
                    ' Make sure to remove this file from source control'
                    ' or rename this package (for example if it is a v2).')

            with io.open(file_path, mode='w', encoding='utf-8') as file_:
                self.ryaml.dump(file_data, file_)

        return output_map

    def merge_script_package_to_yml(self):
        """Merge the various components to create an output yml file
        """
        print("Merging package: {}".format(self.package_path))
        package_dir_name = os.path.basename(self.package_path)
        output_filename = '{}-{}.yml'.format(DIR_TO_PREFIX[self.dir_name],
                                             package_dir_name)

        if self.dest_path:
            self.dest_path = os.path.join(self.dest_path, output_filename)
        else:
            self.dest_path = os.path.join(self.dir_name, output_filename)

        script_obj = self.yml_data

        if not self.is_script_package:
            script_obj = self.yml_data['script']
        script_type = TYPE_TO_EXTENSION[script_obj['type']]

        yml_unified = copy.deepcopy(self.yml_data)

        yml_unified, script_path = self.insert_script_to_yml(
            script_type, yml_unified, self.yml_data)
        image_path = None
        desc_path = None
        if not self.is_script_package:
            yml_unified, image_path = self.insert_image_to_yml(
                self.yml_data, yml_unified)
            yml_unified, desc_path = self.insert_description_to_yml(
                self.yml_data, yml_unified)

        output_map = self.write_yaml_with_docker(yml_unified, self.yml_data,
                                                 script_obj)
        unifier_outputs = list(output_map.keys(
        )), self.yml_path, script_path, image_path, desc_path
        print_color(f'Created unified yml: {list(output_map.keys())}',
                    LOG_COLORS.GREEN)

        return unifier_outputs[0]

    def insert_image_to_yml(self, yml_data, yml_unified):
        image_data, found_img_path = self.get_data("*png")
        image_data = self.image_prefix + base64.b64encode(image_data).decode(
            'utf-8')

        if yml_data.get('image'):
            raise ValueError(
                'Please move the image from the yml to an image file (.png)'
                f' in the package: {self.package_path}')

        yml_unified['image'] = image_data

        return yml_unified, found_img_path

    def insert_description_to_yml(self, yml_data, yml_unified):
        desc_data, found_desc_path = self.get_data('*_description.md')

        if yml_data.get('detaileddescription'):
            raise ValueError(
                'Please move the detailed description from the yml to a description file (.md)'
                f' in the package: {self.package_path}')
        if desc_data:
            yml_unified['detaileddescription'] = FoldedScalarString(
                desc_data.decode('utf-8'))

        return yml_unified, found_desc_path

    def get_data(self, extension):
        data_path = glob.glob(os.path.join(self.package_path, extension))
        data = None
        found_data_path = None
        if not self.is_script_package and data_path:
            found_data_path = data_path[0]
            with open(found_data_path, 'rb') as data_file:
                data = data_file.read()

        return data, found_data_path

    def get_code_file(self, script_type):
        """Return the first code file in the specified directory path
        :param script_type: script type: .py or .js
        :type script_type: str
        :return: path to found code file
        :rtype: str
        """

        ignore_regex = (
            r'CommonServerPython\.py|CommonServerUserPython\.py|demistomock\.py|_test\.py'
            r'|conftest\.py|__init__\.py|ApiModule\.py')

        if self.package_path.endswith('Scripts/CommonServerPython'):
            return os.path.join(self.package_path, 'CommonServerPython.py')

        if self.package_path.endswith('ApiModule'):
            return os.path.join(
                self.package_path,
                os.path.basename(os.path.normpath(self.package_path)) + '.py')

        script_path = list(
            filter(
                lambda x: not re.search(ignore_regex, x),
                glob.glob(os.path.join(self.package_path,
                                       '*' + script_type))))[0]

        return script_path

    def insert_script_to_yml(self, script_type, yml_unified, yml_data):
        script_path = self.get_code_file(script_type)
        with io.open(script_path, mode='r', encoding='utf-8') as script_file:
            script_code = script_file.read()

        # Check if the script imports an API module. If it does,
        # the API module code will be pasted in place of the import.
        module_import, module_name = self.check_api_module_imports(script_code)
        if module_import:
            script_code = self.insert_module_code(script_code, module_import,
                                                  module_name)

        clean_code = self.clean_python_code(script_code)

        if self.is_script_package:
            if yml_data.get('script', '') not in ('', '-'):
                print_warning(
                    f'Script section is not empty in package {self.package_path}. '
                    f'It should be blank or a dash (-).')

            yml_unified['script'] = FoldedScalarString(clean_code)

        else:
            if yml_data['script'].get('script', '') not in ('', '-'):
                print_warning(
                    f'Script section is not empty in package {self.package_path}. '
                    f'It should be blank or a dash (-).')

            yml_unified['script']['script'] = FoldedScalarString(clean_code)

        return yml_unified, script_path

    def get_script_package_data(self):
        # should be static method
        _, yml_path = get_yml_paths_in_dir(self.package_path, error_msg='')
        if not yml_path:
            raise Exception(
                f'No yml files found in package path: {self.package_path}. '
                'Is this really a package dir?')

        code_type = get_yaml(yml_path).get('type')
        unifier = Unifier(self.package_path)
        code_path = unifier.get_code_file(TYPE_TO_EXTENSION[code_type])
        with open(code_path, 'r') as code_file:
            code = code_file.read()

        return yml_path, code

    @staticmethod
    def check_api_module_imports(script_code: str) -> Tuple[str, str]:
        """
        Checks integration code for API module imports
        :param script_code: The integration code
        :return: The import string and the imported module name
        """

        # General regex to find API module imports, for example: "from MicrosoftApiModule import *  # noqa: E402"
        module_regex = r'from ([\w\d]+ApiModule) import \*(?:  # noqa: E402)?'

        module_match = re.search(module_regex, script_code)
        if module_match:
            return module_match.group(), module_match.group(1)

        return '', ''

    @staticmethod
    def insert_module_code(script_code: str, module_import: str,
                           module_name: str) -> str:
        """
        Inserts API module in place of an import to the module according to the module name
        :param script_code: The integration code
        :param module_import: The module import string to replace
        :param module_name: The module name
        :return: The integration script with the module code appended in place of the import
        """

        module_path = os.path.join('./Packs', 'ApiModules', 'Scripts',
                                   module_name, module_name + '.py')
        module_code = Unifier._get_api_module_code(module_name, module_path)

        module_code = '\n### GENERATED CODE ###\n# This code was inserted in place of an API module.{}\n' \
            .format(module_code)

        return script_code.replace(module_import, module_code)

    @staticmethod
    def _get_api_module_code(module_name, module_path):
        """
        Attempts to get the API module code from the ApiModules pack.
        :param module_name: The API module name
        :param module_path: The API module code file path
        :return: The API module code
        """
        try:
            with io.open(module_path, mode='r',
                         encoding='utf-8') as script_file:
                module_code = script_file.read()
        except Exception as exc:
            raise ValueError(
                'Could not retrieve the module [{}] code: {}'.format(
                    module_name, str(exc)))

        return module_code

    @staticmethod
    def clean_python_code(script_code, remove_print_future=True):
        script_code = script_code.replace("import demistomock as demisto", "")
        script_code = script_code.replace("from CommonServerPython import *",
                                          "")
        script_code = script_code.replace(
            "from CommonServerUserPython import *", "")
        # print function is imported in python loop
        if remove_print_future:  # docs generation requires to leave this
            script_code = script_code.replace(
                "from __future__ import print_function", "")
        return script_code
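
The regex in check_api_module_imports drives the replacement done later by insert_module_code: whatever text matched the import line is swapped for the generated-code banner plus the module source. A minimal standalone sketch of that match-and-replace step, using a made-up module name (DemoApiModule) and inline module code instead of a file under Packs/ApiModules:

import re

module_regex = r'from ([\w\d]+ApiModule) import \*(?:  # noqa: E402)?'

# Hypothetical integration code; DemoApiModule is not a real pack module.
script_code = 'import demistomock as demisto\nfrom DemoApiModule import *  # noqa: E402\n\nmain()\n'
module_code = 'def helper():\n    return 42\n'

match = re.search(module_regex, script_code)
if match:
    module_import, module_name = match.group(), match.group(1)
    banner = ('\n### GENERATED CODE ###\n'
              '# This code was inserted in place of an API module.{}\n').format(module_code)
    script_code = script_code.replace(module_import, banner)

print(script_code)
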
Example no. 41
0
    def __init__(self, meta_yaml):
        _yml = YAML(typ='jinja2')
        _yml.indent(mapping=2, sequence=4, offset=2)
        _yml.width = 160
        _yml.allow_duplicate_keys = True
        self.meta = _yml.load(meta_yaml)
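
The 'jinja2' typ above is provided by the separate ruamel.yaml.jinja2 plugin, which lets a Jinja-templated meta.yaml round-trip without the template expressions breaking the YAML parse; the indent, width and duplicate-key settings behave the same way on a plain round-trip loader. A small sketch that assumes no plugin, only ruamel.yaml itself:

import sys
from ruamel.yaml import YAML

yml = YAML()                     # default round-trip mode
yml.indent(mapping=2, sequence=4, offset=2)
yml.width = 160
yml.allow_duplicate_keys = True  # tolerate duplicate keys instead of raising

meta = yml.load('package:\n  name: demo\n  version: "1.0"\n')
yml.dump(meta, sys.stdout)
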
def main():
    parser = argparse.ArgumentParser(description='Jasper')
    parser.add_argument("--local_rank", default=None, type=int)
    parser.add_argument("--batch_size", default=32, type=int)
    parser.add_argument("--model_config", type=str, required=True)
    parser.add_argument("--eval_datasets", type=str, required=True)
    parser.add_argument("--load_dir", type=str, required=True)
    parser.add_argument("--vocab_file", type=str, required=True)
    parser.add_argument("--save_logprob", default=None, type=str)
    parser.add_argument("--lm_path", default=None, type=str)
    parser.add_argument("--beam_width", default=50, type=int)
    parser.add_argument("--alpha", default=2.0, type=float)
    parser.add_argument("--beta", default=1.0, type=float)
    parser.add_argument("--cutoff_prob", default=0.99, type=float)
    parser.add_argument("--cutoff_top_n", default=40, type=int)

    args = parser.parse_args()
    batch_size = args.batch_size
    load_dir = args.load_dir

    if args.local_rank is not None:
        if args.lm_path:
            raise NotImplementedError(
                "Beam search decoder with LM does not currently support evaluation on multi-gpu."
            )
        device = nemo.core.DeviceType.AllGpu
    else:
        device = nemo.core.DeviceType.GPU

    # Instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=nemo.core.Optimization.mxprO1,
        placement=device,
    )

    if args.local_rank is not None:
        logging.info('Doing ALL GPU')

    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)

    vocab = load_vocab(args.vocab_file)

    sample_rate = jasper_params['sample_rate']

    eval_datasets = args.eval_datasets

    eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"])
    eval_dl_params["normalize_transcripts"] = False
    del eval_dl_params["train"]
    del eval_dl_params["eval"]
    data_layer = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=eval_datasets,
        sample_rate=sample_rate,
        labels=vocab,
        batch_size=batch_size,
        **eval_dl_params,
    )

    n = len(data_layer)
    logging.info('Evaluating {0} examples'.format(n))

    data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
        sample_rate=sample_rate, **jasper_params["AudioToMelSpectrogramPreprocessor"],
    )
    jasper_encoder = nemo_asr.JasperEncoder(
        feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"], **jasper_params["JasperEncoder"],
    )
    jasper_decoder = nemo_asr.JasperDecoderForCTC(
        feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], num_classes=len(vocab),
    )
    greedy_decoder = nemo_asr.GreedyCTCDecoder()

    if args.lm_path:
        beam_width = args.beam_width
        alpha = args.alpha
        beta = args.beta
        cutoff_prob = args.cutoff_prob
        cutoff_top_n = args.cutoff_top_n
        beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(
            vocab=vocab,
            beam_width=beam_width,
            alpha=alpha,
            beta=beta,
            cutoff_prob=cutoff_prob,
            cutoff_top_n=cutoff_top_n,
            lm_path=args.lm_path,
            num_cpus=max(os.cpu_count(), 1),
        )

    logging.info('================================')
    logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}")
    logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}")
    logging.info(f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
    logging.info('================================')

    (audio_signal_e1, a_sig_length_e1, transcript_e1, transcript_len_e1,) = data_layer()
    processed_signal_e1, p_length_e1 = data_preprocessor(input_signal=audio_signal_e1, length=a_sig_length_e1)
    encoded_e1, encoded_len_e1 = jasper_encoder(audio_signal=processed_signal_e1, length=p_length_e1)
    log_probs_e1 = jasper_decoder(encoder_output=encoded_e1)
    predictions_e1 = greedy_decoder(log_probs=log_probs_e1)

    eval_tensors = [
        log_probs_e1,
        predictions_e1,
        transcript_e1,
        transcript_len_e1,
        encoded_len_e1,
    ]

    if args.lm_path:
        beam_predictions_e1 = beam_search_with_lm(log_probs=log_probs_e1, log_probs_length=encoded_len_e1)
        eval_tensors.append(beam_predictions_e1)

    evaluated_tensors = neural_factory.infer(tensors=eval_tensors, checkpoint_dir=load_dir,)

    greedy_hypotheses = post_process_predictions(evaluated_tensors[1], vocab)
    references = post_process_transcripts(evaluated_tensors[2], evaluated_tensors[3], vocab)
    cer = word_error_rate(hypotheses=greedy_hypotheses, references=references, use_cer=True)
    logging.info("Greedy CER {:.2f}%".format(cer * 100))

    if args.lm_path:
        beam_hypotheses = []
        # Over mini-batch
        for i in evaluated_tensors[-1]:
            # Over samples
            for j in i:
                beam_hypotheses.append(j[0][1])

        cer = word_error_rate(hypotheses=beam_hypotheses, references=references, use_cer=True)
        logging.info("Beam CER {:.2f}".format(cer * 100))

    if args.save_logprob:
        # Convert logits to list of numpy arrays
        logprob = []
        for i, batch in enumerate(evaluated_tensors[0]):
            for j in range(batch.shape[0]):
                logprob.append(batch[j][: evaluated_tensors[4][i][j], :].cpu().numpy())
        with open(args.save_logprob, 'wb') as f:
            pickle.dump(logprob, f, protocol=pickle.HIGHEST_PROTOCOL)
Example no. 43
0
def startup():
    if not os.path.exists(CONFIG_PATH):
        info("Foxify Directory Missing! Creating One For You...")
        os.makedirs(CONFIG_PATH)
    if not os.path.exists(DEFAULT_THEME_PATH):
        os.makedirs(DEFAULT_THEME_PATH)
    if not os.path.exists(DEFAULT_TWEAK_PATH):
        os.makedirs(DEFAULT_TWEAK_PATH)
    if not os.path.exists(DEFAULT_CONFIG):
        while True:
            info("If you have not yet setup userChrome CSS Cusotmization\nPlease Open Up Your Firefox Browser and Follow These Steps:")
            print("""\
1. Go to "about:support" by typing it into your Address Bar

2. Copy the File Path for your Profile Folder

3. Enter it below""")
            filepath = input("> ")
            print("You Entered:", filepath.strip())
            print("Is this correct? Y\\n")
            ans = input("> ")
            if ans.lower() == "y":
                DCONF['active_profile'] = os.path.realpath(filepath.strip())
                info("Writing Default Configuration...")
                with open(DEFAULT_CONFIG, 'w') as f:
                    yaml = YAML()
                    yaml.default_flow_style = False
                    yaml.dump(DCONF, f)
                info("Checking If userChrome CSS Customization is Enabled")
                with open(DCONF['active_profile'] + '/prefs.js', 'r') as f:
                    match = False
                    deact_match = False
                    for line in f.readlines():
                        if line == '"user_pref("toolkit.legacyUserProfileCustomizations.stylesheets", true);"':
                            match = True
                        if line == '"user_pref("toolkit.legacyUserProfileCustomizations.stylesheets", false);"':
                            deact_match = True
                if not match:
                    info('Enabling userChrome CSS Customization')
                    with open(DCONF['active_profile'] + '/prefs.js', 'a') as f:
                        f.write('user_pref("toolkit.legacyUserProfileCustomizations.stylesheets", false);')
                if not match and deact_match:
                    info('Enabling userChrome CSS Customization')
                    with open(DCONF['active_profile'] + '/prefs.js', 'w') as f:
                        content = f.read()
                        content = content.replace('user_pref("toolkit.legacyUserProfileCustomizations.stylesheets", false);', 'user_pref("toolkit.legacyUserProfileCustomizations.stylesheets", true);')
                        f.write()
                info('Checking For Chrome and Backup Directory')
                if not os.path.exists(DCONF['active_profile'] + '/chrome'):
                    os.makedirs(DCONF['active_profile'] + '/chrome')
                if not os.path.exists(DCONF['active_profile'] + '/chrome_backup'):
                    os.makedirs(DCONF['active_profile'] + '/chrome_backup')
                info('Chrome Directory and Backup Directory Created')
                break
            else:
                pass
    else:
        with open(DEFAULT_CONFIG, 'r') as f:
            yaml = YAML(typ='safe')
            config = yaml.load(f)
        if not config.get('config_version'):
            for k, v in DCONF.items():
                if not config.get(k):
                    config[k] = v
            with open(DEFAULT_CONFIG, 'w') as f:
                yaml = YAML()
                yaml.default_flow_style = False
                yaml.dump(config, f)
        if config['config_version'] != CONFIG_VERSION:
            for k, v in DCONF.items():
                if not config.get(k):
                    config[k] = v
            with open(DEFAULT_CONFIG, 'w') as f:
                yaml = YAML()
                yaml.default_flow_style = False
                yaml.dump(config, f)
        if config['check_for_updates']:
            res = requests.get('https://raw.githubusercontent.com/M4cs/foxify-cli/master/version').text
            if res == version:
                config['version'] = version
                with open(DEFAULT_CONFIG, 'w') as f:
                    yaml = YAML()
                    yaml.default_flow_style = False
                    yaml.dump(config, f)
            else:
                info("Update Available! Run 'pip3 install --upgrade foxify-cli' to Update to Version: " + res)
Example no. 44
0
                key = newkey
                newkey = ''

        lineNum += 1

    return ret_val

with open('index.md', 'r') as f:
    endReached = False

    data = f.read().split('\n')
    for d in data:
        print(d)
        if "<!-- AUTO-GENERATED-START -->" in d:
            print('| Key | Default Value | Description |')
            print('| --- | --- | --- |')
            break

    with open('values.yaml', 'r') as f_v:
        d_v = f_v.read()
        yaml = YAML()
        code = yaml.load(d_v)
        yaml.explicit_start = True
        yaml.dump(code, sys.stdout, transform=decode_helm_yaml)

    for d in data:
        if "<!-- AUTO-GENERATED-END -->" in d:
            endReached = True
        if endReached:
            print(d)
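
The interesting detail in this snippet is the transform= argument: yaml.dump() serializes the data to a string, hands that string to the callable, and writes whatever the callable returns to the stream, which is presumably how decode_helm_yaml turns values.yaml into the markdown table rows printed between the AUTO-GENERATED markers. A self-contained sketch with a hypothetical transform:

import sys
from ruamel.yaml import YAML

def add_banner(text):
    # hypothetical transform: prefix the dumped YAML with a comment banner
    return '# generated from values.yaml\n' + text

yaml = YAML()
values = yaml.load('replicaCount: 1\nimage:\n  tag: latest\n')
yaml.dump(values, sys.stdout, transform=add_banner)
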
Example no. 45
0
def create_all_dags(args, neural_factory):
    '''
    creates train and eval dags as well as their callbacks
    returns train loss tensor and callbacks'''

    # parse the config files
    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        contextnet_params = yaml.load(f)

    vocab = contextnet_params['labels']
    sample_rate = contextnet_params['sample_rate']

    # Calculate num_workers for dataloader
    total_cpus = os.cpu_count()
    cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1)

    # create data layer for training
    train_dl_params = copy.deepcopy(contextnet_params["AudioToTextDataLayer"])
    train_dl_params.update(contextnet_params["AudioToTextDataLayer"]["train"])
    del train_dl_params["train"]
    del train_dl_params["eval"]
    # del train_dl_params["normalize_transcripts"]

    data_layer_train = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=args.train_dataset,
        sample_rate=sample_rate,
        labels=vocab,
        batch_size=args.batch_size,
        num_workers=cpu_per_traindl,
        **train_dl_params,
    )

    N = len(data_layer_train)
    steps_per_epoch = int(
        N / (args.batch_size * args.iter_per_step * args.num_gpus))

    # create separate data layers for eval
    # we need separate eval dags for separate eval datasets
    # but all other modules in these dags will be shared

    eval_dl_params = copy.deepcopy(contextnet_params["AudioToTextDataLayer"])
    eval_dl_params.update(contextnet_params["AudioToTextDataLayer"]["eval"])
    del eval_dl_params["train"]
    del eval_dl_params["eval"]

    data_layers_eval = []
    if args.eval_datasets:
        for eval_dataset in args.eval_datasets:
            data_layer_eval = nemo_asr.AudioToTextDataLayer(
                manifest_filepath=eval_dataset,
                sample_rate=sample_rate,
                labels=vocab,
                batch_size=args.eval_batch_size,
                num_workers=cpu_per_traindl,
                **eval_dl_params,
            )

            data_layers_eval.append(data_layer_eval)
    else:
        logging.warning("There were no val datasets passed")

    # create shared modules

    data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
        sample_rate=sample_rate,
        **contextnet_params["AudioToMelSpectrogramPreprocessor"],
    )

    # Inject the `kernel_size_factor` kwarg to the ContextNet config
    # Skip the last layer  as that must be a pointwise kernel
    for idx in range(
            len(contextnet_params["ContextNetEncoder"]["jasper"]) - 1):
        contextnet_params["ContextNetEncoder"]["jasper"][idx][
            "kernel_size_factor"] = args.kernel_size_factor

    # (ContextNet uses the Jasper baseline encoder and decoder)
    encoder = nemo_asr.ContextNetEncoder(
        feat_in=contextnet_params["AudioToMelSpectrogramPreprocessor"]
        ["features"],
        **contextnet_params["ContextNetEncoder"],
    )

    decoder = nemo_asr.JasperDecoderForCTC(
        feat_in=contextnet_params["ContextNetEncoder"]["jasper"][-1]
        ["filters"],
        num_classes=len(vocab),
    )

    ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab), zero_infinity=True)

    greedy_decoder = nemo_asr.GreedyCTCDecoder()

    # create augmentation modules (only used for training) if their configs
    # are present

    multiply_batch_config = contextnet_params.get('MultiplyBatch', None)
    if multiply_batch_config:
        multiply_batch = nemo_asr.MultiplyBatch(**multiply_batch_config)

    spectr_augment_config = contextnet_params.get('SpectrogramAugmentation',
                                                  None)
    if spectr_augment_config:
        data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(
            **spectr_augment_config)

    # assemble train DAG

    (
        audio_signal_t,
        a_sig_length_t,
        transcript_t,
        transcript_len_t,
    ) = data_layer_train()

    processed_signal_t, p_length_t = data_preprocessor(
        input_signal=audio_signal_t, length=a_sig_length_t)

    if multiply_batch_config:
        (
            processed_signal_t,
            p_length_t,
            transcript_t,
            transcript_len_t,
        ) = multiply_batch(
            in_x=processed_signal_t,
            in_x_len=p_length_t,
            in_y=transcript_t,
            in_y_len=transcript_len_t,
        )

    if spectr_augment_config:
        processed_signal_t = data_spectr_augmentation(
            input_spec=processed_signal_t)

    encoded_t, encoded_len_t = encoder(audio_signal=processed_signal_t,
                                       length=p_length_t)
    log_probs_t = decoder(encoder_output=encoded_t)
    predictions_t = greedy_decoder(log_probs=log_probs_t)
    loss_t = ctc_loss(
        log_probs=log_probs_t,
        targets=transcript_t,
        input_length=encoded_len_t,
        target_length=transcript_len_t,
    )

    # create train callbacks
    train_callback = nemo.core.SimpleLossLoggerCallback(
        tensors=[loss_t, predictions_t, transcript_t, transcript_len_t],
        print_func=partial(monitor_asr_train_progress, labels=vocab),
        get_tb_values=lambda x: [["loss", x[0]]],
        tb_writer=neural_factory.tb_writer,
        step_freq=args.update_freq,
    )

    callbacks = [train_callback]

    if args.checkpoint_dir or args.load_dir:
        chpt_callback = nemo.core.CheckpointCallback(
            folder=args.checkpoint_dir,
            load_from_folder=args.load_dir,
            step_freq=args.checkpoint_save_freq,
        )

        callbacks.append(chpt_callback)

    # Log training metrics to wandb
    if args.project is not None:
        wand_callback = nemo.core.WandbCallback(
            train_tensors=[loss_t],
            wandb_name=args.exp_name,
            wandb_project=args.project,
            update_freq=args.update_freq,
            args=args,
        )
        callbacks.append(wand_callback)

    # assemble eval DAGs
    for i, eval_dl in enumerate(data_layers_eval):
        (
            audio_signal_e,
            a_sig_length_e,
            transcript_e,
            transcript_len_e,
        ) = eval_dl()
        processed_signal_e, p_length_e = data_preprocessor(
            input_signal=audio_signal_e, length=a_sig_length_e)
        encoded_e, encoded_len_e = encoder(audio_signal=processed_signal_e,
                                           length=p_length_e)
        log_probs_e = decoder(encoder_output=encoded_e)
        predictions_e = greedy_decoder(log_probs=log_probs_e)
        loss_e = ctc_loss(
            log_probs=log_probs_e,
            targets=transcript_e,
            input_length=encoded_len_e,
            target_length=transcript_len_e,
        )

        # create corresponding eval callback
        tagname = os.path.basename(args.eval_datasets[i]).split(".")[0]

        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[
                loss_e,
                predictions_e,
                transcript_e,
                transcript_len_e,
            ],
            user_iter_callback=partial(process_evaluation_batch, labels=vocab),
            user_epochs_done_callback=partial(process_evaluation_epoch,
                                              tag=tagname),
            eval_step=args.eval_freq,
            tb_writer=neural_factory.tb_writer,
        )

        callbacks.append(eval_callback)

    return loss_t, callbacks, steps_per_epoch
Example no. 46
0
from ruamel.yaml import YAML

yaml = YAML()

files = ('../remoteid/augmented.yaml', '../remoteid/canonical.yaml')
for fname in files:
    try:
        with open(fname, 'r') as f:
            api = yaml.load(f)
        with open(fname, 'w') as f:
            yaml.dump(api, f)
    except IOError:
        pass
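
Loading a file and immediately dumping it back looks like a no-op, but with the default round-trip loader it normalizes the formatting while keeping comments, key order and flow/block style, which is presumably why the remoteid specs are rewritten in place here. A quick sketch of what survives the cycle:

import sys
from ruamel.yaml import YAML

yaml = YAML()  # round-trip mode
doc = '# remote ID API\ninfo:\n  title: demo  # inline comment\npaths: {}\n'
api = yaml.load(doc)
yaml.dump(api, sys.stdout)  # comments, key order and the empty flow mapping come back out
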
Example no. 47
0
LOG_DIR = LOG_ROOT + TASK_NAME + '/' + time_str + '/'
os.makedirs(LOG_DIR, exist_ok=True)
LOGFILE = LOG_DIR + 'msgs.log'
TRAIN_LOG = LOG_DIR + 'training.log'
TEST_LOG = LOG_DIR + 'test.log'
MODEL_WTS = LOG_DIR + 'model_weights.hdf5'
LOG_YAML = 'logger_config.yaml'


def logfile():
    return logging.FileHandler(LOGFILE)


with open(LOG_YAML, 'rt') as f:
    yaml = YAML(typ='safe', pure=True)
    config = yaml.load(f.read())
    logging.config.dictConfig(config)

logger = logging.getLogger(TASK_NAME)
ex.logger = logger


@ex.capture
def build_train(N_train, train_batch_size, train_min_num_seq, train_max_num_seq, train_avg_len):
    ntm = build_ntm(N=N_train)
    data_gen = build_data_gen(ntm, train_batch_size, train_min_num_seq, train_max_num_seq, train_avg_len)
    return ntm, data_gen


@ex.capture
def build_test(N_test, test_batch_size, test_min_num_seq, test_max_num_seq, test_avg_len):
Example no. 48
0
def loadyaml(yamlp: YAML, source: str) -> Any:
    """Load YAML."""
    return yamlp.load(source)
Example no. 49
0
def load_yaml(path):
    with open(path, 'r') as yfile:
        yml = YAML()
        return yml.load(yfile)
Example no. 50
0
    3. if same exit, if not, continue
    4. modify bitrise.yml (update stack value)
    '''

    largest_semver = largest_version()
    tmp_file = 'tmp.yml'

    with open(BITRISE_YML, 'r') as infile:

        obj_yaml = YAML()

        # prevents re-formatting of yml file
        obj_yaml.preserve_quotes = True
        obj_yaml.width = 4096

        y = obj_yaml.load(infile)

        current_semver = y['workflows'][WORKFLOW]['meta']['bitrise.io'][
            'stack']

        # remove pattern prefix from current_semver to compare with largest
        current_semver = current_semver.split(pattern)[1]

        if current_semver == largest_semver:
            print('Xcode version unchanged! aborting.')
        else:
            print('New Xcode version available: {0} ... updating bitrise.yml!'.
                  format(largest_semver))
            # add prefix pattern back to be recognizable by bitrise
            # as a valid stack value
            y['workflows'][WORKFLOW]['meta']['bitrise.io'][
    def read_file(self, filename):
        yaml = YAML(typ='safe')
        yaml.default_flow_style = False
        # close the file as soon as it has been parsed
        with open(filename, "r") as mydoc:
            return yaml, yaml.load(mydoc)
Example no. 52
0
def read_yaml(file_path):
    with open(file_path, 'r') as stream:
        yaml = YAML()
        return yaml.load(stream.read())
Example no. 53
0
You are receiving this email because you indicated that you are interested in {}.  
To unsubscribe, use [this link](%mailing_list_unsubscribe_url%)
""".format

MEETING_MESSAGE_FOOTER = """
---
You are receiving this email because you registered for a VSF Zoom meeting with ID {}.
""".format

if __name__ == "__main__":
    yaml = YAML()
    repo = common.vsf_repo()
    issue = repo.get_issue(int(os.getenv("ISSUE_NUMBER")))
    data = issue.body.replace('\r', '')
    header, body = data.split('---', maxsplit=1)
    header = yaml.load(header)
    if header["to"] in ("vsf_announce", "speakers_corner"):
        to = header["to"] + "@mail.virtualscienceforum.org"
        body += MAILING_LIST_FOOTER(MAILING_LIST_DESCRIPTIONS[header["to"]])
        response = common.api_query(requests.post,
                                    common.MAILGUN_BASE_URL + "messages",
                                    data={
                                        "from": header["from"],
                                        "to": to,
                                        "subject": header["subject"],
                                        "text": common.markdown_to_plain(body),
                                        "html": common.markdown_to_email(body),
                                    })
    else:
        meeting_id = int(header["to"])
        # We are sending an email to zoom meeting participants
Example no. 54
0
def _is_new_config(filename, tmp_filename):
    with open(filename, "r") as old_file, open(tmp_filename, "r") as new_file:
        yaml = YAML()
        old_config = yaml.load(old_file)
        new_config = yaml.load(new_file)
    return old_config != new_config
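
The != comparison works because the round-trip loader returns CommentedMap/CommentedSeq objects that compare by content, exactly like plain dicts and lists; comments and styling live on side attributes and do not affect equality. A small sketch with the file contents inlined as strings:

from ruamel.yaml import YAML

yaml = YAML()
old_config = yaml.load('host: example.org  # production\nports: [80, 443]\n')
new_config = yaml.load('host: example.org\nports:\n  - 80\n  - 443\n')
print(old_config == new_config)  # True: values match, comments and style are ignored
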
Example no. 55
0
from ruamel.yaml import YAML

yaml = YAML(typ='safe')

with open('config.yaml', 'r') as f:
    config = yaml.load(f)
Example no. 56
0
def _fix_token(config_file=None, force=False, verify=True):
    from ruamel.yaml import YAML
    yaml = YAML()
    config_file = config_file or TRAVIS_CONFIG_FILE
    with open(config_file, "r") as _file:
        try:
            travis_config = yaml.load(_file)
        except Exception:
            raise ValueError(
                "Failed to parse the travis configuration. "
                "Make sure the config only contains valid YAML and keys as specified by travis."
            )

        # Get the generated token from the top level deploy config added by the travis cli
        try:
            real_token = travis_config["deploy"]["api_key"]["secure"]
        except (TypeError, KeyError):
            raise AssertionError("Can't find any top level deployment tokens")

        try:
            # Find the build stage that deploys to releases
            releases_stages = [
                stage
                for stage in travis_config["jobs"]["include"]
                if stage.get("deploy", dict()).get("provider") == "releases"
            ]
            assert (
                len(releases_stages) > 0
            ), "Can't set the new token because there are no stages deploying to releases"
            assert (
                len(releases_stages) < 2
            ), "Can't set the new token because there are multiple stages deploying to releases"
        except (TypeError, KeyError):
            raise AssertionError(
                "Can't set the new token because there are no deployment stages")

        try:
            is_mock_token = releases_stages[0]["deploy"]["token"]["secure"] == "REPLACE_ME"
            is_same_token = releases_stages[0]["deploy"]["token"]["secure"] == real_token

            unmodified = is_mock_token or is_same_token
        except (TypeError, KeyError):
            unmodified = False

        # Set the new generated token as the stages deploy token
        _create(releases_stages[0], "deploy", "token", "secure")
        releases_stages[0]["deploy"]["token"]["secure"] = real_token

        # Make sure it is fine to overwrite the config file
        assert unmodified or force, (
            'The secure token in the "{}" stage has already been changed. '
            "Retry with --force if you are sure about replacing it.".format(
                releases_stages[0].get("stage", "releases deployment")
            )
        )

        # Remove the top level deploy config added by the travis cli
        travis_config.pop("deploy")

        if not unmodified and verify:
            pprint.pprint(travis_config)
            if (
                not input("Do you want to save this configuration? (y/n) ")
                .strip()
                .lower()
                == "y"
            ):
                return

    # Save the new travis config
    assert travis_config
    with open(config_file, "w") as _file:
        yaml.dump(travis_config, _file)
    print("Fixed!")
import tensorflow as tf
import tensorflow.contrib.slim as slim
from input_ops import create_input_ops
from model import Model
from ruamel.yaml import YAML
from util import log

try:
    import better_exceptions
except ImportError:
    pass

yaml_path = Path('config.yaml')
yaml = YAML(typ='safe')
config = yaml.load(yaml_path)
paths = config['paths']
h5py_dir = paths['h5py_dir']
logs_dir = paths['logs_dir']


class Trainer(object):
    def __init__(self, config, dataset, dataset_test):

        self.config = config

        hyper_parameter_str = config.model + '-is_' + str(config.img_size) + '-bs_' + str(config.batch_size) + \
        '-lr_' + "{:.2E}".format(config.learning_rate) + '-ur_' + str(config.update_rate)
        self.train_dir = logs_dir + '/%s-%s/train_dir/' % (
            hyper_parameter_str, time.strftime("%Y%m%d_%H%M%S"))
        self.test_dir = logs_dir + '/%s-%s/test_dir/' % (
Example no. 58
0
def main():
    parser = argparse.ArgumentParser(description='Jasper')
    # model params
    parser.add_argument("--model_config", type=str, required=True)
    parser.add_argument("--eval_datasets", type=str, required=True)
    parser.add_argument("--load_dir", type=str, required=True)
    # run params
    parser.add_argument("--local_rank", default=None, type=int)
    parser.add_argument("--batch_size", default=64, type=int)
    parser.add_argument("--amp_opt_level", default="O1", type=str)
    # store results
    parser.add_argument("--save_logprob", default=None, type=str)

    # lm inference parameters
    parser.add_argument("--lm_path", default=None, type=str)
    parser.add_argument('--alpha', default=2.0, type=float, help='value of LM weight', required=False)
    parser.add_argument(
        '--alpha_max',
        type=float,
        help='maximum value of LM weight (for a grid search in \'eval\' mode)',
        required=False,
    )
    parser.add_argument(
        '--alpha_step', type=float, help='step for LM weight\'s tuning in \'eval\' mode', required=False, default=0.1
    )
    parser.add_argument('--beta', default=1.5, type=float, help='value of word count weight', required=False)
    parser.add_argument(
        '--beta_max',
        type=float,
        help='maximum value of word count weight (for a grid search in \
          \'eval\' mode)',
        required=False,
    )
    parser.add_argument(
        '--beta_step',
        type=float,
        help='step for word count weight\'s tuning in \'eval\' mode',
        required=False,
        default=0.1,
    )
    parser.add_argument("--beam_width", default=128, type=int)

    args = parser.parse_args()
    batch_size = args.batch_size
    load_dir = args.load_dir

    if args.local_rank is not None:
        if args.lm_path:
            raise NotImplementedError(
                "Beam search decoder with LM does not currently support evaluation on multi-gpu."
            )
        device = nemo.core.DeviceType.AllGpu
    else:
        device = nemo.core.DeviceType.GPU

    # Instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        local_rank=args.local_rank, optimization_level=args.amp_opt_level, placement=device,
    )

    if args.local_rank is not None:
        logging.info('Doing ALL GPU')

    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)
    try:
        vocab = jasper_params['labels']
        sample_rate = jasper_params['sample_rate']
    except KeyError:
        logging.error("Please make sure you are using older config format (the ones with -old suffix)")
        exit(1)

    eval_datasets = args.eval_datasets

    eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"])
    del eval_dl_params["train"]
    del eval_dl_params["eval"]
    data_layer = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=eval_datasets,
        sample_rate=sample_rate,
        labels=vocab,
        batch_size=batch_size,
        **eval_dl_params,
    )

    N = len(data_layer)
    logging.info('Evaluating {0} examples'.format(N))

    data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
        sample_rate=sample_rate, **jasper_params["AudioToMelSpectrogramPreprocessor"]
    )
    jasper_encoder = nemo_asr.JasperEncoder(
        feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"], **jasper_params["JasperEncoder"]
    )
    jasper_decoder = nemo_asr.JasperDecoderForCTC(
        feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], num_classes=len(vocab)
    )
    greedy_decoder = nemo_asr.GreedyCTCDecoder()

    logging.info('================================')
    logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}")
    logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}")
    logging.info(f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
    logging.info('================================')

    # Define inference DAG
    audio_signal_e1, a_sig_length_e1, transcript_e1, transcript_len_e1 = data_layer()
    processed_signal_e1, p_length_e1 = data_preprocessor(input_signal=audio_signal_e1, length=a_sig_length_e1)
    encoded_e1, encoded_len_e1 = jasper_encoder(audio_signal=processed_signal_e1, length=p_length_e1)
    log_probs_e1 = jasper_decoder(encoder_output=encoded_e1)
    predictions_e1 = greedy_decoder(log_probs=log_probs_e1)

    eval_tensors = [log_probs_e1, predictions_e1, transcript_e1, transcript_len_e1, encoded_len_e1]

    # inference
    evaluated_tensors = neural_factory.infer(tensors=eval_tensors, checkpoint_dir=load_dir)

    greedy_hypotheses = post_process_predictions(evaluated_tensors[1], vocab)
    references = post_process_transcripts(evaluated_tensors[2], evaluated_tensors[3], vocab)

    wer = word_error_rate(hypotheses=greedy_hypotheses, references=references)
    logging.info("Greedy WER {:.2f}%".format(wer * 100))

    # Convert logits to list of numpy arrays
    logprob = []
    for i, batch in enumerate(evaluated_tensors[0]):
        for j in range(batch.shape[0]):
            logprob.append(batch[j][: evaluated_tensors[4][i][j], :].cpu().numpy())
    if args.save_logprob:
        with open(args.save_logprob, 'wb') as f:
            pickle.dump(logprob, f, protocol=pickle.HIGHEST_PROTOCOL)

    # language model
    if args.lm_path:
        if args.alpha_max is None:
            args.alpha_max = args.alpha
        # include alpha_max in tuning range
        args.alpha_max += args.alpha_step / 10.0

        if args.beta_max is None:
            args.beta_max = args.beta
        # include beta_max in tuning range
        args.beta_max += args.beta_step / 10.0

        beam_wers = []

        logprobexp = [np.exp(p) for p in logprob]
        for alpha in np.arange(args.alpha, args.alpha_max, args.alpha_step):
            for beta in np.arange(args.beta, args.beta_max, args.beta_step):
                logging.info('================================')
                logging.info(f'Inferring with (alpha, beta): ({alpha}, {beta})')
                beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(
                    vocab=vocab,
                    beam_width=args.beam_width,
                    alpha=alpha,
                    beta=beta,
                    lm_path=args.lm_path,
                    num_cpus=max(os.cpu_count(), 1),
                    input_tensor=False,
                )

                beam_predictions = beam_search_with_lm(log_probs=logprobexp, log_probs_length=None, force_pt=True)

                beam_predictions = [b[0][1] for b in beam_predictions[0]]
                lm_wer = word_error_rate(hypotheses=beam_predictions, references=references)
                logging.info("Beam WER {:.2f}%".format(lm_wer * 100))
                beam_wers.append(((alpha, beta), lm_wer * 100))

        logging.info('Beam WER for (alpha, beta)')
        logging.info('================================')
        logging.info('\n' + '\n'.join([str(e) for e in beam_wers]))
        logging.info('================================')
        best_beam_wer = min(beam_wers, key=lambda x: x[1])
        logging.info('Best (alpha, beta): ' f'{best_beam_wer[0]}, ' f'WER: {best_beam_wer[1]:.2f}%')
Example no. 59
0
class Config(object):
    def __init__(self, configFile: str):
        self.configFile = configFile
        self._configData = {}
        self.yaml = YAML()
        self._inBaseConfig = []

    def loadConfig(self) -> None:
        configData = self._readConfig(self.configFile)
        self._validate(configData)
        self._configData = configData

    def _readConfig(self, fileName: str) -> Dict:
        try:
            with open(fileName, mode='r') as config:
                configData = self.yaml.load(config)
                if not configData:
                    configData = {}
                # if this is the base server config, store what keys we loaded
                if fileName == self.configFile:
                    self._inBaseConfig = list(configData.keys())
        except Exception as e:
            raise ConfigError(fileName, e)

        if 'import' not in configData:
            return configData

        for fname in configData['import']:
            includeConfig = self._readConfig('{}/{}.yaml'
                                             .format(os.path.dirname(os.path.abspath(fileName)),
                                                     fname))
            for key, val in includeConfig.items():
                # not present in base config, just assign it
                if key not in configData:
                    configData[key] = val
                    continue
                # skip non-collection types that are already set
                if isinstance(configData[key], (str, int)):
                    continue
                if isinstance(val, str):
                    raise ConfigError(fname, 'The included config file tried '
                                             'to merge a non-string with a '
                                             'string')
                try:
                    iter(configData[key])
                    iter(val)
                except TypeError:
                    # not a collection, so just don't merge them
                    pass
                else:
                    try:
                        # merge with + operator
                        configData[key] += val
                    except TypeError:
                        # dicts can't merge with +
                        try:
                            for subKey, subVal in val.items():
                                if subKey not in configData[key]:
                                    configData[key][subKey] = subVal
                        except (AttributeError, TypeError):
                            # if either of these, they weren't both dicts.
                            raise ConfigError(fname, 'The variable {!r} could '
                                                     'not be successfully '
                                                     'merged'.format(key))

        return configData

    def writeConfig(self) -> None:
        # filter the configData to only those keys
        # that were present in the base server config,
        # or have been modified at runtime
        configData = copy.deepcopy(self._configData)
        to_delete = set(configData.keys()).difference(self._inBaseConfig)
        for key in to_delete:
            del configData[key]

        # write the filtered configData
        try:
            with open(self.configFile, mode='w') as config:
                self.yaml.dump(configData, config)
        except Exception as e:
            raise ConfigError(self.configFile, e)

    def getWithDefault(self, key: str, default=None) -> Any:
        if key in self._configData:
            return self._configData[key]
        return default

    def _validate(self, configData) -> None:
        for key in _required:
            if key not in configData:
                raise ConfigError(self.configFile,
                                  'Required item {!r} was not found in the config.'.format(key))

    def __len__(self):
        return len(self._configData)

    def __iter__(self):
        return iter(self._configData)

    def __getitem__(self, key):
        return self._configData[key]

    def __setitem__(self, key, value):
        # mark this key to be saved in the server config
        if key not in self._inBaseConfig:
            self._inBaseConfig.append(key)

        self._configData[key] = value

    def __contains__(self, key):
        return key in self._configData
Example no. 60
0
def create_all_dags(args, neural_factory):
    """Create Directed Acyclic Graph (DAG) for training and evaluation
    """
    logger = neural_factory.logger
    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)
    vocab = jasper_params['labels']
    sample_rate = jasper_params['sample_rate']

    # Calculate num_workers for dataloader
    total_cpus = os.cpu_count()
    cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1)

    # train params
    # perturb_config = jasper_params.get('perturb', None)
    train_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    train_dl_params.update(jasper_params["AudioToTextDataLayer"]["train"])
    del train_dl_params["train"]
    del train_dl_params["eval"]
    # del train_dl_params["normalize_transcripts"]

    data_layer = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=args.train_dataset,
        sample_rate=sample_rate,
        labels=vocab,
        batch_size=args.batch_size,
        num_workers=cpu_per_traindl,
        **train_dl_params,
        # normalize_transcripts=False
    )

    N = len(data_layer)
    steps_per_epoch = int(N / (args.batch_size * args.num_gpus))
    logger.info('Have {0} examples to train on.'.format(N))

    data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(
        sample_rate=sample_rate,
        **jasper_params["AudioToMelSpectrogramPreprocessor"])

    multiply_batch_config = jasper_params.get('MultiplyBatch', None)
    if multiply_batch_config:
        multiply_batch = nemo_asr.MultiplyBatch(**multiply_batch_config)

    spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None)
    if spectr_augment_config:
        data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(
            **spectr_augment_config)

    eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"])
    del eval_dl_params["train"]
    del eval_dl_params["eval"]
    data_layers_eval = []

    if args.eval_datasets:
        for eval_datasets in args.eval_datasets:
            data_layer_eval = nemo_asr.AudioToTextDataLayer(
                manifest_filepath=eval_datasets,
                sample_rate=sample_rate,
                labels=vocab,
                batch_size=args.eval_batch_size,
                num_workers=cpu_per_traindl,
                **eval_dl_params,
            )

            data_layers_eval.append(data_layer_eval)
    else:
        neural_factory.logger.info("There were no val datasets passed")

    # set encoder and decoders
    jasper_encoder = nemo_asr.JasperEncoder(
        feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"],
        **jasper_params["JasperEncoder"])

    jasper_decoder = nemo_asr.JasperDecoderForCTC(
        feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"],
        num_classes=len(vocab),
        factory=neural_factory)

    ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab))

    greedy_decoder = nemo_asr.GreedyCTCDecoder()

    # finetuning - restore encoder and decoder
    if args.finetune:
        logger.info("Finetuning")
        jasper_encoder.restore_from(args.load_encoder)
        logger.info("Loaded encoder: {}".format(args.load_encoder))
        if args.load_decoder != "":
            jasper_decoder.restore_from(args.load_decoder)
            logger.info("Loaded decoder: {}".format(args.load_decoder))

    logger.info('================================')
    logger.info(
        f"Number of parameters in encoder: {jasper_encoder.num_weights}")
    logger.info(
        f"Number of parameters in decoder: {jasper_decoder.num_weights}")
    logger.info(f"Total number of parameters: "
                f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
    logger.info('================================')

    # Train DAG
    audio_signal_t, a_sig_length_t, \
        transcript_t, transcript_len_t = data_layer()
    processed_signal_t, p_length_t = data_preprocessor(
        input_signal=audio_signal_t, length=a_sig_length_t)

    if multiply_batch_config:
        processed_signal_t, p_length_t, transcript_t, transcript_len_t = \
            multiply_batch(
                in_x=processed_signal_t, in_x_len=p_length_t,
                in_y=transcript_t,
                in_y_len=transcript_len_t)

    if spectr_augment_config:
        processed_signal_t = data_spectr_augmentation(
            input_spec=processed_signal_t)

    encoded_t, encoded_len_t = jasper_encoder(audio_signal=processed_signal_t,
                                              length=p_length_t)
    log_probs_t = jasper_decoder(encoder_output=encoded_t)
    predictions_t = greedy_decoder(log_probs=log_probs_t)
    loss_t = ctc_loss(log_probs=log_probs_t,
                      targets=transcript_t,
                      input_length=encoded_len_t,
                      target_length=transcript_len_t)

    # Callbacks needed to print info to console and Tensorboard
    train_callback = nemo.core.SimpleLossLoggerCallback(
        tensors=[loss_t, predictions_t, transcript_t, transcript_len_t],
        print_func=partial(monitor_asr_train_progress,
                           labels=vocab,
                           logger=logger),
        get_tb_values=lambda x: [("loss", x[0])],
        tb_writer=neural_factory.tb_writer,
    )

    chpt_callback = nemo.core.CheckpointCallback(
        folder=neural_factory.checkpoint_dir,
        step_freq=args.checkpoint_save_freq)

    callbacks = [train_callback, chpt_callback]

    # assemble eval DAGs
    for i, eval_dl in enumerate(data_layers_eval):
        audio_signal_e, a_sig_length_e, transcript_e, transcript_len_e = \
            eval_dl()
        processed_signal_e, p_length_e = data_preprocessor(
            input_signal=audio_signal_e, length=a_sig_length_e)
        encoded_e, encoded_len_e = jasper_encoder(
            audio_signal=processed_signal_e, length=p_length_e)
        log_probs_e = jasper_decoder(encoder_output=encoded_e)
        predictions_e = greedy_decoder(log_probs=log_probs_e)
        loss_e = ctc_loss(log_probs=log_probs_e,
                          targets=transcript_e,
                          input_length=encoded_len_e,
                          target_length=transcript_len_e)

        # create corresponding eval callback
        tagname = os.path.basename(args.eval_datasets[i]).split(".")[0]
        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[
                loss_e, predictions_e, transcript_e, transcript_len_e
            ],
            user_iter_callback=partial(process_evaluation_batch, labels=vocab),
            user_epochs_done_callback=partial(process_evaluation_epoch,
                                              tag=tagname,
                                              logger=logger),
            eval_step=args.eval_freq,
            tb_writer=neural_factory.tb_writer)

        callbacks.append(eval_callback)
    return loss_t, callbacks, steps_per_epoch