Example #1
from ruamel.yaml import YAML


def deployment_yaml_string_to_pb(deployment_yaml_string):
    yaml = YAML()
    deployment_yaml = yaml.load(deployment_yaml_string)
    return deployment_dict_to_pb(deployment_yaml)
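A minimal sketch of the loading half; deployment_dict_to_pb is project-specific, so the sketch stops at the parsed mapping:

yaml = YAML()  # round-trip loader/dumper by default
deployment = yaml.load("name: web\nreplicas: 3\n")
print(deployment["name"], deployment["replicas"])  # web 3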
Example #2
import subprocess
import sys
import tempfile
import pytest

from contextlib import redirect_stderr, redirect_stdout
from pathlib import Path
from textwrap import dedent
from ruamel.yaml import YAML

from auth import KeyProvider
from utils import print_colour
from file_acquisition import get_decrypted_file, get_decrypted_files

# Without `pure=True`, I get an exception about str / byte issues
yaml = YAML(typ="safe", pure=True)
helm_charts_dir = Path(__file__).parent.parent.joinpath("helm-charts")


class Hub:
    """
    A single, deployable JupyterHub
    """
    def __init__(self, cluster, spec):
        self.cluster = cluster
        self.spec = spec

    def get_generated_config(self, auth_provider: KeyProvider, secret_key):
        """
        Generate config automatically for each hub
Example #3
    win_wshshl.SendKeys('{F13}')


def say_nihao(systray):
    syncSpeak('你好')


def make_beep(systray):
    winsound.Beep(1000, 1000)


# ━━━ Read/write configuration ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

from ruamel.yaml import YAML

yaml = YAML()
from bin.common import count_file


def get_count():  # Read the count value directly from the config file
    with open(count_file, encoding='utf-8') as f:
        return yaml.load(f)['count']


def set_count(count):  # Write the count value directly to the config file
    with open(count_file, 'w', encoding='utf-8') as f:
        yaml.dump({'count': count}, f)


# ■■■ Core functionality ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
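A self-contained sketch of the get_count/set_count round trip, with a temporary file standing in for bin.common's count_file:

import os
import tempfile

from ruamel.yaml import YAML

yaml = YAML()
count_file = os.path.join(tempfile.mkdtemp(), 'config.yaml')  # stand-in for bin.common.count_file

with open(count_file, 'w', encoding='utf-8') as f:
    yaml.dump({'count': 42}, f)
with open(count_file, encoding='utf-8') as f:
    print(yaml.load(f)['count'])  # 42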
Example #4
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This module handles string related IO.
"""

from io import StringIO

from ruamel.yaml import YAML

yaml = YAML(typ='unsafe')


def read_yaml_str(content: str) -> object:
    """Parse the given yaml str and return the python object."""
    return yaml.load(content)


def to_yaml_str(obj: object) -> str:
    """Converts the given python object into a YAML string."""
    stream = StringIO()
    yaml.dump(obj, stream)
    return stream.getvalue()
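Continuing from the module above, a quick round trip. Since typ='unsafe' can instantiate arbitrary Python objects from tagged nodes, these helpers should only ever see trusted input:

data = read_yaml_str("a: 1\nb: [2, 3]\n")
assert data == {'a': 1, 'b': [2, 3]}
print(to_yaml_str(data))  # serialized back as YAML text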
Example #5
def update_lastmod(verbose):
    count = 0
    yaml = YAML()

    for post in glob.glob(os.path.join(POSTS_PATH, "*.md")):

        git_log_count = subprocess.getoutput(
            "git log --pretty=%ad {} | wc -l".format(post))

        if git_log_count == "1":
            continue

        git_lastmod = subprocess.getoutput(
            "git log -1 --pretty=%ad --date=iso " + post)

        if not git_lastmod:
            continue

        latest_commit = subprocess.getoutput("git log -1 --pretty=%B " + post)

        if "[Automation]" in latest_commit and "Lastmod" in latest_commit:
            continue

        frontmatter, line_num = get_yaml(post)
        meta = yaml.load(frontmatter)

        if 'seo' in meta:
            if ('date_modified' in meta['seo']
                    and meta['seo']['date_modified'] == git_lastmod):
                continue
            else:
                meta['seo']['date_modified'] = git_lastmod
        else:
            meta.insert(line_num, 'seo', dict(date_modified=git_lastmod))

        output = 'new.md'
        if os.path.isfile(output):
            os.remove(output)

        with open(output, 'w') as new, open(post, 'r') as old:
            new.write("---\n")
            yaml.dump(meta, new)
            new.write("---\n")
            line_num += 2

            lines = old.readlines()

            for line in lines:
                if line_num > 0:
                    line_num -= 1
                    continue
                else:
                    new.write(line)

        shutil.move(output, post)
        count += 1

        if verbose:
            print("[INFO] update 'lastmod' for:" + post)

    if count > 0:
        print("[INFO] Success to update lastmod for {} post(s).".format(count))
Example #6
def _get_yaml():
    y = YAML(typ='safe')
    y.default_flow_style = False
    return y
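A short sketch of what default_flow_style = False buys: collections are dumped in block style rather than inline flow style:

import sys

y = _get_yaml()
y.dump({'model': {'layers': [64, 128]}}, sys.stdout)
# model:
#   layers:
#   - 64
#   - 128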
Example #7
def main():
    args = parse_args()
    name = construct_name(
        args.exp_name,
        args.lr,
        args.batch_size,
        args.max_steps,
        args.num_epochs,
        args.weight_decay,
        args.optimizer,
        args.iter_per_step,
    )

    # time stamp
    date_time = datetime.now().strftime("%m-%d-%Y -- %H-%M-%S")

    log_dir = name
    if args.work_dir:
        log_dir = os.path.join(args.work_dir, name)

    if args.tensorboard_dir is None:
        tensorboard_dir = os.path.join(name, 'tensorboard', date_time)
    else:
        tensorboard_dir = args.tensorboard_dir

    if args.checkpoint_dir is None:
        checkpoint_dir = os.path.join(name, date_time)
    else:
        base_checkpoint_dir = args.checkpoint_dir
        if len(glob.glob(os.path.join(base_checkpoint_dir, '*.pt'))) > 0:
            checkpoint_dir = base_checkpoint_dir
        else:
            checkpoint_dir = os.path.join(args.checkpoint_dir, date_time)

    # instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=log_dir,
        checkpoint_dir=checkpoint_dir,
        create_tb_writer=args.create_tb_writer,
        files_to_copy=[args.model_config, __file__],
        cudnn_benchmark=args.cudnn_benchmark,
        tensorboard_dir=tensorboard_dir,
    )
    args.num_gpus = neural_factory.world_size

    if args.local_rank is not None:
        logging.info('Doing ALL GPU')

    # build dags
    train_loss, callbacks, steps_per_epoch = create_all_dags(
        args, neural_factory)

    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)

    lr_schedule = jasper_params.get('lr_schedule', 'CosineAnnealing')

    if lr_schedule == 'CosineAnnealing':
        lr_policy = CosineAnnealing(
            total_steps=args.max_steps if args.max_steps is not None else
            args.num_epochs * steps_per_epoch,
            warmup_ratio=args.warmup_ratio,
            min_lr=args.min_lr,
        )
    elif lr_schedule == 'PolynomialDecayAnnealing':
        lr_policy = PolynomialDecayAnnealing(
            total_steps=args.max_steps if args.max_steps is not None else
            args.num_epochs * steps_per_epoch,
            warmup_ratio=args.warmup_ratio,
            min_lr=args.min_lr,
            power=2.0,
        )
    elif lr_schedule == 'PolynomialHoldDecayAnnealing':
        lr_policy = PolynomialHoldDecayAnnealing(
            total_steps=args.max_steps if args.max_steps is not None else
            args.num_epochs * steps_per_epoch,
            warmup_ratio=args.warmup_ratio,
            hold_ratio=args.hold_ratio,
            min_lr=args.min_lr,
            power=2.0,
        )
    else:
        raise ValueError("LR schedule is invalid !")

    logging.info(f"Using `{lr_policy}` Learning Rate Scheduler")

    # train model
    neural_factory.train(
        tensors_to_optimize=[train_loss],
        callbacks=callbacks,
        lr_policy=lr_policy,
        optimizer=args.optimizer,
        optimization_params={
            "num_epochs": args.num_epochs,
            "max_steps": args.max_steps,
            "lr": args.lr,
            "momentum": 0.95,
            "betas": (args.beta1, args.beta2),
            "weight_decay": args.weight_decay,
            "grad_norm_clip": None,
        },
        batches_per_step=args.iter_per_step,
    )
Example #8
        )
        args = parser.parse_args()

        render = 1
        if args.unity:
                render = 0

        model_name = args.model
        epoch = ""

        # load model
        model_path = os.path.join("../../../inverted_pendulum", "trained_models", "drone", model_name)
        net, param_dict = load_model(model_path, epoch=epoch)

        # load config
        cfg = YAML().load(open(os.environ["FLIGHTMARE_PATH"] +
                        "/flightlib/configs/vec_env.yaml", 'r'))
        # print(dump(cfg, Dumper=RoundTripDumper))
        cfg["env"]["num_envs"] = 1

        # initialize dataset
        dataset = DroneDataset(1, 1, **param_dict)

        # make evaluator
        evaluator = QuadEvaluator(net, dataset, render=render, **param_dict)
        evaluator.eval_env = FlightmareWrapper(
                param_dict["dt"],
                wrapper.FlightEnvVec(QuadrotorEnv_v1(
                dump(cfg, Dumper=RoundTripDumper), False))
        )

        if args.unity:
Example #9
    def migrate(self, feedstock, branch):
        user = "******"
        project = "%s-feedstock" % feedstock

        if branch == "master":
            # put the staging token into BINSTAR_TOKEN
            subprocess.run(
                "conda smithy update-binstar-token "
                "--without-appveyor --without-azure "
                "--token_name BINSTAR_TOKEN",
                shell=True,
                check=True)
            print("    putting cf-staging binstar token in BINSTAR_TOKEN")

            # put the staging token into STAGING_BINSTAR_TOKEN
            subprocess.run(
                "conda smithy update-binstar-token "
                "--without-appveyor --without-azure "
                "--token_name STAGING_BINSTAR_TOKEN",
                shell=True,
                check=True)
            print(
                "    putting cf-staging binstar token in STAGING_BINSTAR_TOKEN"
            )

            # needs a change in smithy so cannot do this
            # # remove STAGING_BINSTAR_TOKEN from travis, circle and drone
            # _delete_token_in_circle(user, project, "STAGING_BINSTAR_TOKEN")
            # print("    deleted STAGING_BINSTAR_TOKEN from circle")
            #
            # _delete_token_in_drone(user, project, "STAGING_BINSTAR_TOKEN")
            # print("    deleted STAGING_BINSTAR_TOKEN from drone")
            #
            # _delete_token_in_travis(user, project, "STAGING_BINSTAR_TOKEN")
            # print("    deleted STAGING_BINSTAR_TOKEN from travis")

            # remove BINSTAR_TOKEN and STAGING_BINSTAR_TOKEN from azure
            _delete_tokens_in_azure(
                user,
                project,
                ["BINSTAR_TOKEN", "STAGING_BINSTAR_TOKEN"],
            )
            print(
                "    deleted BINSTAR_TOKEN and STAGING_BINSTAR_TOKEN from azure"
            )

        # cleanup conda-forge.yml
        yaml = YAML()
        cfg = _read_conda_forge_yaml(yaml)
        _cleanup_cfgy(cfg, "travis", "BINSTAR_TOKEN")
        _cleanup_cfgy(cfg, "appveyor", "BINSTAR_TOKEN")
        with open("conda-forge.yml", "w") as fp:
            yaml.dump(cfg, fp)
        subprocess.run(
            ["git", "add", "conda-forge.yml"],
            check=True,
        )
        print("    updated conda-forge.yml")

        # migration done, make a commit, lots of API calls
        return True, True, True
Example #10
File: main.py Project: macbury/yal
import os
import time
import logging
import paho.mqtt.client as mqtt
from circleci.api import Api
from ruamel.yaml import YAML

logging.basicConfig(level=logging.INFO,
                    format="[%(asctime)s] %(levelname)s:%(name)s:%(message)s")

CONFIG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           'config.yaml')
LOGGER = logging.getLogger('lamp')

LOGGER.info("Loading config: " + CONFIG_PATH)
CONFIG = YAML(typ='safe').load(open(CONFIG_PATH))

circleci = Api(CONFIG['circle_token'])
client = mqtt.Client()

if 'username' in CONFIG:
    client.username_pw_set(CONFIG['username'], CONFIG['password'])
    client.connect(CONFIG['host'], CONFIG['port'], 60)
    client.loop_start()

try:
    while True:
        LOGGER.info("Fetching build status...")
        build = circleci.get_recent_builds()[0]
        status = build['status']
        LOGGER.info("Status is: {}".format(status))
Example #11
    def __init__(self, _list=list):
        self.teams = _list
        self.yaml = YAML()
        self.yaml.preserve_quotes = True
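A sketch of the behavior preserve_quotes opts into, assuming only ruamel.yaml: quoted scalars keep their quotes on a round trip:

import sys
from ruamel.yaml import YAML

yaml = YAML()
yaml.preserve_quotes = True
doc = yaml.load("teams:\n- 'Red Sox'\n- Yankees\n")
yaml.dump(doc, sys.stdout)
# teams:
# - 'Red Sox'
# - Yankees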
Example #12
from pathlib import Path
from ruamel.yaml import YAML

def load_benchmarks_yml():
    with open(Path(__file__).parent.parent / "benchmarks.yml") as benchmarks_yml:
        return YAML(typ="safe").load(benchmarks_yml.read())
Example #13
import glob
import io
import json
import os
import subprocess
import sys
import tempfile
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple

from ruamel.yaml import YAML

yaml = YAML(typ="rt")

FEATURES = ["dots", "equivalence", "metavar", "misc"]

VERBOSE_REGEXP_SYNTAX = 'OCaml Syntax: "=~/<regexp>/"'

VERBOSE_FEATURE_NAME = {
    "dots": "Wildcard Matches (...)",
    "equivalence": "Helpful Features",
    "metavar": "Named Placeholders ($X)",
    "misc": "Others",
    "metavar_equality": "Reoccurring Expressions",
    "concrete": "Exact Matches",
    "regexp": "Regular Expressions",
    "deep": "Deep (Recursive) Matching",
}
Example #14
from ruamel.yaml import YAML

def write_config(configname, cfg):
    with open(configname, "w") as file:
        YAML().dump(cfg, file)
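Usage sketch, with a hypothetical config mapping and file name:

cfg = {"threshold": 0.5, "labels": ["cat", "dog"]}
write_config("config.yaml", cfg)  # writes the mapping as YAML to config.yaml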
Example #15
from io import StringIO

from ruamel.yaml import YAML

def to_yaml_string(data):
    stream = StringIO()
    yaml = YAML()
    yaml.default_flow_style = False
    yaml.dump(data, stream)
    return stream.getvalue()
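Usage sketch; with default_flow_style = False, nested collections come out in block style:

print(to_yaml_string({"a": 1, "b": [2, 3]}))
# a: 1
# b:
# - 2
# - 3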
Example #16
def _ruamel(f, constructors=None):
    from ruamel.yaml import YAML

    yaml = YAML(typ="safe")
    return list(yaml.load_all(f))
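Usage sketch with a multi-document stream; io.StringIO stands in for a file handle (note the constructors parameter is accepted but unused in the fragment shown):

import io

docs = _ruamel(io.StringIO("a: 1\n---\nb: 2\n"))
print(docs)  # [{'a': 1}, {'b': 2}]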
Example #17
from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap

def get_yaml(path: str) -> CommentedMap:
    bytes_data = get_data(path)  # project-specific helper returning raw bytes
    # Replace CRLF, or the YAML loader will load extra lines
    string_data = bytes_data.decode('utf8').replace('\r\n', '\n')
    return YAML().load(string_data)
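The same normalization without the project-specific get_data, assuming raw bytes from any source:

from ruamel.yaml import YAML

raw = b"key: value\r\nitems:\r\n- 1\r\n"
text = raw.decode('utf8').replace('\r\n', '\n')
data = YAML().load(text)
print(data['key'], data['items'])  # value [1]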
Example #18
def write(model_name, data, output_dir):
    """Write data structure to YAML and csv
    """
    project_data, intervals, interventions, units, model_data, extra = data

    yaml = YAML()

    # project
    with open(project_yaml_file(output_dir), 'w',
              encoding='utf-8') as project_file:
        yaml.dump(project_data, project_file)

    # intervals
    intervals_filename = os.path.join(output_dir, 'data',
                                      'interval_definitions',
                                      '{}_intervals.csv'.format(model_name))
    with open(intervals_filename, 'w', encoding='utf-8',
              newline='') as intervals_file:
        fieldnames = ('id', 'start_hour', 'end_hour')
        writer = csv.DictWriter(intervals_file, fieldnames)
        writer.writeheader()
        writer.writerows(intervals)

    # interventions
    interventions_filename = os.path.join(
        output_dir, 'data', 'interventions',
        '{}_interventions.yml'.format(model_name))
    with open(interventions_filename, 'w',
              encoding='utf-8') as interventions_file:
        yaml.dump(interventions, interventions_file)

    # units
    units_filename = os.path.join(output_dir, 'data',
                                  '{}_units.txt'.format(model_name))
    with open(units_filename, 'w', encoding='utf-8', newline='') as units_file:
        fieldnames = ('unit_name', 'description')
        writer = csv.DictWriter(units_file, fieldnames, delimiter='=')
        writer.writeheader()
        writer.writerows(units)

    # model
    model_filename = os.path.join(output_dir, 'config', 'sector_models',
                                  '{}.yml'.format(model_name))
    with open(model_filename, 'w', encoding='utf-8') as model_file:
        yaml.dump(model_data, model_file)

    # wrapper
    wrapper_parameters = ''
    for parameter in model_data['parameters']:
        identifier = clean('parameter_' + str(parameter['name']))
        wrapper_parameters += '{0} = data.get_parameter(\'{1}\')\n\t\t'.format(
            identifier, parameter['name'])
        wrapper_parameters += 'self.logger.info(\'Parameter {1}: %s\', {0})\n\t\t'.format(
            identifier,
            str(parameter['name']).replace("_", " ").capitalize())

    wrapper_inputs = ''
    for input in model_data['inputs']:
        identifier = clean('input_' + str(input['name']))
        wrapper_inputs += '{0} = data.get_data("{1}")\n\t\t'.format(
            identifier, input['name'])
        wrapper_inputs += 'self.logger.info(\'Input {1}: %s\', {0})\n\t\t'.format(
            identifier,
            str(input['name']).replace("_", " ").capitalize())

    wrapper_outputs = ''
    for output in model_data['outputs']:
        wrapper_outputs += 'data.set_results("{0}", None)\n\t\t'.format(
            output['name'])

    # ensure models dir exists
    try:
        os.mkdir(os.path.join(output_dir, 'models'))
    except FileExistsError:
        pass

    with open(WRAPPER_TEMPLATE, 'r') as source, open(
            os.path.join(output_dir, 'models', '{}.py'.format(model_name)),
            'w') as sink:
        for line in source.readlines():
            sink.write(
                line.format(model_name=model_name,
                            model_name_rm_=model_name.replace("_", " "),
                            model_name_cap=model_name.replace(
                                "_", " ").capitalize(),
                            model_parameters=wrapper_parameters,
                            model_inputs=wrapper_inputs,
                            model_outputs=wrapper_outputs))

    # extras
    for sheet_name, data in extra.items():
        filename = os.path.join(output_dir,
                                '{}__{}.yml'.format(model_name, sheet_name))
        with open(filename, 'w', encoding='utf-8') as file_handle:
            yaml.dump(data, file_handle)
Example #19
def create_all_dags(args, neural_factory):
    logger = neural_factory.logger
    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)
    vocab = jasper_params['labels']
    sample_rate = jasper_params['sample_rate']

    # Calculate num_workers for dataloader
    total_cpus = os.cpu_count()
    cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1)

    # perturb_config = jasper_params.get('perturb', None)
    train_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    train_dl_params.update(jasper_params["AudioToTextDataLayer"]["train"])
    del train_dl_params["train"]
    del train_dl_params["eval"]
    # del train_dl_params["normalize_transcripts"]

    data_layer = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=args.train_dataset,
        sample_rate=sample_rate,
        labels=vocab,
        batch_size=args.batch_size,
        num_workers=cpu_per_traindl,
        **train_dl_params,
        # normalize_transcripts=False
    )

    N = len(data_layer)
    steps_per_epoch = int(N / (args.batch_size * args.num_gpus))
    logger.info('Have {0} examples to train on.'.format(N))

    data_preprocessor = nemo_asr.AudioPreprocessing(
        sample_rate=sample_rate, **jasper_params["AudioPreprocessing"])

    multiply_batch_config = jasper_params.get('MultiplyBatch', None)
    if multiply_batch_config:
        multiply_batch = nemo_asr.MultiplyBatch(**multiply_batch_config)

    spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None)
    if spectr_augment_config:
        data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(
            **spectr_augment_config)

    eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"])
    del eval_dl_params["train"]
    del eval_dl_params["eval"]
    data_layers_eval = []

    if args.eval_datasets:
        for eval_datasets in args.eval_datasets:
            data_layer_eval = nemo_asr.AudioToTextDataLayer(
                manifest_filepath=eval_datasets,
                sample_rate=sample_rate,
                labels=vocab,
                batch_size=args.eval_batch_size,
                num_workers=cpu_per_traindl,
                **eval_dl_params,
            )

            data_layers_eval.append(data_layer_eval)
    else:
        neural_factory.logger.info("There were no val datasets passed")

    jasper_encoder = nemo_asr.JasperEncoder(
        feat_in=jasper_params["AudioPreprocessing"]["features"],
        **jasper_params["JasperEncoder"])

    jasper_decoder = nemo_asr.JasperDecoderForCTC(
        feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"],
        num_classes=len(vocab),
        factory=neural_factory)

    ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab))

    greedy_decoder = nemo_asr.GreedyCTCDecoder()

    logger.info('================================')
    logger.info(
        f"Number of parameters in encoder: {jasper_encoder.num_weights}")
    logger.info(
        f"Number of parameters in decoder: {jasper_decoder.num_weights}")
    logger.info(f"Total number of parameters in decoder: "
                f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
    logger.info('================================')

    # Train DAG
    audio_signal_t, a_sig_length_t, \
        transcript_t, transcript_len_t = data_layer()
    processed_signal_t, p_length_t = data_preprocessor(
        input_signal=audio_signal_t, length=a_sig_length_t)

    if multiply_batch_config:
        processed_signal_t, p_length_t, transcript_t, transcript_len_t = \
            multiply_batch(
                in_x=processed_signal_t, in_x_len=p_length_t,
                in_y=transcript_t,
                in_y_len=transcript_len_t)

    if spectr_augment_config:
        processed_signal_t = data_spectr_augmentation(
            input_spec=processed_signal_t)

    encoded_t, encoded_len_t = jasper_encoder(audio_signal=processed_signal_t,
                                              length=p_length_t)
    log_probs_t = jasper_decoder(encoder_output=encoded_t)
    predictions_t = greedy_decoder(log_probs=log_probs_t)
    loss_t = ctc_loss(log_probs=log_probs_t,
                      targets=transcript_t,
                      input_length=encoded_len_t,
                      target_length=transcript_len_t)

    # Callbacks needed to print info to console and Tensorboard
    train_callback = nemo.core.SimpleLossLoggerCallback(
        tensors=[loss_t, predictions_t, transcript_t, transcript_len_t],
        print_func=partial(monitor_asr_train_progress,
                           labels=vocab,
                           logger=logger),
        get_tb_values=lambda x: [("loss", x[0])],
        tb_writer=neural_factory.tb_writer,
    )

    chpt_callback = nemo.core.CheckpointCallback(
        folder=neural_factory.checkpoint_dir,
        step_freq=args.checkpoint_save_freq)

    callbacks = [train_callback, chpt_callback]

    # assemble eval DAGs
    for i, eval_dl in enumerate(data_layers_eval):
        audio_signal_e, a_sig_length_e, transcript_e, transcript_len_e = \
            eval_dl()
        processed_signal_e, p_length_e = data_preprocessor(
            input_signal=audio_signal_e, length=a_sig_length_e)
        encoded_e, encoded_len_e = jasper_encoder(
            audio_signal=processed_signal_e, length=p_length_e)
        log_probs_e = jasper_decoder(encoder_output=encoded_e)
        predictions_e = greedy_decoder(log_probs=log_probs_e)
        loss_e = ctc_loss(log_probs=log_probs_e,
                          targets=transcript_e,
                          input_length=encoded_len_e,
                          target_length=transcript_len_e)

        # create corresponding eval callback
        tagname = os.path.basename(args.eval_datasets[i]).split(".")[0]
        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[
                loss_e, predictions_e, transcript_e, transcript_len_e
            ],
            user_iter_callback=partial(process_evaluation_batch, labels=vocab),
            user_epochs_done_callback=partial(process_evaluation_epoch,
                                              tag=tagname,
                                              logger=logger),
            eval_step=args.eval_freq,
            tb_writer=neural_factory.tb_writer)

        callbacks.append(eval_callback)
    return loss_t, callbacks, steps_per_epoch
Example #20
    def _run_core_command(
        self,
        patterns_json: List[Any],
        patterns: List[Pattern],
        targets: List[Path],
        language: Language,
        rule: Rule,
        rules_file_flag: str,
        cache_dir: str,
    ) -> dict:
        with tempfile.NamedTemporaryFile(
            "w"
        ) as pattern_file, tempfile.NamedTemporaryFile(
            "w"
        ) as target_file, tempfile.NamedTemporaryFile(
            "w"
        ) as equiv_file:
            yaml = YAML()
            yaml.dump({"rules": patterns_json}, pattern_file)
            pattern_file.flush()
            target_file.write("\n".join(str(t) for t in targets))
            target_file.flush()

            cmd = [SEMGREP_PATH] + [
                "-lang",
                language,
                rules_file_flag,
                pattern_file.name,
                "-j",
                str(self._jobs),
                "-target_file",
                target_file.name,
                "-use_parsing_cache",
                cache_dir,
                "-timeout",
                str(self._timeout),
                "-max_memory",
                str(self._max_memory),
            ]

            equivalences = rule.equivalences
            if equivalences:
                self._write_equivalences_file(equiv_file, equivalences)
                cmd += ["-equivalences", equiv_file.name]

            core_run = sub_run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.debug(core_run.stderr.decode("utf-8", "replace"))

            if core_run.returncode != 0:
                output_json = self._parse_core_output(core_run.stdout)

                if "error" in output_json:
                    self._raise_semgrep_error_from_json(output_json, patterns)
                else:
                    raise SemgrepError(
                        f"unexpected json output while invoking semgrep-core:\n{PLEASE_FILE_ISSUE_TEXT}"
                    )

            output_json = self._parse_core_output(core_run.stdout)

            return output_json
Example #21
def create_all_dags(args, neural_factory):
    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)

    labels = jasper_params['labels']  # Vocab of tokens
    sample_rate = jasper_params['sample_rate']

    # Calculate num_workers for dataloader
    total_cpus = os.cpu_count()
    cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1)

    # perturb_config = jasper_params.get('perturb', None)
    train_dl_params = copy.deepcopy(
        jasper_params["AudioToSpeechLabelDataLayer"])
    train_dl_params.update(
        jasper_params["AudioToSpeechLabelDataLayer"]["train"])
    del train_dl_params["train"]
    del train_dl_params["eval"]
    # del train_dl_params["normalize_transcripts"]

    # Look for augmentations
    audio_augmentor = jasper_params.get('AudioAugmentor', None)

    data_layer = nemo_asr.AudioToSpeechLabelDataLayer(
        manifest_filepath=args.train_dataset,
        labels=labels,
        sample_rate=sample_rate,
        batch_size=args.batch_size,
        num_workers=cpu_per_traindl,
        augmentor=audio_augmentor,
        **train_dl_params,
    )

    crop_pad_augmentation = nemo_asr.CropOrPadSpectrogramAugmentation(
        audio_length=128)

    N = len(data_layer)
    steps_per_epoch = math.ceil(
        N / (args.batch_size * args.iter_per_step * args.num_gpus))
    logging.info('Steps per epoch : {0}'.format(steps_per_epoch))
    logging.info('Have {0} examples to train on.'.format(N))

    data_preprocessor = nemo_asr.AudioToMFCCPreprocessor(
        sample_rate=sample_rate,
        **jasper_params["AudioToMFCCPreprocessor"],
    )

    spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None)
    if spectr_augment_config:
        data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(
            **spectr_augment_config)

    eval_dl_params = copy.deepcopy(
        jasper_params["AudioToSpeechLabelDataLayer"])
    eval_dl_params.update(jasper_params["AudioToSpeechLabelDataLayer"]["eval"])
    del eval_dl_params["train"]
    del eval_dl_params["eval"]
    data_layers_eval = []

    if args.eval_datasets:
        for eval_datasets in args.eval_datasets:
            data_layer_eval = nemo_asr.AudioToSpeechLabelDataLayer(
                manifest_filepath=eval_datasets,
                sample_rate=sample_rate,
                labels=labels,
                batch_size=args.eval_batch_size,
                num_workers=cpu_per_traindl,
                **eval_dl_params,
            )

            data_layers_eval.append(data_layer_eval)
    else:
        logging.warning("There were no val datasets passed")

    jasper_encoder = nemo_asr.JasperEncoder(**jasper_params["JasperEncoder"])

    jasper_decoder = nemo_asr.JasperDecoderForClassification(
        feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"],
        num_classes=len(labels),
        **jasper_params['JasperDecoderForClassification'],
    )

    ce_loss = nemo_asr.CrossEntropyLossNM()

    logging.info('================================')
    logging.info(
        f"Number of parameters in encoder: {jasper_encoder.num_weights}")
    logging.info(
        f"Number of parameters in decoder: {jasper_decoder.num_weights}")
    logging.info(f"Total number of parameters in model: "
                 f"{jasper_decoder.num_weights + jasper_encoder.num_weights}")
    logging.info('================================')

    # --- Assemble Training DAG --- #
    audio_signal, audio_signal_len, commands, command_len = data_layer()

    processed_signal, processed_signal_len = data_preprocessor(
        input_signal=audio_signal, length=audio_signal_len)

    processed_signal, processed_signal_len = crop_pad_augmentation(
        input_signal=processed_signal, length=audio_signal_len)

    if spectr_augment_config:
        processed_signal = data_spectr_augmentation(
            input_spec=processed_signal)

    encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
                                          length=processed_signal_len)

    decoded = jasper_decoder(encoder_output=encoded)

    loss = ce_loss(logits=decoded, labels=commands)

    # Callbacks needed to print info to console and Tensorboard
    train_callback = nemo.core.SimpleLossLoggerCallback(
        # Notice that we pass in loss, predictions, and the labels (commands).
        # Of course we would like to see our training loss, but we need the
        # other arguments to calculate the accuracy.
        tensors=[loss, decoded, commands],
        # The print_func defines what gets printed.
        print_func=partial(monitor_classification_training_progress,
                           eval_metric=None),
        get_tb_values=lambda x: [("loss", x[0])],
        tb_writer=neural_factory.tb_writer,
    )

    chpt_callback = nemo.core.CheckpointCallback(
        folder=neural_factory.checkpoint_dir,
        load_from_folder=args.load_dir,
        step_freq=args.checkpoint_save_freq,
    )

    callbacks = [train_callback, chpt_callback]

    # assemble eval DAGs
    for i, eval_dl in enumerate(data_layers_eval):
        # --- Assemble Evaluation DAG --- #
        test_audio_signal, test_audio_signal_len, test_commands, test_command_len = eval_dl()

        test_processed_signal, test_processed_signal_len = data_preprocessor(
            input_signal=test_audio_signal, length=test_audio_signal_len)

        test_processed_signal, test_processed_signal_len = crop_pad_augmentation(
            input_signal=test_processed_signal,
            length=test_processed_signal_len)

        test_encoded, test_encoded_len = jasper_encoder(
            audio_signal=test_processed_signal,
            length=test_processed_signal_len)

        test_decoded = jasper_decoder(encoder_output=test_encoded)

        test_loss = ce_loss(logits=test_decoded, labels=test_commands)

        # create corresponding eval callback
        tagname = os.path.basename(args.eval_datasets[i]).split(".")[0]
        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[test_loss, test_decoded, test_commands],
            user_iter_callback=partial(process_classification_evaluation_batch,
                                       top_k=1),
            user_epochs_done_callback=partial(
                process_classification_evaluation_epoch,
                eval_metric=1,
                tag=tagname),
            eval_step=args.eval_freq,  # How often we evaluate the model on the test set
            tb_writer=neural_factory.tb_writer,
        )

        callbacks.append(eval_callback)
    return loss, callbacks, steps_per_epoch
Example #22
#
# You should have received a copy of the GNU General Public License
# along with Exhibition.  If not, see <https://www.gnu.org/licenses/>.
#
##

from collections import OrderedDict
from importlib import import_module
import hashlib
import pathlib

from ruamel.yaml import YAML

from .config import Config

yaml_parser = YAML(typ="safe")

DATA_EXTRACTORS = {
    ".yaml": yaml_parser.load,
    ".json": yaml_parser.load,
}

DEFAULT_STRIP_EXTS = [".html"]
DEFAULT_INDEX_FILE = "index.html"


class Node:
    """
    A node represents a file or directory
    """
    _meta_names = ["meta.yaml", "meta.yml"]
Example #23
# coding=utf-8

import os, json, io
from flask import Flask, jsonify, request
from flask_cors import CORS

from ruamel.yaml import YAML
yaml_parser = YAML()  #typ="safe"
app = Flask(__name__)
CORS(app)


def get_YAML_string(obj):
    strngio = io.StringIO()
    yaml_parser.dump(obj, strngio)
    strngio.seek(0)
    yamlstr = strngio.read()
    strngio.close()
    return yamlstr


@app.route('/questions/<int:number>')
def get_question(number: int):
    # Number is base 1
    jsonpath = os.path.join(
        os.path.split(os.path.split(__file__)[0])[0], 'static',
        'Questions.json')
    with open(jsonpath, "rt") as opf:
        jsonstring = opf.read()
        qdct = json.loads(jsonstring)
        questionobj = qdct[number - 1]
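The example cuts off above; for the get_YAML_string helper it defines, a usage sketch (a round-trip dump of a plain mapping):

print(get_YAML_string({"question": "2 + 2?", "answer": 4}))
# question: 2 + 2?
# answer: 4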
Example #24
from ruamel.yaml import YAML

def read_yaml_file(file_path: str) -> dict:
    with open(file_path, 'r', encoding='utf-8') as yaml_file:
        yaml = YAML(typ='safe')
        return yaml.load(yaml_file)
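Usage sketch, assuming a hypothetical settings.yaml containing "debug: true":

cfg = read_yaml_file("settings.yaml")  # hypothetical path
print(cfg["debug"])  # True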
Example #25
from datetime import datetime
import os
import pathlib
from subprocess import check_call

from ruamel.yaml import YAML

MINICONDA_VERSION = '4.3.27'

HERE = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))

ENV_FILE = 'environment.yml'
FROZEN_FILE = 'environment.frozen.yml'

yaml = YAML(typ='rt')


def fixup(frozen_file):
    """Fixup a frozen environment file

    Conda export has a bug!
    https://github.com/conda/conda/pull/6391
    """
    with open(frozen_file) as f:
        env = yaml.load(f)

    # scrub spurious pip dependencies
    # due to conda #6391

    # note: this scrubs *all* pip dependencies,
Example #26
class TestASRPytorch(NeMoUnitTest):
    labels = [
        " ",
        "a",
        "b",
        "c",
        "d",
        "e",
        "f",
        "g",
        "h",
        "i",
        "j",
        "k",
        "l",
        "m",
        "n",
        "o",
        "p",
        "q",
        "r",
        "s",
        "t",
        "u",
        "v",
        "w",
        "x",
        "y",
        "z",
        "'",
    ]
    manifest_filepath = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json"))
    featurizer_config = {
        'window': 'hann',
        'dither': 1e-05,
        'normalize': 'per_feature',
        'frame_splicing': 1,
        'int_values': False,
        'window_stride': 0.01,
        'sample_rate': freq,
        'features': 64,
        'n_fft': 512,
        'window_size': 0.02,
    }
    yaml = YAML(typ="safe")

    @classmethod
    def setUpClass(cls) -> None:
        super().setUpClass()
        data_folder = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "../data/"))
        logging.info("Looking up for test ASR data")
        if not os.path.exists(os.path.join(data_folder, "asr")):
            logging.info("Extracting ASR data to: {0}".format(
                os.path.join(data_folder, "asr")))
            tar = tarfile.open(os.path.join(data_folder, "asr.tar.gz"), "r:gz")
            tar.extractall(path=data_folder)
            tar.close()
        else:
            logging.info("ASR data found in: {0}".format(
                os.path.join(data_folder, "asr")))

    @classmethod
    def tearDownClass(cls) -> None:
        super().tearDownClass()
        data_folder = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "../data/"))
        logging.info("Looking up for test ASR data")
        if os.path.exists(os.path.join(data_folder, "asr")):
            shutil.rmtree(os.path.join(data_folder, "asr"))

    def test_transcript_normalizers(self):
        # Create test json
        test_strings = [
            "TEST CAPITALIZATION",
            '!\\"#$%&\'()*+,-./:;<=>?@[\\\\]^_`{|}~',
            "3+3=10",
            "3 + 3 = 10",
            "why     is \\t whitepsace\\tsuch a problem   why indeed",
            "\\\"Can you handle quotes?,\\\" says the boy",
            "I Jump!!!!With joy?Now.",
            "Maybe I want to learn periods.",
            "$10 10.90 1-800-000-0000",
            "18000000000 one thousand 2020",
            "1 10 100 1000 10000 100000 1000000",
            "Î  ĻƠvɆȩȅĘ ÀÁÃ Ą ÇĊňńŤŧș",
            "‘’“”❛❜❝❞「 」 〈 〉 《 》 【 】 〔 〕 ⦗ ⦘ 😙  👀 🔨",
            "It only costs $1 000 000! Cheap right?",
            "2500, 3000 are separate but 200, 125 is not",
            "1",
            "1 2",
            "1 2 3",
            "10:00pm is 10:00 pm is 22:00 but not 10: 00 pm",
            "10:00 10:01pm 10:10am 10:90pm",
            "Mr. Expand me!",
            "Mr Don't Expand me!",
        ]
        normalized_strings = [
            "test capitalization",
            'percent and \' plus',
            "three plus three ten",
            "three plus three ten",
            "why is whitepsace such a problem why indeed",
            "can you handle quotes says the boy",
            "i jump with joy now",
            "maybe i want to learn periods",
            "ten dollars ten point nine zero one eight hundred zero zero",
            "eighteen billion one thousand two thousand and twenty",
            # Two line string below
            "one ten thousand one hundred one thousand ten thousand one "
            "hundred thousand one million",
            "i loveeee aaa a ccnntts",
            "''",
            "it only costs one million dollars cheap right",
            # Two line string below
            "two thousand five hundred three thousand are separate but two "
            "hundred thousand one hundred and twenty five is not",
            "one",
            "one two",
            "one two three",
            "ten pm is ten pm is twenty two but not ten zero pm",
            "ten ten one pm ten ten am ten ninety pm",
            "mister expand me",
            "mr don't expand me",
        ]
        manifest_paths = os.path.abspath(
            os.path.join(os.path.dirname(__file__),
                         "../data/asr/manifest_test.json"))

        def remove_test_json():
            os.remove(manifest_paths)

        self.addCleanup(remove_test_json)

        with open(manifest_paths, "w") as f:
            for s in test_strings:
                f.write('{"audio_filepath": "", "duration": 1.0, "text": '
                        f'"{s}"}}\n')
        parser = parsers.make_parser(self.labels, 'en')
        manifest = collections.ASRAudioText(
            manifests_files=[manifest_paths],
            parser=parser,
        )

        for i, s in enumerate(normalized_strings):
            self.assertTrue(manifest[i].text_tokens == parser(s))

    def test_pytorch_audio_dataset(self):
        featurizer = WaveformFeaturizer.from_config(self.featurizer_config)
        ds = AudioDataset(
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            featurizer=featurizer,
        )

        for i in range(len(ds)):
            if i == 5:
                logging.info(ds[i])
            # logging.info(ds[i][0].shape)
            # self.assertEqual(freq, ds[i][0].shape[0])

    def test_dataloader(self):
        batch_size = 4
        dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=batch_size,
            # placement=DeviceType.GPU,
            drop_last=True,
        )
        for ind, data in enumerate(dl.data_iterator):
            # With num_workers update, this is no longer true
            # Moving to GPU is handled by AudioPreprocessor
            # data is on GPU
            # self.assertTrue(data[0].is_cuda)
            # self.assertTrue(data[1].is_cuda)
            # self.assertTrue(data[2].is_cuda)
            # self.assertTrue(data[3].is_cuda)
            # first dimension is batch
            self.assertTrue(data[0].size(0) == batch_size)
            self.assertTrue(data[1].size(0) == batch_size)
            self.assertTrue(data[2].size(0) == batch_size)
            self.assertTrue(data[3].size(0) == batch_size)

    def test_preprocessor_errors(self):
        def create_broken_preprocessor_1():
            nemo_asr.AudioToMelSpectrogramPreprocessor(window_size=2,
                                                       n_window_size=2)

        def create_broken_preprocessor_2():
            nemo_asr.AudioToMelSpectrogramPreprocessor(window_stride=2,
                                                       n_window_stride=2)

        def create_broken_preprocessor_3():
            nemo_asr.AudioToMelSpectrogramPreprocessor(n_window_stride=2)

        def create_good_preprocessor_1():
            nemo_asr.AudioToMelSpectrogramPreprocessor(window_size=0.02,
                                                       window_stride=0.01)

        def create_good_preprocessor_2():
            nemo_asr.AudioToMelSpectrogramPreprocessor(
                window_size=None,
                window_stride=None,
                n_window_size=256,
                n_window_stride=32,
            )

        self.assertRaises(ValueError, create_broken_preprocessor_1)
        self.assertRaises(ValueError, create_broken_preprocessor_2)
        self.assertRaises(ValueError, create_broken_preprocessor_3)
        create_good_preprocessor_1()
        create_good_preprocessor_2()

    def test_kaldi_dataloader(self):
        batch_size = 4
        dl = nemo_asr.KaldiFeatureDataLayer(
            kaldi_dir=os.path.abspath(
                os.path.join(os.path.dirname(__file__),
                             '../data/asr/kaldi_an4/')),
            labels=self.labels,
            batch_size=batch_size,
        )
        for data in dl.data_iterator:
            self.assertTrue(data[0].size(0) == batch_size)

        dl_test_min = nemo_asr.KaldiFeatureDataLayer(
            kaldi_dir=os.path.abspath(
                os.path.join(os.path.dirname(__file__),
                             '../data/asr/kaldi_an4/')),
            labels=self.labels,
            batch_size=batch_size,
            min_duration=1.0,
        )
        self.assertTrue(len(dl_test_min) == 18)

        dl_test_max = nemo_asr.KaldiFeatureDataLayer(
            kaldi_dir=os.path.abspath(
                os.path.join(os.path.dirname(__file__),
                             '../data/asr/kaldi_an4/')),
            labels=self.labels,
            batch_size=batch_size,
            max_duration=5.0,
        )
        self.assertTrue(len(dl_test_max) == 19)

    def test_trim_silence(self):
        batch_size = 4
        normal_dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=batch_size,
            # placement=DeviceType.GPU,
            drop_last=True,
            shuffle=False,
        )
        trimmed_dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            trim_silence=True,
            labels=self.labels,
            batch_size=batch_size,
            # placement=DeviceType.GPU,
            drop_last=True,
            shuffle=False,
        )
        for norm, trim in zip(normal_dl.data_iterator,
                              trimmed_dl.data_iterator):
            for point in range(batch_size):
                self.assertTrue(norm[1][point].data >= trim[1][point].data)

    def test_audio_preprocessors(self):
        batch_size = 5
        dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=batch_size,
            # placement=DeviceType.GPU,
            drop_last=True,
            shuffle=False,
        )

        installed_torchaudio = True
        try:
            import torchaudio
        except ModuleNotFoundError:
            installed_torchaudio = False
            with self.assertRaises(ModuleNotFoundError):
                to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor(
                    n_fft=400, window=None)
            with self.assertRaises(ModuleNotFoundError):
                to_mfcc = nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15)

        if installed_torchaudio:
            to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor(
                n_fft=400, window=None)
            to_mfcc = nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15)

        to_melspec = nemo_asr.AudioToMelSpectrogramPreprocessor(features=50)

        for batch in dl.data_iterator:
            input_signals, seq_lengths, _, _ = batch
            input_signals = input_signals.to(to_melspec._device)
            seq_lengths = seq_lengths.to(to_melspec._device)

            melspec = to_melspec.forward(input_signals, seq_lengths)

            if installed_torchaudio:
                spec = to_spectrogram.forward(input_signals, seq_lengths)
                mfcc = to_mfcc.forward(input_signals, seq_lengths)

            # Check that number of features is what we expect
            self.assertTrue(melspec[0].shape[1] == 50)

            if installed_torchaudio:
                self.assertTrue(spec[0].shape[1] == 201)  # n_fft // 2 + 1 bins
                self.assertTrue(mfcc[0].shape[1] == 15)

    # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid")
    def test_jasper_training(self):
        with open(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__),
                                 "../data/jasper_smaller.yaml"))) as file:
            jasper_model_definition = self.yaml.load(file)
        dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        jasper_encoder = nemo_asr.JasperEncoder(
            feat_in=jasper_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **jasper_model_definition['JasperEncoder'],
        )
        jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024,
                                                      num_classes=len(
                                                          self.labels))
        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))

        # DAG
        audio_signal, a_sig_length, transcript, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)

        encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
                                              length=p_length)
        # logging.info(jasper_encoder)
        log_probs = jasper_decoder(encoder_output=encoded)
        loss = ctc_loss(
            log_probs=log_probs,
            targets=transcript,
            input_length=encoded_len,
            target_length=transcript_len,
        )

        callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'
                                              ),
        )
        # Instantiate an optimizer to perform `train` action
        optimizer = self.nf.get_trainer()
        optimizer.train(
            [loss],
            callbacks=[callback],
            optimizer="sgd",
            optimization_params={
                "num_epochs": 10,
                "lr": 0.0003
            },
        )

    # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid")
    def test_double_jasper_training(self):
        with open(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__),
                                 "../data/jasper_smaller.yaml"))) as file:
            jasper_model_definition = self.yaml.load(file)
        dl = nemo_asr.AudioToTextDataLayer(
            # featurizer_config=self.featurizer_config,
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        jasper_encoder1 = nemo_asr.JasperEncoder(
            feat_in=jasper_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **jasper_model_definition['JasperEncoder'],
        )
        jasper_encoder2 = nemo_asr.JasperEncoder(
            feat_in=jasper_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **jasper_model_definition['JasperEncoder'],
        )
        mx_max1 = nemo.backends.pytorch.common.SimpleCombiner(mode="max")
        mx_max2 = nemo.backends.pytorch.common.SimpleCombiner(mode="max")
        jasper_decoder1 = nemo_asr.JasperDecoderForCTC(feat_in=1024,
                                                       num_classes=len(
                                                           self.labels))
        jasper_decoder2 = nemo_asr.JasperDecoderForCTC(feat_in=1024,
                                                       num_classes=len(
                                                           self.labels))

        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))

        # DAG
        audio_signal, a_sig_length, transcript, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)

        encoded1, encoded_len1 = jasper_encoder1(audio_signal=processed_signal,
                                                 length=p_length)
        encoded2, encoded_len2 = jasper_encoder2(audio_signal=processed_signal,
                                                 length=p_length)
        log_probs1 = jasper_decoder1(encoder_output=encoded1)
        log_probs2 = jasper_decoder2(encoder_output=encoded2)
        log_probs = mx_max1(x1=log_probs1, x2=log_probs2)
        encoded_len = mx_max2(x1=encoded_len1, x2=encoded_len2)
        loss = ctc_loss(
            log_probs=log_probs,
            targets=transcript,
            input_length=encoded_len,
            target_length=transcript_len,
        )

        callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: logging.info(str(x[0].item())))
        # Instantiate an optimizer to perform `train` action
        optimizer = self.nf.get_trainer()
        optimizer.train(
            [loss],
            callbacks=[callback],
            optimizer="sgd",
            optimization_params={
                "num_epochs": 10,
                "lr": 0.0003
            },
        )

    # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid")
    def test_quartznet_training(self):
        with open(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__),
                                 "../data/quartznet_test.yaml"))) as f:
            quartz_model_definition = self.yaml.load(f)
        dl = nemo_asr.AudioToTextDataLayer(
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        jasper_encoder = nemo_asr.JasperEncoder(
            feat_in=quartz_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **quartz_model_definition['JasperEncoder'],
        )
        jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024,
                                                      num_classes=len(
                                                          self.labels))
        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))

        # DAG
        audio_signal, a_sig_length, transcript, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)

        encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
                                              length=p_length)
        log_probs = jasper_decoder(encoder_output=encoded)
        loss = ctc_loss(
            log_probs=log_probs,
            targets=transcript,
            input_length=encoded_len,
            target_length=transcript_len,
        )

        callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'
                                              ),
        )
        # Instantiate an optimizer to perform `train` action
        optimizer = self.nf.get_trainer()
        optimizer.train(
            [loss],
            callbacks=[callback],
            optimizer="sgd",
            optimization_params={
                "num_epochs": 10,
                "lr": 0.0003
            },
        )

    def test_stft_conv(self):
        with open(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__),
                                 "../data/jasper_smaller.yaml"))) as file:
            jasper_model_definition = self.yaml.load(file)
        dl = nemo_asr.AudioToTextDataLayer(
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
            'stft_conv': True,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        jasper_encoder = nemo_asr.JasperEncoder(
            feat_in=jasper_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **jasper_model_definition['JasperEncoder'],
        )
        jasper_decoder = nemo_asr.JasperDecoderForCTC(
            feat_in=1024, num_classes=len(self.labels))

        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))

        # DAG
        audio_signal, a_sig_length, transcript, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)

        encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
                                              length=p_length)
        # logging.info(jasper_encoder)
        log_probs = jasper_decoder(encoder_output=encoded)
        loss = ctc_loss(
            log_probs=log_probs,
            targets=transcript,
            input_length=encoded_len,
            target_length=transcript_len,
        )

        callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: logging.info(str(x[0].item())))
        # Instantiate an optimizer to perform `train` action
        optimizer = self.nf.get_trainer()
        optimizer.train(
            [loss],
            callbacks=[callback],
            optimizer="sgd",
            optimization_params={
                "num_epochs": 10,
                "lr": 0.0003
            },
        )

    def test_clas(self):
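        # Sequence-to-sequence pipeline: Jasper encoder -> RNN connector
        # -> attention-based RNN decoder -> sequence loss.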
        with open('examples/asr/experimental/configs/garnet_an4.yaml') as file:
            cfg = self.yaml.load(file)
        dl = nemo_asr.AudioToTextDataLayer(
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
            'stft_conv': True,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        encoder = nemo_asr.JasperEncoder(
            jasper=cfg['encoder']['jasper'],
            activation=cfg['encoder']['activation'],
            feat_in=cfg['input']['train']['features'],
        )
        connector = nemo_asr.JasperRNNConnector(
            in_channels=cfg['encoder']['jasper'][-1]['filters'],
            out_channels=cfg['decoder']['hidden_size'],
        )
        decoder = nemo.backends.pytorch.common.DecoderRNN(
            voc_size=len(self.labels),
            bos_id=0,
            hidden_size=cfg['decoder']['hidden_size'],
            attention_method=cfg['decoder']['attention_method'],
            attention_type=cfg['decoder']['attention_type'],
            in_dropout=cfg['decoder']['in_dropout'],
            gru_dropout=cfg['decoder']['gru_dropout'],
            attn_dropout=cfg['decoder']['attn_dropout'],
            teacher_forcing=cfg['decoder']['teacher_forcing'],
            curriculum_learning=cfg['decoder']['curriculum_learning'],
            rnn_type=cfg['decoder']['rnn_type'],
            n_layers=cfg['decoder']['n_layers'],
            tie_emb_out_weights=cfg['decoder']['tie_emb_out_weights'],
        )
        loss = nemo.backends.pytorch.common.SequenceLoss()

        # DAG
        audio_signal, a_sig_length, transcripts, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)
        encoded, encoded_len = encoder(audio_signal=processed_signal,
                                       length=p_length)
        encoded = connector(tensor=encoded)
        log_probs, _ = decoder(targets=transcripts, encoder_outputs=encoded)
        loss = loss(log_probs=log_probs, targets=transcripts)

        # Train
        callback = nemo.core.SimpleLossLoggerCallback(
            tensors=[loss],
            print_func=lambda x: logging.info(str(x[0].item())))
        # Instantiate an optimizer to perform `train` action
        optimizer = self.nf.get_trainer()
        optimizer.train(
            [loss],
            callbacks=[callback],
            optimizer="sgd",
            optimization_params={
                "num_epochs": 10,
                "lr": 0.0003
            },
        )

    def test_jasper_eval(self):
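        # Evaluation-only graph: the same Jasper stack plus a greedy CTC
        # decoder, run through nf.eval() with an EvaluatorCallback instead
        # of a training loop.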
        with open(
                os.path.abspath(
                    os.path.join(os.path.dirname(__file__),
                                 "../data/jasper_smaller.yaml"))) as file:
            jasper_model_definition = self.yaml.load(file)
        dl = nemo_asr.AudioToTextDataLayer(
            manifest_filepath=self.manifest_filepath,
            labels=self.labels,
            batch_size=4,
        )
        pre_process_params = {
            'frame_splicing': 1,
            'features': 64,
            'window_size': 0.02,
            'n_fft': 512,
            'dither': 1e-05,
            'window': 'hann',
            'sample_rate': 16000,
            'normalize': 'per_feature',
            'window_stride': 0.01,
        }
        preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(
            **pre_process_params)
        jasper_encoder = nemo_asr.JasperEncoder(
            feat_in=jasper_model_definition[
                'AudioToMelSpectrogramPreprocessor']['features'],
            **jasper_model_definition['JasperEncoder'],
        )
        jasper_decoder = nemo_asr.JasperDecoderForCTC(
            feat_in=1024, num_classes=len(self.labels))
        ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))
        greedy_decoder = nemo_asr.GreedyCTCDecoder()
        # DAG
        audio_signal, a_sig_length, transcript, transcript_len = dl()
        processed_signal, p_length = preprocessing(input_signal=audio_signal,
                                                   length=a_sig_length)

        encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,
                                              length=p_length)
        # logging.info(jasper_encoder)
        log_probs = jasper_decoder(encoder_output=encoded)
        loss = ctc_loss(
            log_probs=log_probs,
            targets=transcript,
            input_length=encoded_len,
            target_length=transcript_len,
        )
        predictions = greedy_decoder(log_probs=log_probs)

        from nemo.collections.asr.helpers import (
            process_evaluation_batch,
            process_evaluation_epoch,
        )

        eval_callback = nemo.core.EvaluatorCallback(
            eval_tensors=[loss, predictions, transcript, transcript_len],
            user_iter_callback=lambda x, y: process_evaluation_batch(
                x, y, labels=self.labels),
            user_epochs_done_callback=process_evaluation_epoch,
        )
        # Run evaluation over the dataset with the callback above
        self.nf.eval(callbacks=[eval_callback])
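
# Illustrative sketch (not from the test suite above): every test follows
# the same pattern -- load a model topology from YAML, then splat its
# sections into module constructors. A minimal, self-contained version of
# that pattern, assuming a ruamel.yaml safe loader and an inline config:
from ruamel.yaml import YAML

yaml_loader = YAML(typ="safe")
definition = yaml_loader.load(
    "AudioToMelSpectrogramPreprocessor:\n"
    "  features: 64\n"
    "JasperEncoder:\n"
    "  activation: relu\n"
)
assert definition["AudioToMelSpectrogramPreprocessor"]["features"] == 64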
from ipam.models import Role
from ruamel.yaml import YAML
from pathlib import Path
import sys

file = Path('/opt/netbox/initializers/prefix_vlan_roles.yml')
if not file.is_file():
  sys.exit()

with file.open('r') as stream:
  yaml = YAML(typ='safe')
  roles = yaml.load(stream)

  if roles is not None:
    for params in roles:
      role, created = Role.objects.get_or_create(**params)

      if created:
        print("⛹️‍ Created Prefix/VLAN Role", role.name)
Example no. 28
from io import StringIO
from ruamel.yaml import YAML

def from_yaml_string(s):
    return YAML().load(StringIO(s))
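
# Usage sketch for the helper above:
doc = from_yaml_string("name: test\nvalues: [1, 2, 3]")
print(doc["values"])  # -> [1, 2, 3]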
    def _make_split_kitti(self):
        """
        Use generated <self.voxel_version> output to build split.
        """

        assert self._input_format == "kitti"

        self.sample_id_template = "semantic_kitti_{seq:02d}_{frame:04d}"

        self._seq_format = lambda x: "{:02d}".format(x)
        self._frame_format = lambda x: "{:06d}".format(x)
        self._label_format = lambda x: "{:06d}".format(x)
        self._voxel_format = lambda x: "{:06d}".format(x)

        # TODO: no test split option for now
        assert self.testset_flag is False
        valid_splits = ["train", "valid"]
        map_split_names = {"train": "train", "valid": "val", "test": "test"}
        # read config
        with open(str(self.config_semantic), "r") as file_conf_sem:
            yaml = YAML()
            data = yaml.load(file_conf_sem)
            self._config_data = {k: dict(v) for k, v in data.items()}

        data_splits = {
            map_split_names[k]: v
            for k, v in self._config_data["split"].items()
            if k in valid_splits
        }
        self._split = {
            "name": "semantic_kitti_voxels_{}".format(
                "default" if not self.testset_flag else "test"
            ),
            "data": {k: [] for k in data_splits.keys()},
        }

        self._samples_to_generate = []

        def parse_sequence_folder_name(x):
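            # Folder names are zero-padded sequence numbers; anything else
            # (stray files or dirs) maps to -1, which never matches a real
            # sequence index below.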
            try:
                return int(x)
            except ValueError:
                return -1

        voxel_sequences = {
            parse_sequence_folder_name(x.name): x
            for x in self.semantic_kitti_voxels_root.iterdir()
        }

        for split_name, sequences in data_splits.items():
            split_data = self._split["data"][split_name]
            for sequence_index in sequences:
                if not self.testset_flag:

                    if sequence_index not in voxel_sequences:
                        logger.warning(
                            "Sequence {:02d} not available. Skipping.".format(
                                sequence_index
                            )
                        )
                        continue

                    voxel_dir = voxel_sequences[sequence_index] / self.voxel_version
                    if not voxel_dir.is_dir():
                        logger.warning(
                            "Voxels not available in sequence {:02d}. Skipping.".format(
                                sequence_index
                            )
                        )
                        continue

                    self._voxel_data_cache[sequence_index] = {
                        int(x.stem[:6]): x
                        for x in voxel_dir.iterdir()
                        if x.suffix == ".tfrecord"
                    }

                    frames = sorted(self._voxel_data_cache[sequence_index])
                    split_data.extend(
                        self.sample_id_template.format(seq=sequence_index, frame=x)
                        for x in frames
                    )
                    self._samples_to_generate.extend(
                        (sequence_index, x) for x in frames
                    )
                else:
                    raise NotImplementedError()

        self._label_mapping: dict = self._config_data["learning_map"]
        # make 255 the 'unlabeled' label and shift all others down (-1) accordingly
        self._label_mapping = {
            k: v - 1 if v != 0 else 255 for k, v in self._label_mapping.items()
        }
        self._label_mapping_voxels = self._label_mapping.copy()
        # map unlabeled to extra entry 254
        self._label_mapping_voxels[0] = 254
        assert all(x <= 255 for x in self._label_mapping.values())
        assert all(x <= 255 for x in self._label_mapping_voxels.values())

        self._label_mapping = np.vectorize(self._label_mapping.get, otypes=[np.int64])
        self._label_mapping_voxels = np.vectorize(
            self._label_mapping_voxels.get, otypes=[np.int64]
        )
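
# Illustrative sketch of the label-remapping trick above, stand-alone:
# shift every class id down by one and send 'unlabeled' (0) to 255. The
# mapping is an assumed subset of a SemanticKITTI learning_map.
import numpy as np

learning_map = {0: 0, 10: 1, 11: 2}
remap = {k: v - 1 if v != 0 else 255 for k, v in learning_map.items()}
remap_fn = np.vectorize(remap.get, otypes=[np.int64])
print(remap_fn(np.array([0, 10, 11])))  # -> [255   0   1]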
import os
from collections.abc import Mapping
from functools import lru_cache
from urllib.parse import urlparse

from ruamel.yaml import YAML

yaml = YAML(typ="safe")


def _merge_dictionaries(a, b):
    """Merge two dictionaries recursively.

    Simplified from https://stackoverflow.com/a/7205107
    """
    merged = a.copy()
    for key in b:
        if key in a:
            if isinstance(a[key], Mapping) and isinstance(b[key], Mapping):
                merged[key] = _merge_dictionaries(a[key], b[key])
            else:
                merged[key] = b[key]
        else:
            merged[key] = b[key]
    return merged
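

if __name__ == "__main__":
    # Usage sketch with illustrative inputs: nested mappings merge
    # recursively; on scalar conflicts the value from `b` wins.
    print(_merge_dictionaries(
        {"db": {"host": "localhost", "port": 5432}, "debug": False},
        {"db": {"port": 5433}, "debug": True},
    ))
    # -> {'db': {'host': 'localhost', 'port': 5433}, 'debug': True}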


# memoize so we only load config once
@lru_cache()
def _load_values():
    """Load configuration from disk