def __init__(self):
    """
    Record where this module lives and where the trained model is stored,
    then load the model via ``load_model``.
    """
    Service.__init__(self)

    # directory containing this file, kept with a trailing slash
    module_dir = os.path.dirname(os.path.abspath(__file__))
    self.speech_in_dir = module_dir + '/'

    # relative location of the backchannel model file
    model_dir = os.path.join('resources', 'models', 'backchannel')
    self.trained_model_path = model_dir + '/pytorch_acoustic_backchanneller.pt'

    self.load_model()
def __init__(self, domains: List[Domain], greet_on_first_turn: bool = False):
    """
    Keep the list of domains and the greeting flag on the instance.

    Args:
        domains: the domains stored on this service
        greet_on_first_turn: stored unchanged as ``self.greet_on_first_turn``
    """
    # the Service base is always initialised with an empty domain here
    Service.__init__(self, domain="")
    self.greet_on_first_turn = greet_on_first_turn
    self.domains = domains
    # no domain is active right after construction
    self.current_domain = None
def __init__(self, domain: Union[str, Domain] = "", conversation_log_dir: str = None,
             enable_plotting: bool = False, threshold: int = 8000,
             voice_privacy: bool = False, identifier: str = None) -> None:
    """
    Service that records the microphone after a key press and publishes the
    recording as an array. The end of the utterance is detected automatically
    and the voice can optionally be masked to alleviate privacy issues.

    Args:
        domain (Domain): forwarded to Service; carries no meaning for the
            recorder itself
        conversation_log_dir (string): if given, log files of the conversation
            will be created in this directory
        enable_plotting (boolean): only meant for tuning the end-of-utterance
            threshold; when True the recorder is no longer real-time capable
            and recordings do not work properly, so do not use in deployment
        threshold (int): level below which the end-of-utterance detection
            assumes silence
        voice_privacy (boolean): whether to mask the user's voice
        identifier (string): forwarded to Service
    """
    Service.__init__(self, domain=domain, identifier=identifier)

    # plain configuration values
    self.conversation_log_dir = conversation_log_dir
    self.threshold = threshold
    self.enable_plotting = enable_plotting
    self.voice_privacy = voice_privacy

    # nothing is being recorded right after construction
    self.recording_indicator = False

    # audio backend first, then the key listener that triggers start_recording
    self.audio_interface = pyaudio.PyAudio()
    self.push_to_talk_listener = keyboard.Listener(on_press=self.start_recording)
def __init__(self, domain="", camera_id: int = 0, openface_port: int = 6004,
             delay: int = 2, identifier=None):
    """
    Bind a local ZMQ PAIR socket and launch the OpenFace extraction process.

    Args:
        domain: accepted for interface compatibility; an empty domain is
            always forwarded to Service
        camera_id: index of the camera you want to use (if you only have one
            camera: 0)
        openface_port: local TCP port used both for the ZMQ socket and for the
            ``-port`` argument of the OpenFace process
        delay: number of seconds, stored as ``self.threshold``
            (one second = 15 frames)
        identifier: forwarded to Service
    """
    Service.__init__(self, domain="", identifier=identifier)
    self.camera_id = camera_id
    self.openface_port = openface_port
    self.openface_running = False
    self.threshold = delay  # provide number of seconds as parameter, one second = 15 frames

    ctx = Context.instance()
    self.openface_endpoint = ctx.socket(zmq.PAIR)
    self.openface_endpoint.bind(f"tcp://127.0.0.1:{self.openface_port}")

    # Build the argument list directly instead of splitting a command string,
    # so an installation path containing spaces stays one argument. The port
    # is taken from the parameter so it always matches the socket bound above
    # (it used to be hard-coded to 6004).
    openface_binary = os.path.join(get_root_dir(),
                                   'tools/OpenFace/build/bin/FaceLandmarkVidZMQ')
    start_extraction = [openface_binary,
                        '-device', str(self.camera_id),
                        '-port', str(self.openface_port)]
    self.p_openface = subprocess.Popen(start_extraction, stdout=subprocess.PIPE)  # start OpenFace

    self.extracting = False
    self.extractor_thread = None
def __init__(self, domain: Domain = "", identifier=None, conversation_log_dir: str = None, use_cuda=False):
    """
    Transforms spoken input from the user to text for further processing.

    Args:
        domain (Domain): needed for Service, but has no meaning here
        identifier (string): needed for Service
        conversation_log_dir (string): if provided, logfiles will be placed
            by this Service into the specified directory
        use_cuda (boolean): whether or not to run the computations on a GPU
    """
    Service.__init__(self, domain=domain, identifier=identifier)
    self.conversation_log_dir = conversation_log_dir

    # load the trained model together with its configuration
    model_dir = os.path.join(get_root_dir(), "resources", "models", "speech", "multi_en_20190916")
    model_file = os.path.join(model_dir, "model.bin")
    self.model, conf = load_trained_model(model_file)
    self.vocab = conf.char_list

    # set up beam search; only the decoder score is weighted (ctc weight is 0)
    self.bs = BeamSearch(
        scorers=self.model.scorers(),
        weights={"decoder": 1.0, "ctc": 0.0},
        sos=self.model.sos,
        eos=self.model.eos,
        beam_size=4,
        vocab_size=len(self.vocab),
        pre_beam_score_key="decoder",
    )
    # switch the already constructed search object to the batch implementation
    self.bs.__class__ = BatchBeamSearch

    # choose hardware and move model and search there
    self.device = "cuda" if use_cuda else "cpu"
    self.model.to(self.device)
    self.bs.to(self.device)

    # change from training mode to eval mode
    self.model.eval()
    self.bs.eval()

    # scale and offset for feature normalization
    # follows https://github.com/kaldi-asr/kaldi/blob/33255ed224500f55c8387f1e4fa40e08b73ff48a/src/transform/cmvn.cc#L92-L111
    cmvn_stats = torch.load(os.path.join(model_dir, "cmvn.bin"))
    frame_count = cmvn_stats[0][-1]
    mean = cmvn_stats[0][:-1] / frame_count
    variance = (cmvn_stats[1][:-1] / frame_count) - mean * mean
    self.scale = 1.0 / torch.sqrt(variance)
    self.offset = -(mean * self.scale)
def __init__(self, domain: Domain = ""):
    """
    Service that, given a sound, extracts features and passes them on to the
    decoder for ASR.

    Args:
        domain (Domain): required by Service; carries no meaning here
    """
    # nothing beyond the base-class initialisation is needed
    Service.__init__(self, domain=domain)
def __init__(self):
    """
    Emotion recognition module.

    On initialization the stored arguments and model parameters for the
    'category', 'arousal' and 'valence' representations are loaded from the
    emotion model directory, and the index-to-label mappings are set up.
    """
    Service.__init__(self)
    self.emotion_dir = os.path.dirname(os.path.abspath(__file__))
    self.model_path = os.path.abspath(
        os.path.join(self.emotion_dir, "..", "..", "resources", "models", "emotion")
    )

    def load_args(emo_representation):
        """Unpickle the argument dictionary stored for one representation."""
        args_file = os.path.join(self.model_path, f'{emo_representation}_args.pkl')
        # NOTE: pickle can execute arbitrary code while loading; these files
        # must come from a trusted source.
        # The context manager closes the file handle, which previously leaked.
        with open(args_file, 'rb') as args_handle:
            return pickle.load(args_handle)

    def load_model(emo_representation, arg_dict):
        """Build the cnn for one representation and load its parameters on CPU."""
        ARGS = arg_dict['args']
        model = cnn(
            kernel_size=(ARGS.height, arg_dict['D_in']),
            D_out=arg_dict['D_out'],
            args=ARGS
        )
        params_file = os.path.join(self.model_path,
                                   f'{emo_representation}_model_params.pt')
        model.load_state_dict(
            torch.load(params_file, map_location=torch.device('cpu'))
        )
        model.eval()
        return model

    self.emo_representations = ['category', 'arousal', 'valence']
    self.models = {}
    self.args = {}
    for emo_representation in self.emo_representations:
        self.args[emo_representation] = load_args(emo_representation)
        self.models[emo_representation] = load_model(
            emo_representation, self.args[emo_representation]
        )

    # mappings from predicted class index to label
    self.arousal_mapping = {0: 'low', 1: 'medium', 2: 'high'}
    self.valence_mapping = {0: 'negative', 1: 'neutral', 2: 'positive'}
    self.category_mapping = {
        0: EmotionType.Angry,
        1: EmotionType.Happy,
        2: EmotionType.Neutral,
        3: EmotionType.Sad
    }
def __init__(self, domain: JSONLookupDomain = None, logger: DiasysLogger = None,
             random: bool = True,
             static_emotion: EmotionType = EmotionType.Neutral,
             static_engagement: EngagementType = EngagementType.High):
    """
    Store the configuration values on the instance.

    Args:
        domain: forwarded to Service and kept as ``self.domain``
        logger: kept as ``self.logger``
        random: kept as ``self.random``
        static_emotion: initial value of ``self.emotion``
        static_engagement: initial value of ``self.engagement``
    """
    Service.__init__(self, domain=domain)
    self.domain = domain
    self.logger = logger
    self.random = random
    # initial user state values
    self.emotion = static_emotion
    self.engagement = static_engagement
def __init__(self, domain: Domain = ""):
    """
    Create the image-processing helpers: a CLAHE equalizer, a frontal face
    detector and a 68-point facial landmark predictor.

    Args:
        domain (Domain): forwarded to Service
    """
    Service.__init__(self, domain=domain)
    self.module_dir = os.path.dirname(os.path.abspath(__file__))

    # CLAHE (Contrast Limited Adaptive Histogram Equalization)
    self.CLAHE = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    # for detecting faces (returns coordinates of rectangle(s) of face area(s))
    self.DETECTOR = dlib.get_frontal_face_detector()

    # facial landmark predictor; the model file lives three levels above this module
    relative_model_path = os.path.join(
        self.module_dir, '..', '..', '..',
        'resources', 'models', 'video', 'shape_predictor_68_face_landmarks.dat')
    predictor_file = os.path.abspath(relative_model_path)
    self.PREDICTOR = dlib.shape_predictor(predictor_file)
def __init__(self, domain: JSONLookupDomain = None,
             logger: DiasysLogger = DiasysLogger()):
    """
    Initializes the policy.

    Arguments:
        domain (JSONLookupDomain): the domain that the affective policy
            should operate in
        logger (DiasysLogger): kept as ``self.logger``
    """
    # the flag is set before the Service base class is initialised
    self.first_turn = True
    Service.__init__(self, domain=domain)
    self.logger = logger
def __init__(self, domain: Domain, sub_topic_domains=None, template_file: str = None,
             logger: DiasysLogger = DiasysLogger(), template_file_german: str = None,
             emotions: List[str] = None, debug_logger=None):
    """
    Constructor mainly extracts methods and rules from the template file.

    Args:
        domain: forwarded to Service and kept as ``self.domain``
        sub_topic_domains: forwarded to Service; a fresh empty dict is used
            when omitted
        template_file: kept as ``self.template_filename``
        logger: kept as ``self.logger``
        template_file_german: accepted but not used by this constructor
        emotions: kept as ``self.emotions``; a fresh empty list is used when
            omitted
        debug_logger: forwarded to Service
    """
    # Mutable defaults ({} / []) are created once at definition time and
    # would be shared by every instance; create fresh containers instead.
    if sub_topic_domains is None:
        sub_topic_domains = {}
    if emotions is None:
        emotions = []

    Service.__init__(self, domain=domain, sub_topic_domains=sub_topic_domains,
                     debug_logger=debug_logger)
    self.domain = domain
    self.template_filename = template_file
    self.templates = {}
    self.logger = logger
    self.emotions = emotions
    self._initialise_templates()
def __init__(self, domain: Domain = "", conversation_log_dir: str = None,
             identifier: str = None):
    """
    Service that plays the system utterance as sound.

    Args:
        domain (Domain): required by Service, but has no meaning here
        conversation_log_dir (string): if provided, log files are created in
            the specified directory
        identifier (string): required by Service
    """
    Service.__init__(self, domain=domain, identifier=identifier)
    # counter starts at zero for a new instance
    self.interaction_count = 0
    self.conversation_log_dir = conversation_log_dir
def __init__(self, domain=None, camera_id: int = 0, capture_interval: int = 10e5,
             identifier: str = None):
    """
    Open the camera device and create (but do not start) the capture thread.

    Args:
        domain: forwarded to Service
        camera_id (int): device id (if only 1 camera device is connected, id
            is 0, if two are connected choose between 0 and 1, ...)
        capture_interval (int): try to capture a frame every x microseconds -
            is a lower bound, no hard time guarantees
            (e.g. 5e5 -> every >= 0.5 seconds)
        identifier (str): forwarded to Service
    """
    Service.__init__(self, domain, identifier=identifier)

    self.cap = cv2.VideoCapture(camera_id)  # get handle to camera device
    if not self.cap.isOpened():
        # VideoCapture.open() requires the device index; calling it without
        # arguments raises instead of retrying to open the camera.
        self.cap.open(camera_id)

    self.terminating = Event()
    self.terminating.clear()
    self.capture_thread = Thread(target=self.capture)  # create thread object for capturing
    self.capture_interval = capture_interval
def __init__(self, domain: JSONLookupDomain, logger: DiasysLogger = DiasysLogger(),
             max_turns: int = 25):
    """
    Initializes the policy.

    Arguments:
        domain {domain.jsonlookupdomain.JSONLookupDomain} -- Domain
        logger {DiasysLogger} -- kept as ``self.logger``
        max_turns {int} -- kept as ``self.max_turns``
    """
    # the flag is set before the Service base class is initialised
    self.first_turn = True
    Service.__init__(self, domain=domain)

    # suggestion bookkeeping
    self.current_suggestions = []  # list of current suggestions
    self.s_index = 0  # index into current_suggestions of the current system recommendation

    self.domain_key = domain.get_primary_key()
    self.max_turns = max_turns
    self.logger = logger
def __init__(self, domain: LookupDomain,
             logger: DiasysLogger = DiasysLogger(), device: str = 'cpu'):
    """
    Creates neural networks for semantic parsing and other required utils.

    Args:
        domain: the QA domain
        logger: the logger (forwarded to Service as ``debug_logger``)
        device: PyTorch device name
    """
    Service.__init__(self, domain=domain, debug_logger=logger)
    self.device = torch.device(device)

    # load the three networks, then the tag set
    self.nn_relation = self._load_relation_model()
    self.nn_entity = self._load_entity_model()
    self.nn_direction = self._load_direction_model()
    self.tags = self._load_tag_set()

    # the embedding creator uses the same maximum sequence length
    self.max_seq_len = 40
    self.embedding_creator = BertEmbedding(max_seq_length=self.max_seq_len)
def __init__(self, domain: JSONLookupDomain, logger: DiasysLogger = DiasysLogger(),
             language: Language = None):
    """
    Loads
        - domain key
        - informable slots
        - requestable slots
        - domain-independent regular expressions
        - domain-specific regular expressions

    It sets the previous system act to None.

    Args:
        domain {domain.jsonlookupdomain.JSONLookupDomain} -- Domain
        logger {DiasysLogger} -- kept as ``self.logger``
        language {Language} -- see the review note at the end of this method
    """
    Service.__init__(self, domain=domain)
    self.logger = logger
    self.language = language or Language.ENGLISH

    # domain information
    self.domain_name = domain.get_domain_name()
    self.domain_key = domain.get_primary_key()

    # lists of informable and requestable slots
    self.USER_INFORMABLE = domain.get_informable_slots()
    self.USER_REQUESTABLE = domain.get_requestable_slots()

    # folder where the regexes are stored
    self.base_folder = os.path.join(get_root_dir(), 'resources', 'nlu_regexes')

    # previous system act information; None signals the first turn
    self.sys_act_info = {
        'last_act': None,
        'lastInformedPrimKeyVal': None,
        'lastRequestSlot': None,
    }

    # NOTE(review): the language is reset to English here, so the `language`
    # argument has no lasting effect - confirm this is intended.
    self.language = Language.ENGLISH
    self._initialize()
def __init__(self, domain: Domain, template_file: str = None,
             sub_topic_domains: Dict[str, str] = None,
             logger: DiasysLogger = DiasysLogger(), template_file_german: str = None,
             language: Language = None):
    """
    Constructor mainly extracts methods and rules from the template file.

    Args:
        domain: forwarded to Service and kept as ``self.domain``
        template_file: kept as ``self.template_english``
        sub_topic_domains: forwarded to Service; a fresh empty dict is used
            when omitted
        logger: kept as ``self.logger``
        template_file_german: kept as ``self.template_german``
        language: currently not honoured, English is always selected
            (see note below)
    """
    # A `{}` default is created once at definition time and would be shared
    # by every instance; create a fresh dict per call instead.
    if sub_topic_domains is None:
        sub_topic_domains = {}
    Service.__init__(self, domain=domain, sub_topic_domains=sub_topic_domains)

    self.template_english = template_file
    # TODO: at some point if we expand languages, maybe make kwargs? --LV
    self.template_german = template_file_german
    self.domain = domain
    self.template_filename = None
    self.templates = None
    self.logger = logger

    # NOTE(review): the original assigned `language if language else ENGLISH`
    # and then unconditionally overwrote it with English. Only the effective
    # assignment is kept - confirm ignoring `language` is intended.
    self.language = Language.ENGLISH
    self._initialise_language(self.language)
def __init__(self, domain=None, logger=None):
    """
    Initialise the service with a fresh, empty user state.

    Args:
        domain: forwarded to Service
        logger: kept as ``self.logger``
    """
    Service.__init__(self, domain=domain)
    # user state object created with no arguments
    self.us = UserState()
    self.logger = logger
def __init__(self, domain: Domain = "", identifier: str = None, use_cuda=False,
             sub_topic_domains: Dict[str, str] = None):
    """
    Text To Speech Module that reads out the system utterance.

    Args:
        domain (Domain): Needed for Service, no meaning here
        identifier (string): Needed for Service
        use_cuda (boolean): Whether or not to perform computations on GPU.
            Highly recommended if available
        sub_topic_domains: see `services.service.Service` constructor for
            more details; a fresh empty dict is used when omitted
    """
    # A `{}` default is created once at definition time and would be shared
    # by every instance; create a fresh dict per call instead.
    if sub_topic_domains is None:
        sub_topic_domains = {}
    Service.__init__(self, domain=domain, identifier=identifier,
                     sub_topic_domains=sub_topic_domains)
    self.models_directory = os.path.join(get_root_dir(), "resources", "models", "speech")

    # The following lines can be changed to incorporate different models.
    # This is the only thing that needs to be changed for that, everything
    # else should be dynamic.
    self.transcription_type = "phn"
    self.dict_path = os.path.join(self.models_directory,
                                  "phn_train_no_dev_pytorch_train_fastspeech.v4",
                                  "data",
                                  "lang_1phn",
                                  "train_no_dev_units.txt")
    self.model_path = os.path.join(self.models_directory,
                                   "phn_train_no_dev_pytorch_train_fastspeech.v4",
                                   "exp",
                                   "phn_train_no_dev_pytorch_train_fastspeech.v4",
                                   "results",
                                   "model.last1.avg.best")
    self.vocoder_path = os.path.join(self.models_directory,
                                     "ljspeech.parallel_wavegan.v1",
                                     "checkpoint-400000steps.pkl")
    self.vocoder_conf = os.path.join(self.models_directory,
                                     "ljspeech.parallel_wavegan.v1",
                                     "config.yml")

    # define device to run the synthesis on
    if use_cuda:
        self.device = torch.device("cuda")
    else:
        self.device = torch.device("cpu")

    # define end to end TTS model
    self.input_dimensions, self.output_dimensions, self.train_args = get_model_conf(self.model_path)
    model_class = dynamic_import.dynamic_import(self.train_args.model_module)
    model = model_class(self.input_dimensions, self.output_dimensions, self.train_args)
    torch_load(self.model_path, model)
    self.model = model.eval().to(self.device)
    self.inference_args = Namespace(**{"threshold": 0.5,
                                       "minlenratio": 0.0,
                                       "maxlenratio": 10.0})

    # define neural vocoder
    with open(self.vocoder_conf) as vocoder_config_file:
        # NOTE: yaml.Loader can construct arbitrary Python objects; the
        # config file must come from a trusted source.
        self.config = yaml.load(vocoder_config_file, Loader=yaml.Loader)
    vocoder = ParallelWaveGANGenerator(**self.config["generator_params"])
    vocoder.load_state_dict(
        torch.load(self.vocoder_path, map_location="cpu")["model"]["generator"])
    vocoder.remove_weight_norm()
    self.vocoder = vocoder.eval().to(self.device)

    # each dictionary line holds a symbol and its integer id, separated by a
    # space; blank lines are skipped so they cannot break the unpacking
    with open(self.dict_path) as dictionary_file:
        entries = [line.replace("\n", "").split(" ")
                   for line in dictionary_file if line.strip()]
    self.char_to_id = {c: int(i) for c, i in entries}
    self.g2p = G2p()

    # download the pretrained Punkt tokenizer from NLTK. This is done only
    # the first time the code is executed on a machine, if it has been done
    # before, this line will be skipped and output a warning. We will probably
    # redirect warnings into a file rather than std_err in the future, since
    # there's also a lot of pytorch warnings going on etc.
    nltk.download('punkt', quiet=True)
def __init__(self, config=None):
    """
    Initialise the service from a configuration mapping.

    Args:
        config: mapping that must provide the "url" and "headers" entries;
            it is also forwarded to Service

    Raises:
        TypeError: if no config is given
    """
    # The default of None can never work because the entries below are read
    # unconditionally; fail early with a clear message instead of the opaque
    # "'NoneType' object is not subscriptable".
    if config is None:
        raise TypeError("config is required and must provide the 'url' and 'headers' entries")

    Service.__init__(self, config)
    self._url = config["url"]
    self._headers = config["headers"]
    # article counter starts at zero
    self._current_article = 0
def __init__(self, domain: LookupDomain, logger: DiasysLogger = DiasysLogger()):
    """
    Delegate entirely to the Service constructor.

    Args:
        domain: forwarded to Service
        logger: forwarded to Service as ``debug_logger``
    """
    Service.__init__(self, debug_logger=logger, domain=domain)
def __init__(
        self, domain: JSONLookupDomain,
        architecture: NetArchitecture = NetArchitecture.DUELING,
        hidden_layer_sizes: List[int] = [256, 700, 700],  # vanilla architecture
        shared_layer_sizes: List[int] = [256],
        value_layer_sizes: List[int] = [300, 300],
        advantage_layer_sizes: List[int] = [400, 400],  # dueling architecture
        lr: float = 0.0001, discount_gamma: float = 0.99,
        target_update_rate: int = 3,
        replay_buffer_size: int = 8192, batch_size: int = 64,
        buffer_cls: Type[Buffer] = NaivePrioritizedBuffer,
        eps_start: float = 0.3, eps_end: float = 0.0,
        l2_regularisation: float = 0.0, gradient_clipping: float = 5.0,
        p_dropout: float = 0.0, training_frequency: int = 2,
        train_dialogs: int = 1000,
        include_confreq: bool = False, logger: DiasysLogger = DiasysLogger(),
        max_turns: int = 25,
        summary_writer: SummaryWriter = None,
        device=torch.device('cpu'),
        obj_evaluator: ObjectiveReachedEvaluator = None):
    """
    Build the Q-network, the optional target network, and the optimizer.

    Args:
        domain: forwarded to RLPolicy and Service
        architecture: NetArchitecture.VANILLA builds a DQN from
            `hidden_layer_sizes`; any other value builds a DuelingDQN from
            `shared_layer_sizes`, `value_layer_sizes` and
            `advantage_layer_sizes`
        lr: learning rate of the Adam optimizer
        target_update_rate: if 1, vanilla dqn update
                            if > 1, double dqn with specified target update rate
        l2_regularisation: weight decay of the Adam optimizer
        gradient_clipping: stored on the instance; logged when > 0
        p_dropout: dropout rate handed to the network
        summary_writer: kept as ``self.writer``
        replay_buffer_size, batch_size, buffer_cls, discount_gamma,
        include_confreq, logger, max_turns, device, obj_evaluator:
            forwarded to RLPolicy
        eps_start, eps_end, training_frequency, train_dialogs: stored on the
            instance

    The mutable list defaults are only passed on to the network constructors
    in this method and are not modified here.
    """
    # `obj_evaluator` used to be passed positionally after the keyword
    # arguments, which is a SyntaxError. It is now passed by keyword
    # (assumes RLPolicy.__init__ names the parameter `obj_evaluator` -
    # TODO confirm).
    RLPolicy.__init__(
        self,
        domain, buffer_cls=buffer_cls,
        buffer_size=replay_buffer_size, batch_size=batch_size,
        discount_gamma=discount_gamma, include_confreq=include_confreq,
        logger=logger, max_turns=max_turns, device=device,
        obj_evaluator=obj_evaluator)
    Service.__init__(self, domain=domain)

    self.writer = summary_writer
    self.training_frequency = training_frequency
    self.train_dialogs = train_dialogs
    self.lr = lr
    self.gradient_clipping = gradient_clipping
    if gradient_clipping > 0.0 and self.logger:
        self.logger.info("Gradient Clipping: " + str(gradient_clipping))
    self.target_update_rate = target_update_rate
    self.epsilon_start = eps_start
    self.epsilon_end = eps_end

    # Select network architecture
    if architecture == NetArchitecture.VANILLA:
        if self.logger:
            self.logger.info("Architecture: Vanilla")
        self.model = DQN(self.state_dim, self.action_dim,
                         hidden_layer_sizes=hidden_layer_sizes,
                         dropout_rate=p_dropout)
    else:
        if self.logger:
            self.logger.info("Architecture: Dueling")
        self.model = DuelingDQN(
            self.state_dim, self.action_dim,
            shared_layer_sizes=shared_layer_sizes,
            value_layer_sizes=value_layer_sizes,
            advantage_layer_sizes=advantage_layer_sizes,
            dropout_rate=p_dropout)

    # Select network update
    self.target_model = None
    if target_update_rate > 1:
        if self.logger:
            self.logger.info("Update: Double")
        # NOTE(review): a target network is only created for the vanilla
        # architecture; with the dueling architecture target_model stays
        # None - confirm this is intended.
        if architecture == NetArchitecture.VANILLA:
            self.target_model = copy.deepcopy(self.model)
    elif self.logger:
        self.logger.info("Update: Vanilla")

    self.optim = optim.Adam(self.model.parameters(), lr=lr,
                            weight_decay=l2_regularisation)
    self.loss_fun = nn.SmoothL1Loss(reduction='none')

    # training bookkeeping
    self.train_call_count = 0
    self.total_train_dialogs = 0
    self.epsilon = self.epsilon_start
    self.turns = 0
    self.cumulative_train_dialogs = -1
def test_init(aff_nlg, domain):
    """
    Re-running Service.__init__ on the NLG object with a domain stores that
    domain on the object.
    """
    # first initialise through the object's own constructor ...
    aff_nlg.__init__(domain)
    # ... then through the Service base class with a different domain value
    Service.__init__(aff_nlg, domain='superhero')
    stored_domain = aff_nlg.domain
    assert stored_domain == 'superhero'
def __init__(self, domain: Domain = None):
    """
    Initialise the Service base class.

    Args:
        domain: forwarded to Service
    """
    Service.__init__(self, domain=domain)
def __init__(self, config, service_resolver):
    """
    Initialise the Service base class.

    Args:
        config: forwarded to Service
        service_resolver: forwarded to Service
    """
    Service.__init__(self, service_resolver=service_resolver, config=config)
def __init__(self, domain: Domain = None, conversation_log_dir: str = None,
             language: Language = None):
    """
    Store the log directory and reset the interaction counter.

    Args:
        domain: forwarded to Service
        conversation_log_dir: kept as ``self.conversation_log_dir``
        language: accepted but not used; English is always selected
    """
    Service.__init__(self, domain=domain)
    # NOTE(review): the `language` argument is ignored here - confirm intended
    self.language = Language.ENGLISH
    self.interaction_count = 0
    self.conversation_log_dir = conversation_log_dir
def __init__(self, domain=None, logger=None):
    """
    Initialise the service with a new belief state for the given domain.

    Args:
        domain: forwarded to Service and used to build the belief state
        logger: kept as ``self.logger``
    """
    Service.__init__(self, domain=domain)
    # belief state built from the same domain
    self.bs = BeliefState(domain)
    self.logger = logger