def run(self, filepath):
    """Extract features from the plaintext transcript at ``filepath`` into a CSV.

    The output path comes from ``self.derive_new_file_path(filepath, ".csv")``.
    Extraction only happens when ``file_utils.should_run`` returns a truthy
    value for the input/output pair; the output path is emitted either way.

    When ``self.filler_dir`` is set and the input's base name (extension
    stripped) is found in ``self.filler_files``, the matching file in that
    directory is loaded as a second transcript and its ``tokens`` are handed
    to the extractor as ``transcript_utterances_fillers``; otherwise ``None``
    is passed.
    """
    self.log(logging.INFO, "Starting %s" % filepath)
    out_file = self.derive_new_file_path(filepath, ".csv")

    # Nothing to compute: still hand the existing output downstream.
    if not file_utils.should_run(filepath, out_file):
        self.emit(out_file)
        return

    main_transcript = transcript.PlaintextTranscript(
        filepath=filepath,
        label=None,
        pos_tagger_path=self.pos_tagger_path,
    )

    filler_tokens = None
    if self.filler_dir:
        file_id = os.path.basename(file_utils.strip_ext(filepath))
        if file_id in self.filler_files:
            filler_path = os.path.join(self.filler_dir,
                                       self.filler_files[file_id])
            filler_tokens = transcript.PlaintextTranscript(
                filepath=filler_path,
                label=None,
                pos_tagger_path=self.pos_tagger_path,
            ).tokens

    self.feature_extractor.extract(
        main_transcript,
        out_csv=out_file,
        transcript_utterances_fillers=filler_tokens,
    )
    self.log(logging.INFO, "Done %s -> %s" % (filepath, out_file))
    self.emit(out_file)
def run(self, in_file):
    """Run the configured shell command on ``in_file`` and save the hypothesis text.

    ``self.cmd`` is a command template in which ``**in_placeholder**`` is
    replaced by the absolute path of ``in_file``. The command's stderr is
    searched with ``self.regex``; capture group 1 is written to the derived
    ``txt`` output file. The command is only executed when
    ``file_utils.should_run`` returns a truthy value.

    On failure (non-zero exit code, or no regex match in the output) an
    error is logged and nothing is emitted. Otherwise ``[out_file]`` is
    emitted.
    """
    self.log(logging.INFO, "Starting %s" % (in_file))
    out_file = self.derive_new_file_path(in_file, 'txt')

    if file_utils.should_run(in_file, out_file):
        cmd = self.cmd.replace("**in_placeholder**", os.path.abspath(in_file))
        # NOTE(review): the command goes through the shell with the file path
        # substituted verbatim; paths with spaces or shell metacharacters
        # rely on the quoting in the self.cmd template -- confirm.
        process = subprocess.Popen(cmd, shell=True,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        _, err = process.communicate()
        res = process.returncode

        # str() of the captured bytes presumably leaves newlines as literal
        # backslash-n sequences; turn them back into real newlines so the
        # regexes see one message per line.
        err_text = str(err).replace("\\n", "\n")

        if res != 0:
            error_lines = self.error_regex.findall(err_text)
            self.log(
                logging.ERROR,
                "Failed %s -> %s with error code %i. cmd: %s. Error message: %s"
                % (in_file, out_file, res, cmd, "\n\t".join(error_lines)))
            return

        match = self.regex.search(err_text)
        if match is None:
            # A zero exit code does not guarantee a hypothesis was printed;
            # report it instead of failing with AttributeError on None.
            self.log(
                logging.ERROR,
                "Failed %s -> %s. No hypothesis found in output. cmd: %s"
                % (in_file, out_file, cmd))
            return

        with open(out_file, "w") as f:
            f.write(match.group(1))
        self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))

    self.emit([out_file])
def run(self):
    """Compute and write the features for ``self.filepath``.

    Does nothing unless ``file_utils.should_run(self.filepath, self.out_file)``
    returns a truthy value. Otherwise it records the input path under the
    ``'FileID'`` feature, reads the whole file into ``self.tokens``, runs the
    word-statistics, word-frequency and CoreNLP steps in that order, and
    finally writes the features to ``self.out_file``.
    """
    if not file_utils.should_run(self.filepath, self.out_file):
        return

    self.features['FileID'] = self.filepath

    with open(self.filepath) as source:
        self.tokens = source.read()

    # The steps below run in a fixed order; the parse step follows the
    # CoreNLP invocation.
    self.compute_basic_word_stats()
    self.compute_word_frequency_norms()
    self._run_chinese_corenlp(self.filepath)
    self._parse_corenlp_output()

    self.write_features(self.out_file, debug=False)
def run(self, in_file):
    """Apply ``self.matlab_function`` to ``in_file`` and emit the output path.

    The output path is derived from ``in_file`` with extension
    ``self.out_ext``. The function is only called when
    ``file_utils.should_run`` returns a truthy value.

    If ``self.matlab_function`` raises, the error is logged and the method
    returns without logging "Done" or emitting, so a missing or partial
    output is not passed on.
    """
    self.log(logging.INFO, "Starting %s" % (in_file))
    out_file = self.derive_new_file_path(in_file, self.out_ext)

    if file_utils.should_run(in_file, out_file):
        try:
            self.matlab_function(in_file, out_file)
        except Exception as e:
            # Broad catch kept on purpose: the exception types raised by the
            # wrapped function are not visible here.
            self.log(
                logging.ERROR,
                "Failed %s -> %s. Error message: %s" % (in_file, out_file, e))
            return

        self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))

    self.emit(out_file)
def run(self, in_file):
    """Run the formatted ``self.command`` on ``in_file`` and emit the output path.

    ``self.command`` is formatted with ``in_file`` and ``out_file`` keyword
    fields, split on single spaces and passed to ``shell_run``. The command
    is only executed when ``file_utils.should_run`` returns a truthy value.
    A non-zero result is logged as an error and nothing is emitted.
    """
    self.log(logging.INFO, "Starting %s" % in_file)
    out_file = self.derive_new_file_path(in_file, self.ext)

    # Output needs no rebuild: pass the existing file on and stop.
    if not file_utils.should_run(in_file, out_file):
        self.emit(out_file)
        return

    cmd = self.command.format(in_file=in_file, out_file=out_file)
    argv = cmd.split(" ")
    exit_code = shell_run(argv)

    if exit_code != 0:
        failure = "Failed %s -> %s with error code %i. cmd: %s" % (
            in_file, out_file, exit_code, cmd)
        self.log(logging.ERROR, failure)
        return

    self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))
    self.emit(out_file)
def run(self, mp3_file):
    """Decode ``mp3_file`` to a wav file with ``lame`` and emit the wav path.

    Inputs whose name does not end in ``.mp3`` are rejected with an error
    log. ``lame --decode`` is only run when ``file_utils.should_run``
    returns a truthy value for the input/output pair. A non-zero result
    from ``shell_run`` is logged as an error and nothing is emitted.
    """
    self.log(logging.INFO, "Starting %s" % mp3_file)

    is_mp3 = mp3_file.endswith(".mp3")
    if not is_mp3:
        self.log(logging.ERROR, "Failed %s. Not mp3 file" % mp3_file)
        return

    wav_file = self.derive_new_file_path(mp3_file, "wav")

    if file_utils.should_run(mp3_file, wav_file):
        decode_cmd = ["lame", "--decode", mp3_file, wav_file]
        exit_code = shell_run(decode_cmd)

        if exit_code != 0:
            failure = "Failed %s -> %s with lame error code %i" % (
                mp3_file, wav_file, exit_code)
            self.log(logging.ERROR, failure)
            return

        self.log(logging.INFO, "Done %s -> %s" % (mp3_file, wav_file))

    self.emit(wav_file)
def run(self, in_file):
    """Run the openSMILE executable on ``in_file`` and emit ``[out_file]``.

    The command line is ``self.opensmile_exec -C self.conf_file -I in_file
    self.out_flag out_file`` followed by ``self.extra_flags``. It is only
    executed when ``file_utils.should_run`` returns a truthy value. A
    non-zero result from ``shell_run`` is logged as an error and nothing is
    emitted.
    """
    self.log(logging.INFO, "Starting %s" % in_file)
    out_file = self.derive_new_file_path(in_file, self.out_ext)

    # Output needs no rebuild: pass the existing file on and stop.
    if not file_utils.should_run(in_file, out_file):
        self.emit([out_file])
        return

    base_args = [
        self.opensmile_exec,
        "-C", self.conf_file,
        "-I", in_file,
        self.out_flag, out_file,
    ]
    cmd = base_args + self.extra_flags
    exit_code = shell_run(cmd)

    if exit_code != 0:
        failure = "Failed %s -> %s with SmileExtract error code %i. cmd: %s" % (
            in_file, out_file, exit_code, " ".join(cmd))
        self.log(logging.ERROR, failure)
        return

    self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))
    self.emit([out_file])
def run(self, in_file):
    """Run the syllable-nuclei praat script on ``in_file`` and emit ``[out_file]``.

    ``praat --run scripts/syllable_nuclei_v2.praat <absolute in_file>`` is
    passed to ``shell_run`` with ``stdout=out_file``, where ``out_file`` is
    the derived ``csv`` path. The script is only run when
    ``file_utils.should_run`` returns a truthy value. A non-zero result is
    logged as an error and nothing is emitted.
    """
    self.log(logging.INFO, "Starting %s" % in_file)
    out_file = self.derive_new_file_path(in_file, 'csv')

    if file_utils.should_run(in_file, out_file):
        script_path = 'scripts/syllable_nuclei_v2.praat'
        cmd = ['praat', '--run', script_path, os.path.abspath(in_file)]
        exit_code = shell_run(cmd, stdout=out_file)

        if exit_code != 0:
            failure = "Failed %s -> %s with error code %i. cmd: %s" % (
                in_file, out_file, exit_code, " ".join(cmd))
            self.log(logging.ERROR, failure)
            return

        self.log(logging.INFO, "Done %s -> %s" % (in_file, out_file))

    self.emit([out_file])
def run(self, wav_file):
    """Resample ``wav_file`` to ``self.new_sr`` with ``sox`` and emit the new path.

    Inputs whose name does not end in ``.wav`` are rejected with an error
    log. ``sox <wav_file> --rate <self.new_sr> <new_wav_file>`` is only run
    when ``file_utils.should_run`` returns a truthy value for the
    input/output pair. A non-zero result from ``shell_run`` is logged as an
    error and nothing is emitted.
    """
    self.log(logging.INFO, "Starting %s" % (wav_file))

    if not wav_file.endswith(".wav"):
        self.log(logging.ERROR, "Failed %s. Not wav file" % (wav_file))
        return

    new_wav_file = self.derive_new_file_path(wav_file, "wav")

    if file_utils.should_run(wav_file, new_wav_file):
        res = shell_run(
            ["sox", wav_file, "--rate", str(self.new_sr), new_wav_file])

        if res != 0:
            # The tool invoked here is sox, so name it in the message.
            self.log(
                logging.ERROR,
                "Failed %s -> %s with sox error code %i"
                % (wav_file, new_wav_file, res))
            return

        self.log(logging.INFO, "Done %s -> %s" % (wav_file, new_wav_file))

    self.emit(new_wav_file)