def main():
    """
    Create a temporary dataset file from stdin input, then run beam-search
    summarization on it with the given model.  The generated summary is
    printed to standard out.
    """
    args, unknown_args = prepare_arg_parser().parse_known_args()
    model_file = args.model_file

    with suppress_stdout_stderr():
        model, _optimizer, vocab, _stats, cfg = train.load_model(
            model_file, unknown_args
        )

    # mkstemp returns an already-open OS-level file descriptor; wrap and use
    # it directly instead of discarding it (the original leaked the fd and
    # re-opened the file by path).
    fd, filename = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "w") as f:
            input_ = sys.stdin.read()
            article = preprocess.parse(input_)
            # dataset rows are "<article>\t<summary>"; a stub summary is
            # enough since we only run inference here
            print(f"{article}\tSUMMARY_STUB", file=f)

        with suppress_stdout_stderr():
            dataset = Dataset(filename, vocab, cfg)
        batch = next(dataset.generator(1, cfg.pointer))

        # don't enforce any min lengths (useful for short cmdline summaries)
        setattr(cfg, "min_summary_length", 1)
        bs = BeamSearch(model, cfg=cfg)
        summary = evaluate.batch_to_text(bs, batch)[0]
        print(f"SUMMARY:\n{summary}")
    finally:
        os.remove(filename)
def main():
    """
    Evaluate a model on a test file using repeatedly drawn evaluation sets
    of a given size (50 samples by default).  Prints the variance of the
    rouge scores and of the validation score to the console, together with
    the average wall-clock time one evaluation set takes.
    """
    args, unknown_args = prepare_arg_parser().parse_known_args()
    model_file = args.model_file
    test_file = args.test_file
    set_size = args.set_size
    samples = args.samples

    with suppress_stdout_stderr():
        model, _optimizer, vocab, _stats, cfg = train.load_model(
            model_file, unknown_args)
    model.eval()
    dataset = Dataset(test_file, vocab, cfg)

    rouge1, rouge2, rougel = [], [], []
    valid_scores = []
    durations = []
    for step in range(samples):
        print("Step:", step)
        begin = time.time()
        with suppress_stdout_stderr():
            scores = evaluate.evaluate(
                model, dataset, cfg, limit=set_size, shuffle=True)
        # rouge f-scores are collected as percentages
        rouge1.append(scores["rouge-1"]["f"] * 100)
        rouge2.append(scores["rouge-2"]["f"] * 100)
        rougel.append(scores["rouge-l"]["f"] * 100)
        valid_scores.append(calc_validation_score(scores))
        durations.append(time.time() - begin)

    print(f"Test File: {test_file}, Set Size: {set_size}, Samples: {samples}")
    print(
        "r1_variance,r2_variance,rl_variance,validation_variance,avg_time_for_set"
    )
    print("%.2f,%.2f,%.2f,%.2f,%d" % (
        pvariance(rouge1),
        pvariance(rouge2),
        pvariance(rougel),
        pvariance(valid_scores),
        mean(durations),
    ))
def continous():
    """
    Run a simple continuous-recognition demo: build a PocketSphinx decoder
    from the bundled en-us model, then pull ten candidate utterances from
    loop_decode() and print each guess with its elapsed time.
    """
    MODELDIR = "pocketsphinx/model"
    DATADIR = "pocketsphinx/test/data"  # NOTE(review): unused in this function
    # Create a decoder with certain model
    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(MODELDIR, 'en-us/en-us'))
    config.set_string('-lm', os.path.join(MODELDIR, 'en-us/en-us.lm.bin'))
    config.set_string('-dict', os.path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))
    # Decode streaming data.
    try:
        with suppress_stdout_stderr():
            decoder = Decoder(config)
    except RuntimeError:
        time.sleep(1) # try waiting and trying again
        # NOTE(review): the retry is not wrapped in suppress_stdout_stderr,
        # so a second failure raises and its output is visible — presumably
        # intentional so the error can be seen; confirm.
        decoder = Decoder(config)
    looper = loop_decode(decoder=decoder)
    # The generator yields its previous result before recording; the first
    # value is therefore always None (nothing decoded yet).
    x = looper.next()
    assert(x == None)
    for y in xrange(0,10):
        # Each next() records one chunk of audio and yields a
        # CandiateUtterance (see loop_decode).
        x = looper.next()
        print "Guess:", x.hypothesis.hypstr
        # NOTE(review): elasped_time is (n2-n1).microseconds in loop_decode,
        # so /1000.0 yields milliseconds as labeled — but .microseconds wraps
        # at one second; confirm recordings never exceed that.
        print "Elapsed (milliseconds): ", x.elasped_time / 1000.0
        print "="*50
def loop_decode(seconds = 3, decoder = None):
    """
    Generator that repeatedly records `seconds` of microphone audio,
    feeds it through the given PocketSphinx decoder, and yields the
    resulting CandiateUtterance.

    The yield happens BEFORE each recording pass, so the first value
    yielded is always None; every later next() call performs one full
    record-and-decode cycle and yields its result.
    """
    # PyAudio capture parameters: 16 kHz mono 16-bit, read in 1024-frame chunks.
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    RECORD_SECONDS = seconds
    cu = None
    try:
        while True:
            # Hand back the previous cycle's result (None on the first pass).
            yield cu
            with suppress_stdout_stderr():
                p = pyaudio.PyAudio()
                stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
            print("* recording")
            frames = []
            n_frames = int(RATE / CHUNK * RECORD_SECONDS)
            n1 = datetime.datetime.now()
            decoder.start_utt()
            for i in range(0, n_frames):
                data = stream.read(CHUNK)
                with suppress_stdout_stderr():
                    if data:
                        # Stream raw PCM into the decoder; (False, False) —
                        # presumably (no_search, full_utt) per the
                        # pocketsphinx API — TODO confirm.
                        decoder.process_raw(data, False, False)
                        frames.append(data)
                if not i%10:
                    print i  # coarse progress indicator
            print("* done recording")
            decoder.end_utt()
            # Tear down the audio stream each cycle; a fresh PyAudio is
            # created per iteration above.
            stream.stop_stream()
            stream.close()
            p.terminate()
            with suppress_stdout_stderr():
                # Package the decode result for the caller.
                cu = CandiateUtterance()
                cu.hypothesis = decoder.hyp()
                cu.hypothesis_segments = [seg.word for seg in decoder.seg()]
                cu.nbest = zip(range(10), decoder.nbest())
            n2 = datetime.datetime.now()
            # NOTE(review): .microseconds is only the sub-second component,
            # not total elapsed time — wraps for recordings over one second.
            cu.elasped_time = (n2-n1).microseconds
    except GeneratorExit:
        #finished
        pass
def fb_fit_predict(station_measure, df, scale, changepoints, future, verbose_level: int=500):
    """
    Fit a Prophet model for one (station, measure) pair and predict over
    the supplied future frame.

    :param station_measure: tuple of (station id, measure column name)
    :param df: dataframe with 'stationId', 'utc_time' and the measure column
    :param scale: Prophet changepoint_prior_scale
    :param changepoints: Prophet n_changepoints
    :param future: dataframe of future timestamps to predict for
    :param verbose_level: values >= 2 enable progress output
    :returns: list of [station, measure, predicted yhat values]
    """
    station, measure = station_measure
    if verbose_level >= 2:
        print('fitting model on {} {}'.format(station, measure))

    prophet = Prophet(changepoint_prior_scale=scale, n_changepoints=changepoints)

    # Prophet expects exactly two columns named 'ds' (time) and 'y' (value).
    fit_frame = df.loc[df.stationId == station][['utc_time', measure]].copy()
    fit_frame.columns = ['ds', 'y']

    # Prophet's fitting routine is chatty; silence it.
    with suppress_stdout_stderr():
        prophet.fit(fit_frame)

    prediction = prophet.predict(future)
    return [station, measure, prediction['yhat'].values]
def get_scores(self, hypothesis, references):
    """
    Compute rouge scores for hypothesis/reference summary pairs.

    Result format::

        {"rouge-1": {"p": 0.5, "r": 0.3, "f": 0.4},
         "rouge-2": {...},
         "rouge-l": {...}}

    :param hypothesis: A list of generated summaries
    :param references: A list of corresponding reference summaries
    :returns: A dictionary with the rouge scores
    """
    split_refs = [self.split_sentences(ref) for ref in references]
    split_hyps = [self.split_sentences(hyp) for hyp in hypothesis]

    if self.use_python:
        return self._get_scores_python(split_hyps, split_refs)
    # the perl scorer is noisy on stdout/stderr, so silence it
    with suppress_stdout_stderr():
        return self._get_scores_perl(split_hyps, split_refs)