def _summarize(self, string, mode, postprocess, **kwargs):
    """Generate a summary for one input string using a task-prefix prompt.

    Parameters
    ----------
    string : str
        Raw input text to summarize.
    mode : str
        Task prefix prepended to the cleaned input (e.g. a task keyword).
    postprocess : bool
        When True, run ROUGE filtering, summary normalization, and
        'lapor'-phrase removal on the generated summary.
    **kwargs
        Forwarded to ``filter_rouge``.

    Returns
    -------
    str
        Generated (and optionally postprocessed) summary.
    """
    prompt = f'{mode}: {cleaning(string)}'
    summary = upperfirst(self._predict(prompt))
    if not postprocess:
        return summary
    # Postprocessing pipeline: keep sentences overlapping the source,
    # normalize the text, then strip reporting boilerplate.
    summary = filter_rouge(string, summary, **kwargs)
    summary = postprocessing_summarization(summary)
    return find_lapor_and_remove(string, summary)
def _summarize(
    self,
    strings,
    mode,
    top_p=0.7,
    temperature=1.0,
    postprocess=True,
    **kwargs,
):
    """Summarize a batch of strings via the graph executor with sampling.

    Parameters
    ----------
    strings : list of str
        Raw input texts.
    mode : str
        Either 'ringkasan' (summary) or 'tajuk' (headline); case-insensitive.
    top_p : float
        Nucleus-sampling cumulative probability fed to the graph.
    temperature : float
        Sampling temperature fed to the graph.
    postprocess : bool
        When True (and mode is not 'tajuk'), apply ROUGE filtering,
        normalization, and 'lapor'-phrase removal to each summary.
    **kwargs
        Forwarded to ``filter_rouge``.

    Returns
    -------
    list of str

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values.
    """
    mode = mode.lower()
    if mode not in ['ringkasan', 'tajuk']:
        raise ValueError('mode only supports [`ringkasan`, `tajuk`]')

    prompts = [f'{mode}: {cleaning(text)}' for text in strings]
    # Encode each prompt, append the EOS id (1), then pad to model length.
    encoded = [self._tokenizer.encode(prompt) + [1] for prompt in prompts]
    batch = pad_sequences(encoded, padding='post', maxlen=self._maxlen)

    outputs = self._execute(
        inputs=[batch, top_p, temperature],
        input_labels=['Placeholder', 'top_p', 'temperature'],
        output_labels=['logits'],
    )
    decoded_ids = outputs['logits'].tolist()

    results = []
    for idx, ids in enumerate(decoded_ids):
        summary = self._tokenizer.decode(ids)
        # Headline mode ('tajuk') skips the summary-cleanup pipeline.
        if postprocess and mode != 'tajuk':
            summary = filter_rouge(strings[idx], summary, **kwargs)
            summary = postprocessing_summarization(summary)
            summary = find_lapor_and_remove(strings[idx], summary)
        results.append(summary)
    return results
def _summarize(
    self,
    strings,
    mode,
    decoder='greedy',
    top_p=0.7,
    postprocess=True,
    **kwargs,
):
    """Summarize a batch of strings with a selectable decoding strategy.

    Parameters
    ----------
    strings : list of str
        Raw input texts.
    mode : str
        Either 'ringkasan' (summary) or 'tajuk' (headline); case-insensitive.
    decoder : str
        Decoding strategy: 'greedy', 'beam', or 'nucleus'; case-insensitive.
    top_p : float
        Nucleus-sampling cumulative probability; must be in (0, 1).
    postprocess : bool
        When True (and mode is not 'tajuk'), apply ROUGE filtering,
        normalization, and 'lapor'-phrase removal to each summary.
    **kwargs
        Forwarded to ``filter_rouge``.

    Returns
    -------
    list of str

    Raises
    ------
    ValueError
        If ``mode``, ``top_p``, or ``decoder`` is invalid.
    """
    mode = mode.lower()
    if mode not in ['ringkasan', 'tajuk']:
        raise ValueError('mode only supports [`ringkasan`, `tajuk`]')
    if not 0 < top_p < 1:
        raise ValueError('top_p must be bigger than 0 and less than 1')
    decoder = decoder.lower()
    if decoder not in ['greedy', 'beam', 'nucleus']:
        # BUG FIX: the message previously said `mode`, but this branch
        # validates `decoder` — the wrong word misled callers.
        raise ValueError(
            'decoder only supports [`greedy`, `beam`, `nucleus`]')

    strings_ = [f'{mode}: {cleaning(string)}' for string in strings]
    # Append the EOS id (1) to each encoded prompt before padding.
    batch_x = [self._tokenizer.encode(string) + [1] for string in strings_]
    batch_x = padding_sequence(batch_x)
    r = self._execute(
        inputs=[batch_x, top_p],
        input_labels=['Placeholder', 'Placeholder_2'],
        output_labels=[decoder],
    )
    p = r[decoder].tolist()

    results = []
    for no, r in enumerate(p):
        summary = self._tokenizer.decode(r)
        # Headline mode ('tajuk') skips the summary-cleanup pipeline.
        if postprocess and mode != 'tajuk':
            summary = filter_rouge(strings[no], summary, **kwargs)
            summary = postprocessing_summarization(summary)
            summary = find_lapor_and_remove(strings[no], summary)
        results.append(summary)
    return results
def _summarize(
    self,
    strings,
    mode,
    decoder='greedy',
    top_p=0.7,
    postprocess=True,
    **kwargs,
):
    """Summarize a batch of strings by running the session graph directly.

    Parameters
    ----------
    strings : list of str
        Raw input texts.
    mode : str
        Either 'ringkasan' (summary) or 'tajuk' (headline); case-insensitive.
    decoder : str
        Decoding strategy key looked up in ``self._mapping``
        ('greedy', 'beam', or 'nucleus'); case-insensitive.
    top_p : float
        Nucleus-sampling cumulative probability; must be in (0, 1).
    postprocess : bool
        When True, apply ROUGE filtering, normalization, and 'lapor'-phrase
        removal to each summary.
        NOTE(review): unlike the sibling variants, this one does not skip
        postprocessing for 'tajuk' — confirm whether that is intentional.
    **kwargs
        Forwarded to ``filter_rouge``.

    Returns
    -------
    list of str

    Raises
    ------
    ValueError
        If ``mode``, ``top_p``, or ``decoder`` is invalid.
    """
    mode = mode.lower()
    if mode not in ['ringkasan', 'tajuk']:
        raise ValueError('mode only supports [`ringkasan`, `tajuk`]')
    if not 0 < top_p < 1:
        raise ValueError('top_p must be bigger than 0 and less than 1')
    decoder = decoder.lower()
    output = self._mapping.get(decoder)
    # BUG FIX: previously tested `if not decoder`, which is never true for a
    # non-empty string, so an unknown decoder slipped through with
    # output=None and failed cryptically inside sess.run. Validate the
    # mapping lookup instead. (Message also said `mode` instead of `decoder`.)
    if output is None:
        raise ValueError(
            'decoder only supports [`greedy`, `beam`, `nucleus`]')

    strings_ = [f'{mode}: {cleaning(string)}' for string in strings]
    # Append the EOS id (1) to each encoded prompt before padding.
    batch_x = [self._tokenizer.encode(string) + [1] for string in strings_]
    batch_x = padding_sequence(batch_x)
    p = self._sess.run(
        output,
        feed_dict={self._X: batch_x, self._top_p: top_p},
    ).tolist()

    results = []
    for no, r in enumerate(p):
        summary = self._tokenizer.decode(r)
        if postprocess:
            summary = filter_rouge(strings[no], summary, **kwargs)
            summary = postprocessing_summarization(summary)
            summary = find_lapor_and_remove(strings[no], summary)
        results.append(summary)
    return results