def _summarize(self, string, mode, postprocess, **kwargs):
    """Summarize a single string using the given task prefix.

    Parameters
    ----------
    string : str
        Raw input text.
    mode : str
        Task prefix fed to the model, e.g. 'ringkasan' or 'tajuk'.
    postprocess : bool
        When True, run `postprocess_summary` on the result
        (skipped for 'tajuk' — titles are returned as-is).
    **kwargs
        Forwarded to `postprocess_summary`.

    Returns
    -------
    str
        Generated summary with its first letter uppercased.
    """
    cleaned = summarization_textcleaning(string)
    summary = upperfirst(self._predict(f'{mode}: {cleaned}'))
    # Only summaries are postprocessed; titles ('tajuk') are left untouched.
    if postprocess and mode != 'tajuk':
        summary = postprocess_summary(string, summary, **kwargs)
    return summary
def _summarize( self, strings, top_p=0.7, temperature=1.0, postprocess=True, **kwargs, ): strings_ = [summarization_textcleaning(string) for string in strings] batch_x = [self._tokenizer.encode(string) + [1] for string in strings_] batch_x = pad_sequences(batch_x, padding='post') r = self._execute( inputs=[batch_x, top_p, temperature], input_labels=['Placeholder', 'top_p', 'temperature'], output_labels=['logits'], ) p = r['logits'].tolist() results = [] for no, r in enumerate(p): summary = self._tokenizer.decode(r) if postprocess: summary = postprocess_summary(strings[no], summary, **kwargs) results.append(summary) return results
def _summarize(self, strings, mode, postprocess, **kwargs):
    """Summarize a batch of strings using the given task prefix.

    Parameters
    ----------
    strings : list of str
        Raw input texts.
    mode : str
        Task prefix fed to the model, e.g. 'ringkasan' or 'tajuk'.
    postprocess : bool
        When True, run `postprocess_summary` on each summary
        (skipped for 'tajuk').
    **kwargs
        Forwarded to `postprocess_summary`.

    Returns
    -------
    list of str
    """
    # BUG FIX: the prompt previously hardcoded 'ringkasan:' even though a
    # `mode` parameter is accepted (and 'tajuk' is explicitly checked below).
    # Use the requested mode, matching the sibling implementations.
    summaries = self._predict([
        f'{mode}: {summarization_textcleaning(string)}' for string in strings
    ])
    if postprocess and mode != 'tajuk':
        summaries = [
            postprocess_summary(strings[no], summary, **kwargs)
            for no, summary in enumerate(summaries)
        ]
    return summaries
def _summarize( self, strings, mode, decoder='greedy', top_p=0.7, postprocess=True, **kwargs, ): mode = mode.lower() if mode not in ['ringkasan', 'tajuk']: raise ValueError('mode only supports [`ringkasan`, `tajuk`]') if not 0 < top_p < 1: raise ValueError('top_p must be bigger than 0 and less than 1') decoder = decoder.lower() if decoder not in ['greedy', 'beam', 'nucleus']: raise ValueError( 'mode only supports [`greedy`, `beam`, `nucleus`]') strings_ = [ f'{mode}: {summarization_textcleaning(string)}' for string in strings ] batch_x = [self._tokenizer.encode(string) + [1] for string in strings_] batch_x = padding_sequence(batch_x) r = self._execute( inputs=[batch_x, top_p], input_labels=['Placeholder', 'Placeholder_2'], output_labels=[decoder], ) p = r[decoder].tolist() results = [] for no, r in enumerate(p): summary = self._tokenizer.decode(r) if postprocess and mode != 'tajuk': summary = postprocess_summary(strings[no], summary, **kwargs) results.append(summary) return results