def accepts(self):
    """Return the inputs this component accepts.

    The result is a 2-tuple:

    * an ``InputFormat`` with id ``'filtered'`` and extension
      ``'.filtered.json'``;
    * an ``InputComponent`` wrapping ``FilterTweets``, which is handed this
      component's ``format_json`` setting.
    """
    filtered_file = InputFormat(self, format_id='filtered', extension='.filtered.json')
    filter_step = InputComponent(self, FilterTweets, format_json=self.format_json)
    return (filtered_file, filter_step)
def accepts(self):
    """Return the inputs this component accepts.

    The result is a 2-tuple:

    * an ``InputFormat`` with id ``'events'`` and extension
      ``'.deduplicated'``;
    * an ``InputComponent`` wrapping ``DeduplicateEvents``, configured with
      this component's ``similarity_threshold``.
    """
    events_file = InputFormat(self, format_id='events', extension='.deduplicated')
    dedup_step = InputComponent(
        self,
        DeduplicateEvents,
        similarity_threshold=self.similarity_threshold,
    )
    return (events_file, dedup_step)
def accepts(self):
    """Return the inputs this component accepts.

    The result is a 2-tuple:

    * an ``InputFormat`` with id ``'tokenized'`` and extension
      ``'tok.json'``;
    * an ``InputComponent`` wrapping ``Tokenize``, which receives this
      component's ``config``, ``strip_punctuation`` and ``to_lowercase``
      settings.
    """
    tokenized_file = InputFormat(self, format_id='tokenized', extension='tok.json')
    # NOTE(review): the keyword passed to Tokenize is ``lowercase`` while the
    # attribute read here is ``to_lowercase`` -- confirm that matches the
    # parameter name declared by Tokenize.
    tokenize_step = InputComponent(
        self,
        Tokenize,
        config=self.config,
        strip_punctuation=self.strip_punctuation,
        lowercase=self.to_lowercase,
    )
    return (tokenized_file, tokenize_step)
def accepts(self):
    """Return the initial inputs and upstream workflows accepted as input.

    The returned tuple is a disjunction: only one of its entries will be
    selected. The entries are:

    * an ``InputFormat`` with id ``'tiffdir'`` and extension ``'tiffdir'``,
      flagged as a directory (``directory=True``);
    * an ``InputComponent`` wrapping ``ExtractPages``.
    """
    tiff_directory = InputFormat(
        self,
        format_id='tiffdir',
        extension='tiffdir',
        directory=True,
    )
    page_extraction = InputComponent(self, ExtractPages)
    return (tiff_directory, page_extraction)
def accepts(self):
    """Return the inputs this component accepts.

    The result is a 2-tuple:

    * an ``InputFormat`` with id ``'enhanced_events'`` and extension
      ``'.enhanced'``;
    * an ``InputComponent`` wrapping ``EnhanceEvents``, configured with this
      component's ``similarity_threshold``.
    """
    enhanced_file = InputFormat(self, format_id='enhanced_events', extension='.enhanced')
    enhance_step = InputComponent(
        self,
        EnhanceEvents,
        similarity_threshold=self.similarity_threshold,
    )
    return (enhanced_file, enhance_step)
def accepts(self):
    """Return the initial inputs and upstream workflows accepted as input.

    The returned tuple is a disjunction: only one of its entries will be
    selected. In order, the entries are:

    * an ``InputFormat`` with id ``'folia'`` and extension ``'folia.xml'``;
    * an ``InputFormat`` with id ``'txt'`` and extension ``'txt'``;
    * an ``InputComponent`` wrapping ``ConvertToFoLiA``.
    """
    folia_file = InputFormat(self, format_id='folia', extension='folia.xml')
    text_file = InputFormat(self, format_id='txt', extension='txt')
    folia_conversion = InputComponent(self, ConvertToFoLiA)
    return (folia_file, text_file, folia_conversion)
def accepts(self):
    """Return the inputs this component accepts.

    The result is a 2-tuple:

    * an ``InputFormat`` with id ``'merged_events'`` and extension
      ``'.merged'``;
    * an ``InputComponent`` wrapping ``MergeEvents``, configured with this
      component's ``overlap_threshold`` and ``similarity_threshold``.
    """
    merged_file = InputFormat(self, format_id='merged_events', extension='.merged')
    merge_step = InputComponent(
        self,
        MergeEvents,
        overlap_threshold=self.overlap_threshold,
        similarity_threshold=self.similarity_threshold,
    )
    return (merged_file, merge_step)
def accepts(self):
    """Return the inputs this component accepts.

    The result is a 2-tuple:

    * an ``InputFormat`` with id ``'cityref'`` and extension ``'.json'``;
    * an ``InputComponent`` wrapping ``ExtractCityref``. Every option is
      forwarded under the same name as the attribute it is read from.
    """
    cityref_file = InputFormat(self, format_id='cityref', extension='.json')
    # Gathered after the InputFormat is built so attribute reads happen in
    # the same order as in a single inline call.
    extractor_options = {
        'config': self.config,
        'strip_punctuation': self.strip_punctuation,
        'to_lowercase': self.to_lowercase,
        'citylist': self.citylist,
        'skip_date': self.skip_date,
        'skip_month': self.skip_month,
        'skip_timeunit': self.skip_timeunit,
        'skip_day': self.skip_day,
    }
    cityref_step = InputComponent(self, ExtractCityref, **extractor_options)
    return (cityref_file, cityref_step)
def accepts(self):
    """Return the inputs this component accepts.

    The result is a 2-tuple:

    * an ``InputFormat`` with id ``'dateref'`` and extension
      ``'.dateref.json'``;
    * an ``InputComponent`` wrapping ``ExtractDateref``. The ``skip_*``
      attributes of this component are forwarded under ``skip_*match``
      keyword names; the remaining options keep their attribute names.
    """
    dateref_file = InputFormat(self, format_id='dateref', extension='.dateref.json')
    # Keys are the keyword names passed to ExtractDateref; note the
    # ``...match`` suffix on the skip flags, which the local attributes lack.
    extractor_options = {
        'config': self.config,
        'strip_punctuation': self.strip_punctuation,
        'to_lowercase': self.to_lowercase,
        'skip_datematch': self.skip_date,
        'skip_monthmatch': self.skip_month,
        'skip_timeunitmatch': self.skip_timeunit,
        'skip_daymatch': self.skip_day,
    }
    dateref_step = InputComponent(self, ExtractDateref, **extractor_options)
    return (dateref_file, dateref_step)
def accepts(self):
    """Return the inputs this component accepts.

    The result is a 2-tuple:

    * an ``InputFormat`` with id ``'txt'`` and extension ``'txt'``;
    * an ``InputComponent`` wrapping ``Lowercaser`` (no extra options).
    """
    text_file = InputFormat(self, format_id='txt', extension='txt')
    lowercase_step = InputComponent(self, Lowercaser)
    return (text_file, lowercase_step)
def accepts(self):
    """Return the single upstream workflow this component accepts as input.

    The only accepted input is an ``InputComponent`` wrapping
    ``OCR_document``. It is returned as a bare object, not wrapped in a
    tuple.
    """
    # NOTE(review): returned unwrapped -- confirm the caller handles a single
    # InputComponent as well as a tuple of alternatives.
    ocr_source = InputComponent(self, OCR_document)
    return ocr_source