Example #1
0
 def accepts(self):
     """Return the pair of inputs declared for this component.

     The first entry is an InputFormat with format_id 'filtered' and
     extension '.filtered.json'; the second is an InputComponent built from
     FilterTweets, to which this component's ``format_json`` value is
     forwarded.
     """
     filtered_file = InputFormat(
         self, format_id='filtered', extension='.filtered.json')
     filter_step = InputComponent(
         self, FilterTweets, format_json=self.format_json)
     return (filtered_file, filter_step)
Example #2
0
 def accepts(self):
     """Return the pair of inputs declared for this component.

     The first entry is an InputFormat with format_id 'events' and extension
     '.deduplicated'; the second is an InputComponent built from
     DeduplicateEvents, which receives this component's
     ``similarity_threshold``.
     """
     events_file = InputFormat(
         self, format_id='events', extension='.deduplicated')
     dedup_step = InputComponent(
         self,
         DeduplicateEvents,
         similarity_threshold=self.similarity_threshold,
     )
     return (events_file, dedup_step)
Example #3
0
 def accepts(self):
     """Return the pair of inputs declared for this component.

     The first entry is an InputFormat with format_id 'tokenized' and
     extension 'tok.json'; the second is an InputComponent built from
     Tokenize. Tokenize receives ``config`` and ``strip_punctuation``
     unchanged, and this component's ``to_lowercase`` value under the
     keyword ``lowercase``.
     """
     tokenized_file = InputFormat(
         self, format_id='tokenized', extension='tok.json')
     tokenize_step = InputComponent(
         self,
         Tokenize,
         config=self.config,
         strip_punctuation=self.strip_punctuation,
         lowercase=self.to_lowercase,
     )
     return (tokenized_file, tokenize_step)
Example #4
0
 def accepts(self):
     """Return the alternative inputs this component accepts.

     The result is a tuple of initial inputs and other workflows; it is a
     disjunction, so only one of the entries will be selected. Here the
     options are an InputFormat for a 'tiffdir' directory and an
     InputComponent built from ExtractPages.
     """
     tiff_directory = InputFormat(
         self, format_id='tiffdir', extension='tiffdir', directory=True)
     page_extraction = InputComponent(self, ExtractPages)
     return (tiff_directory, page_extraction)
Example #5
0
 def accepts(self):
     """Return the pair of inputs declared for this component.

     The first entry is an InputFormat with format_id 'enhanced_events' and
     extension '.enhanced'; the second is an InputComponent built from
     EnhanceEvents, which receives this component's
     ``similarity_threshold``.
     """
     enhanced_file = InputFormat(
         self, format_id='enhanced_events', extension='.enhanced')
     enhance_step = InputComponent(
         self,
         EnhanceEvents,
         similarity_threshold=self.similarity_threshold,
     )
     return (enhanced_file, enhance_step)
Example #6
0
 def accepts(self):
     """Return the alternative inputs this component accepts.

     The result is a tuple of initial inputs and other workflows; it is a
     disjunction, so only one of the entries will be selected. The options,
     in order, are a 'folia' InputFormat ('folia.xml'), a 'txt' InputFormat
     ('txt'), and an InputComponent built from ConvertToFoLiA.
     """
     folia_file = InputFormat(self, format_id='folia', extension='folia.xml')
     text_file = InputFormat(self, format_id='txt', extension='txt')
     conversion = InputComponent(self, ConvertToFoLiA)
     return (folia_file, text_file, conversion)
Example #7
0
 def accepts(self):
     """Return the pair of inputs declared for this component.

     The first entry is an InputFormat with format_id 'merged_events' and
     extension '.merged'; the second is an InputComponent built from
     MergeEvents, which receives this component's ``overlap_threshold`` and
     ``similarity_threshold``.
     """
     merged_file = InputFormat(
         self, format_id='merged_events', extension='.merged')
     merge_step = InputComponent(
         self,
         MergeEvents,
         overlap_threshold=self.overlap_threshold,
         similarity_threshold=self.similarity_threshold,
     )
     return (merged_file, merge_step)
Example #8
0
 def accepts(self):
     """Return the pair of inputs declared for this component.

     The first entry is an InputFormat with format_id 'cityref' and
     extension '.json'; the second is an InputComponent built from
     ExtractCityref. All of this component's extraction settings are passed
     to ExtractCityref under the same keyword names.
     """
     cityref_file = InputFormat(self, format_id='cityref', extension='.json')
     # Settings forwarded verbatim to ExtractCityref, in declaration order.
     extraction_options = {
         'config': self.config,
         'strip_punctuation': self.strip_punctuation,
         'to_lowercase': self.to_lowercase,
         'citylist': self.citylist,
         'skip_date': self.skip_date,
         'skip_month': self.skip_month,
         'skip_timeunit': self.skip_timeunit,
         'skip_day': self.skip_day,
     }
     cityref_step = InputComponent(self, ExtractCityref, **extraction_options)
     return (cityref_file, cityref_step)
Example #9
0
 def accepts(self):
     """Return the pair of inputs declared for this component.

     The first entry is an InputFormat with format_id 'dateref' and
     extension '.dateref.json'; the second is an InputComponent built from
     ExtractDateref. The ``skip_*`` settings of this component are handed
     over under ExtractDateref's ``skip_*match`` keyword names.
     """
     dateref_file = InputFormat(
         self, format_id='dateref', extension='.dateref.json')
     # Keys are the keyword names ExtractDateref is called with; the
     # values come from the correspondingly (shorter) named attributes here.
     extraction_options = {
         'config': self.config,
         'strip_punctuation': self.strip_punctuation,
         'to_lowercase': self.to_lowercase,
         'skip_datematch': self.skip_date,
         'skip_monthmatch': self.skip_month,
         'skip_timeunitmatch': self.skip_timeunit,
         'skip_daymatch': self.skip_day,
     }
     dateref_step = InputComponent(self, ExtractDateref, **extraction_options)
     return (dateref_file, dateref_step)
Example #10
0
 def accepts(self):
     """Return the pair of inputs declared for this component.

     The first entry is an InputFormat with format_id 'txt' and extension
     'txt'; the second is an InputComponent built from Lowercaser.
     """
     text_file = InputFormat(self, format_id='txt', extension='txt')
     lowercase_step = InputComponent(self, Lowercaser)
     return (text_file, lowercase_step)
Example #11
0
 def accepts(self):
     """Return the single input this component accepts.

     The only option is an InputComponent built from OCR_document. Note
     that the InputComponent is returned on its own, not wrapped in a tuple.
     """
     ocr_input = InputComponent(self, OCR_document)
     return ocr_input