def text_from_file(self, filepath):
    """Return the text content found at *filepath*.

    The source is chosen in this order:

    1. If ``self.options.pdf_input`` is truthy, the path is handed to
       ``pdf2text`` (defined outside this block; presumably it extracts
       text from a PDF -- confirm against its definition).
    2. Otherwise, if the path contains ``'s3://'``, it is handed to
       ``self.s3open``.
    3. Otherwise the path is opened as a UTF-8 encoded file and its whole
       content is returned.

    :param filepath: path (or S3 URL) of the document to read.
    :return: whatever the selected reader returns; for the plain-file
        branch this is the decoded file content.
    """
    if self.options.pdf_input:
        return pdf2text(filepath)
    if 's3://' in filepath:
        return self.s3open(filepath)
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with codecs.open(filepath, 'r', 'utf-8') as handle:
        return handle.read()
예제 #2
0
 def text_from_file(self, filepath):
     """Return the text content found at *filepath*.

     The source is chosen in this order:

     1. If ``self.options.pdf_input`` is truthy, the path is handed to
        ``pdf2text`` (defined outside this block; presumably it extracts
        text from a PDF -- confirm against its definition).
     2. Otherwise, if the path contains ``'s3://'``, it is handed to
        ``self.s3open``.
     3. Otherwise the path is opened as a UTF-8 encoded file and its whole
        content is returned.

     :param filepath: path (or S3 URL) of the document to read.
     :return: whatever the selected reader returns; for the plain-file
         branch this is the decoded file content.
     """
     if self.options.pdf_input:
         return pdf2text(filepath)
     if 's3://' in filepath:
         return self.s3open(filepath)
     # Use a context manager so the file handle is closed deterministically
     # instead of being left for the garbage collector.
     with codecs.open(filepath, 'r', 'utf-8') as handle:
         return handle.read()
 def mapper(self, _, filepath):
     """Emit the input path when no non-whitespace text comes back for it.

     The incoming value is trimmed of surrounding newlines and whitespace,
     then passed to ``pdf2text`` (defined outside this block). If the
     returned text is empty or whitespace-only, ``(None, path)`` is
     yielded; otherwise nothing is yielded.

     :param _: ignored.
     :param filepath: raw path value, possibly with trailing newline or
         surrounding whitespace.
     """
     cleaned_path = filepath.strip('\n')
     cleaned_path = cleaned_path.strip()
     extracted = pdf2text(cleaned_path)
     if extracted.strip():
         # Text was found, so this path is not reported.
         return
     yield None, cleaned_path
예제 #4
0
 def mapper(self, _, filepath):
     """Emit the input path when no non-whitespace text comes back for it.

     The incoming value is trimmed of surrounding newlines and whitespace,
     then passed to ``pdf2text`` (defined outside this block). If the
     returned text is empty or whitespace-only, ``(None, path)`` is
     yielded; otherwise nothing is yielded.

     :param _: ignored.
     :param filepath: raw path value, possibly with trailing newline or
         surrounding whitespace.
     """
     cleaned_path = filepath.strip('\n')
     cleaned_path = cleaned_path.strip()
     extracted = pdf2text(cleaned_path)
     if extracted.strip():
         # Text was found, so this path is not reported.
         return
     yield None, cleaned_path