def __init__(self, filename, guessers=(YZPTBGuesser, PrefacedPTBGuesser, CCGbankGuesser, PTBGuesser, CPTBGuesser), default=CCGbankGuesser):
    '''Initialises a GuessReader with a given set of guessers.

    The start of the file is read into self.preview, handed to
    self.determine_reader to choose a reader class, and that class is then
    instantiated on the file.

    filename -- the file to read; anything after the last ':' is split off
                before the file is opened, but the full, unsplit value is
                what gets passed to the chosen reader class.
    guessers -- an iterable of guesser classes. Each must provide
                bytes_of_context_needed(); the largest value reported
                decides how much of the file is read as the preview.
    default  -- stored as self.default.
                NOTE(review): presumably the fallback used by
                determine_reader when no guesser matches -- confirm there.

    Raises ValueError if guessers is empty, plus whatever open() raises when
    the file cannot be opened.
    '''
    self.guessers = list(guessers)
    self.default = default

    # Only the part before the last ':' is opened; the suffix is not needed
    # in this method.
    filename_only, _index = padded_rsplit(filename, ':', 1)

    with open(filename_only, 'r') as file:
        # Measure from self.guessers rather than the raw argument: if the
        # caller passed a one-shot iterable, list() above has already
        # exhausted it and max() would see an empty sequence.
        preview_size = max(guesser.bytes_of_context_needed()
                           for guesser in self.guessers)
        self.preview = file.read(preview_size)

    self.reader_class = self.determine_reader(self.preview)
    self.reader = self.reader_class(filename)
def __iter__(self):
    '''Yields every item produced by the reader appropriate for self.path.

    self.path is split on its last ':' and the leading part is checked on
    disk:

    - if it does not exist, a warning is issued and nothing is yielded;
    - if it is a directory, a MultiGuessReader is built over it, and
      self.reader_class (when set) is forwarded as its 'reader' argument;
    - otherwise self.reader_class (when set) or GuessReader is instantiated
      on the full, unsplit self.path.
    '''
    path, _index = padded_rsplit(self.path, ':', 1)

    if not os.path.exists(path):
        warn("%s does not exist, so skipping.", path)
        # Returning ends this generator immediately, so the missing path is
        # actually skipped instead of falling through to a reader that would
        # try to open it.
        return

    if os.path.isdir(path):
        # The directory reader receives the split path (without any ':'
        # suffix) and only gets a 'reader' keyword when one was configured.
        if self.reader_class:
            reader_arg = {'reader': self.reader_class}
        else:
            reader_arg = {}
        reader = MultiGuessReader(path, verbose=self.verbose, **reader_arg)
    elif self.reader_class:
        # Single-file readers receive the full self.path, suffix included.
        reader = self.reader_class(self.path)
    else:
        reader = GuessReader(self.path)

    for deriv_bundle in reader:
        yield deriv_bundle
def get_offset(filename):
    '''Splits filename on its last ':' and returns the result.

    The return value is exactly what padded_rsplit(filename, ':', 1)
    produces; it is passed through without modification.

    NOTE(review): despite the name, this returns the whole split result
    rather than only the offset part -- confirm that callers expect that.
    '''
    separator = ':'
    max_splits = 1
    split_result = padded_rsplit(filename, separator, max_splits)
    return split_result