def testUnknownColumn(self):
    # Building a specification that contains an _UNKNOWN column must emit
    # exactly the INFO record checked below on the 'otplc.colspec' logger.
    self.interceptLogs('otplc.colspec')

    # The result is irrelevant here; only the logging side effect is checked.
    C.from_integers([C._UNKNOWN, C.TOKEN])

    self.test_log.assertMatches(
        u'ignoring _UNKNOWN column %s',
        args=(1,),
        levelname='INFO'
    )
def guess_colspec(otpl_reader):
    """
    Work out the column specification for the input behind a reader.

    If the input file has a colspec header, that header is used instead of
    any guessing. Note that for guessing to work, the optionally present
    global enumeration column must be placed *before* the (also optional)
    local enumeration column.

    :param otpl_reader: a reader instance
    :type otpl_reader: OtplReader
    :raises AttributeError: if the reader has an undefined separator property
    :returns: a :class:`ColumnSpecification` or ``None`` if the guessing fails
    """
    try:
        candidate = _make_guess(otpl_reader)
    except (IOError, UnicodeDecodeError, DataFormatError) as error:
        # Read or format problems are logged and handled as a failed guess
        # (an empty candidate is rejected by the length check below).
        L.warning(str(error))
        candidate = []

    # A ready-made specification is returned unchanged (logged as coming
    # from the header).
    if isinstance(candidate, Spec):
        L.info(u'from header: %s', str(candidate))
        return candidate

    # Fewer than two guessed columns is treated as a failure.
    if len(candidate) < 2:
        L.warning(u'failed for "%s"', otpl_reader.path)
        L.debug(u'discarded guess was: %s', Spec.to_string(candidate))
        return None

    L.debug(u'as: %s', Spec.to_string(candidate))
    return Spec.from_integers(candidate)
def testParseColspec(self):
    # Parsing the space-joined names of every entry in C.NAMES must yield
    # the corresponding values, in the same order.
    self.interceptLogs('otplc.colspec')

    # noinspection PyUnresolvedReferences
    names, values = zip(*C.NAMES.items())
    colspec_string = ' '.join(names)
    self.assertSequenceEqual(values, C.parse_colspec(colspec_string))

    # Two warnings about internal colspec types are expected from the parse.
    self.test_log.assertMatches(
        u'using an internal colspec type; probably a Bad Idea',
        levelname='WARNING',
        count=2
    )
def testInitialization(self):
    # Build a specification from a fixed column layout and verify the
    # bookkeeping attributes it derives. Trailing comments give each
    # column's zero-based position, which the expected values refer to.
    columns = [
        C.SEGMENT_ID,     # 0
        C.GLOBAL_ENUM,    # 1
        C.LOCAL_ENUM,     # 2
        C.TOKEN,          # 3
        C.POS_TAG,        # 4
        C.LOCAL_REF,      # 5
        C.RELATION,       # 6
        C.ENTITY,         # 7
        C.GLOBAL_REF,     # 8
        C.GLOBAL_REF,     # 9
        C.EVENT,          # 10
        C.ATTRIBUTE,      # 11
        C.NORMALIZATION,  # 12
        C.LOCAL_REF,      # 13
        C.LOCAL_REF,      # 14
        C.EVENT,          # 15
    ]
    spec = C.from_integers(columns)

    # Single-column positions.
    self.assertEqual(1, spec._global_enum)
    self.assertEqual(2, spec._local_enum)
    self.assertEqual(3, spec._token)
    self.assertEqual(4, spec._pos_tag)

    # Annotation columns and the columns they are linked to.
    self.assertEqual({7}, spec._entities)
    self.assertEqual({8: 7, 9: 7}, spec._global_refs)
    self.assertEqual({6: 4}, spec._relations)
    self.assertEqual({10: (8, (9,)), 15: (13, (14,))}, spec._events)
    # important: norm of event!
    self.assertEqual({12: 10}, spec._normalizations)
    self.assertEqual({11: 10}, spec._attributes)
def _make_guess(segments): guess = None last_round = False for idx, segment in enumerate(segments): if not guess: # noinspection PyUnresolvedReferences if len(segment) == 1 and all( n.split(u':')[0] in Spec.NAMES for n in segment[0] ): return Spec.from_string(' '.join(segment[0])) guess = Guess(segment) else: guess.update(segment) if idx > 4 or last_round: break elif guess.complete(): last_round = True return guess.guess