def _prepare_token_array(self):
    # TODO: the lazy init should move to somewhere else
    # clear the suffix array and LCP array cache
    self.cached_suffix_array = None
    token_array_position = 0
    for idx, witness in enumerate(self.witnesses):
        witness_range = RangeSet()
        witness_range.add_range(self.counter, self.counter + len(witness.tokens()))
        # the extra one is for the marker token
        self.counter += len(witness.tokens()) + 1
        self.witness_ranges[witness.sigil] = witness_range
        # note: witness.tokens() is called several times here; caching the
        # result would avoid re-tokenizing
        sigil = witness.sigil
        for token in witness.tokens():
            token.token_data['_sigil'] = sigil
            token.token_data['_token_array_position'] = token_array_position
            token_array_position += 1
        self.token_array.extend(witness.tokens())
        # add a marker token separating this witness from the next
        self.token_array.append(Token({"n": '$' + str(idx), '_sigil': sigil}))
        token_array_position += 1
    # the final witness needs no trailing separator
    self.token_array.pop()  # remove last marker
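
# A minimal sketch of the token-array layout the method above produces, using
# a hypothetical stand-in for Token (names here are assumptions for
# illustration, not the real collatex classes). Two witnesses "a b" and
# "c d" yield a single array of witness tokens joined by '$<idx>' marker
# tokens, with the trailing marker popped off.

class _FakeToken:
    # stand-in for Token: keeps the same 'n'/token_string convention as the tests below
    def __init__(self, data):
        self.token_data = data
        self.token_string = data.get('n', data.get('t', ''))

def _demo_token_array(witness_token_lists):
    # mirrors the loop above: extend with witness tokens, append a marker, pop the last
    token_array = []
    for idx, tokens in enumerate(witness_token_lists):
        token_array.extend(_FakeToken({'n': t}) for t in tokens)
        token_array.append(_FakeToken({'n': '$' + str(idx)}))
    token_array.pop()  # no separator after the final witness
    return [t.token_string for t in token_array]

# _demo_token_array([['a', 'b'], ['c', 'd']]) -> ['a', 'b', '$0', 'c', 'd']
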
def _prepare_token_array(self):
    # TODO: the lazy init should move to somewhere else
    # clear the suffix array and LCP array cache
    self.cached_suffix_array = None
    for idx, witness in enumerate(self.witnesses):
        witness_range = RangeSet()
        witness_range.add_range(self.counter, self.counter + len(witness.tokens()))
        # the extra one is for the marker token
        self.counter += len(witness.tokens()) + 1
        self.witness_ranges[witness.sigil] = witness_range
        if self.token_array:
            # add a marker token between the previous witness and this one
            self.token_array.append(Token({"n": "$" + str(idx - 1)}))
        self.token_array.extend(witness.tokens())
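
# The witness-range bookkeeping in both variants above maps each sigil to the
# token-array positions its tokens occupy. A small sketch, assuming RangeSet
# is ClusterShell's RangeSet (the import collatex-pythonport uses); treat the
# import path as an assumption, not a guarantee.

from ClusterShell.RangeSet import RangeSet

def _demo_witness_ranges(witness_lengths):
    # mirror the counter logic above: each witness occupies len(tokens)
    # positions, plus one slot reserved for its trailing marker token
    counter = 0
    ranges = {}
    for sigil, length in witness_lengths:
        r = RangeSet()
        r.add_range(counter, counter + length)  # stop is exclusive
        counter += length + 1  # + 1 for the marker token
        ranges[sigil] = r
    return ranges

# _demo_witness_ranges([('A', 2), ('B', 3)]) maps 'A' to positions 0-1 and
# 'B' to positions 3-5; position 2 is left free for the '$0' marker.
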
def test_invalid_token_raises_exception(self):
    # a token with neither 'n' nor 't' has no token string and must raise
    with self.assertRaises(TokenError):
        # data = {'x': 'abc'}  # any dict without 'n' or 't' would also raise
        data = {}
        Token(data)
def test_creation_token_none(self):
    t = Token(None)
    self.assertEqual(t.token_string, '')
    self.assertIsNone(t.token_data)
def test_creation_token_n(self):
    # the normalized form 'n' takes precedence over the raw text 't'
    data = {'t': 'kitten', 'n': 'cat'}
    t = Token(data)
    self.assertEqual(t.token_string, 'cat')
    self.assertEqual(t.token_data, data)
def test_creation_token_t(self):
    # without 'n', the raw 't' value becomes the token string; extra keys are preserved
    data = {'t': 'fox', 'id': 123}
    t = Token(data)
    self.assertEqual(t.token_string, 'fox')
    self.assertEqual(t.token_data, data)
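
# The four tests above pin down Token's contract. A minimal sketch that
# satisfies them, reconstructed from the assertions alone (the real class in
# collatex may differ; this is an assumption for illustration):

class TokenError(Exception):
    pass

class Token(object):
    def __init__(self, token_data=None):
        if token_data is None:
            # an empty token: empty string, no data
            self.token_string = ''
            self.token_data = None
            return
        if 'n' in token_data:  # normalized form wins
            self.token_string = token_data['n']
        elif 't' in token_data:  # fall back to the raw text
            self.token_string = token_data['t']
        else:
            raise TokenError('token data contains neither "n" nor "t"')
        self.token_data = token_data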