def setup_class(cls):
    """Build the shared fixture: one document with two annotated parts.

    Part 'p1' holds the mutation mentions "c.A100G" and "p.V100Q";
    part 'p2' is an edge case with the mention "A927B" mid-sentence.
    Each token gets a single predicted Label taken from a tag sequence.
    """
    cls.dataset = Dataset()

    # --- part 1: two full mutation mentions -------------------------------
    part = Part('some text c.A100G p.V100Q some text')
    part.sentences = [[
        Token('some', 0), Token('text', 5),
        Token('c', 10), Token('.', 11), Token('A', 12),
        Token('100', 13), Token('G', 16),
        Token('p', 18), Token('.', 19), Token('V', 20),
        Token('100', 21), Token('Q', 24),
        Token('some', 26), Token('text', 31),
    ]]
    tags = ['O', 'O', 'B', 'I', 'I', 'I', 'E',
            'A', 'I', 'I', 'I', 'E', 'O', 'O']
    for token, tag in zip(part.sentences[0], tags):
        token.predicted_labels = [Label(tag)]
    cls.dataset.documents['doc_1'] = Document()
    cls.dataset.documents['doc_1'].parts['p1'] = part

    # --- part 2: edge case with a single bare mention ---------------------
    part = Part('test edge case DNA A927B test')
    part.sentences = [[
        Token('test', 0), Token('edge', 5), Token('case', 10),
        Token('DNA', 15), Token('A', 19), Token('927', 20),
        Token('B', 23), Token('test', 25),
    ]]
    tags = ['O', 'O', 'O', 'O', 'M', 'P', 'M', 'O']
    for token, tag in zip(part.sentences[0], tags):
        token.predicted_labels = [Label(tag)]
    cls.dataset.documents['doc_1'].parts['p2'] = part
def setUp(self):
    """Create a one-document, two-sentence dataset and the stemmer under test."""
    part = Part('Make making made. Try tried tries.')
    part.sentences = [
        [Token('Make', 0), Token('making', 5), Token('made', 12)],
        [Token('Try', 18), Token('tried', 22), Token('tries', 28)],
    ]
    document = Document()
    document.parts['part_1'] = part
    self.dataset = Dataset()
    self.dataset.documents['doc_1'] = document
    # Generator whose stem features the tests exercise.
    self.generator = PorterStemFeatureGenerator()
def setUp(self):
    """Create a two-sentence dataset and pre-seed two features on every token."""
    part = Part('Make making made. Try tried tries.')
    part.sentences = [
        [Token('Make', 0), Token('making', 5), Token('made', 12)],
        [Token('Try', 18), Token('tried', 22), Token('tries', 28)],
    ]
    document = Document()
    document.parts['part_1'] = part
    self.dataset = Dataset()
    self.dataset.documents['doc_1'] = document
    # Every token starts with the same two baseline features.
    for token in self.dataset.tokens():
        for key, value in (('a', 'a'), ('b', 'b')):
            token.features[key] = value
def setUp(self):
    """Create a two-sentence dataset plus the two feature generators under test."""
    part = Part('Word1 word2 word3. Word4 word5 word6.')
    part.sentences = [
        [Token('Word1', 0), Token('word2', 6), Token('word3', 12)],
        [Token('Word4', 19), Token('word5', 25), Token('word6', 31)],
    ]
    document = Document()
    document.parts['part_1'] = part
    self.dataset = Dataset()
    self.dataset.documents['doc_1'] = document
    # Generators exercised by the tests in this class.
    self.simple_generator = SimpleFeatureGenerator()
    self.sentence_generator = SentenceMarkerFeatureGenerator()