def test_id(self):
    """ Adding the id after creation should overwrite the automatic ID """
    entry = Entry(_auto_id=True)
    # The auto-generated id must show up in the JSON-LD serialization.
    serialized = entry.jsonld()
    assert '@id' in serialized
    # Setting an explicit id afterwards replaces the automatic one.
    entry.id = "test"
    reserialized = entry.jsonld()
    assert reserialized['@id'] == 'test'
    # The plain 'id' key must not leak into the serialized form.
    assert 'id' not in reserialized
def test_id(self):
    """ Adding the id after creation should overwrite the automatic ID """
    entry = Entry()
    first = entry.jsonld()
    # A freshly created entry already serializes with an @id.
    assert '@id' in first
    entry.id = "test"
    second = entry.jsonld()
    # The manually assigned id wins over the automatic one...
    assert second['@id'] == 'test'
    # ...and no bare 'id' key appears in the output.
    assert 'id' not in second
def analyse_entry(self, entry, params):
    """Split the entry's text into chunks, yielding one new Entry per chunk.

    ``params["delimiter"]`` selects the splitter: ``"sentence"`` uses
    NLTK's PunktSentenceTokenizer, ``"paragraph"`` uses LineTokenizer.
    Each yielded entry's id carries a ``#char=start,end`` fragment with
    the chunk's character span in the original text.
    """
    chunker_type = params["delimiter"]
    original_text = entry['nif:isString']
    if chunker_type == "sentence":
        tokenizer = PunktSentenceTokenizer()
    if chunker_type == "paragraph":
        tokenizer = LineTokenizer()
    # Use the character spans directly and slice the original text:
    # this guarantees the chunk text and the "#char=start,end" fragment
    # always agree (the old code tokenized twice, which could drift),
    # and removes the leftover debug print of every chunk.
    for start, end in tokenizer.span_tokenize(original_text):
        e = Entry()
        e['nif:isString'] = original_text[start:end]
        if entry.id:
            e.id = entry.id + "#char={},{}".format(start, end)
        yield e
def analyse_entry(self, entry, params):
    """Yield the original entry, then one new Entry per text chunk.

    ``params["delimiter"]`` selects the splitter: ``"sentence"`` uses
    NLTK's PunktSentenceTokenizer, ``"paragraph"`` uses LineTokenizer.
    If the text produces a single span it is already atomic, so no
    extra entries are emitted.  Each chunk entry's id carries a
    ``#char=start,end`` fragment with the chunk's character span.
    """
    yield entry
    chunker_type = params["delimiter"]
    original_text = entry['nif:isString']
    if chunker_type == "sentence":
        tokenizer = PunktSentenceTokenizer()
    if chunker_type == "paragraph":
        tokenizer = LineTokenizer()
    chars = list(tokenizer.span_tokenize(original_text))
    if len(chars) == 1:
        # This sentence was already split
        return
    # Unpack the (start, end) spans directly; the old enumerate index
    # was never used.
    for start, end in chars:
        e = Entry()
        e['nif:isString'] = original_text[start:end]
        if entry.id:
            e.id = entry.id + "#char={},{}".format(start, end)
        yield e
def analyse_entry(self, entry, activity):
    """Yield the original entry, then one new Entry per text chunk.

    The delimiter comes from ``activity.params["delimiter"]``:
    ``"sentence"`` uses NLTK's PunktSentenceTokenizer, ``"paragraph"``
    uses LineTokenizer.  If the text produces a single span it is
    already atomic, so no extra entries are emitted.  Each chunk
    entry's id carries a ``#char=start,end`` fragment with the chunk's
    character span.
    """
    yield entry
    chunker_type = activity.params["delimiter"]
    original_text = entry['nif:isString']
    if chunker_type == "sentence":
        tokenizer = PunktSentenceTokenizer()
    if chunker_type == "paragraph":
        tokenizer = LineTokenizer()
    chars = list(tokenizer.span_tokenize(original_text))
    if len(chars) == 1:
        # This sentence was already split
        return
    # Unpack the (start, end) spans directly; the old enumerate index
    # was never used.
    for start, end in chars:
        e = Entry()
        e['nif:isString'] = original_text[start:end]
        if entry.id:
            e.id = entry.id + "#char={},{}".format(start, end)
        yield e