def createComm(cid, ctype, txt):
    """Build a concrete Communication, split into "Passage" sections, from text.

    The text is normalised first: the characters U+00A0 and U+00C2 become
    spaces, and every run of whitespace containing a newline is collapsed to
    a single newline. One section is then created per newline-delimited
    chunk of the normalised text.

    Note that every section after the first begins at the index of the
    newline that ended the previous one, so its span includes that newline.

    Args:
        cid: value stored as the communication's ``id``.
        ctype: value stored as the communication's ``type``.
        txt: raw text of the communication.

    Returns:
        The populated Communication, or None when the normalised text is
        blank or ``concrete.validate.validate_communication`` returns a
        falsy result.
    """
    communication = Communication()
    communication.id = cid
    communication.uuid = concrete.util.generate_UUID()
    communication.type = ctype

    # Normalise odd characters first, then squeeze whitespace around newlines.
    cleaned = re.sub(r'\s*\n\s*', '\n', re.sub('[\xa0\xc2]', ' ', txt))
    if not cleaned.strip():
        return None

    communication.text = cleaned
    communication.metadata = create_dummy_annotation()

    # Section boundaries: each newline (not at index 0) that follows a
    # non-newline character.
    text_length = len(cleaned)
    boundaries = []
    for pos in range(1, text_length):
        if cleaned[pos] == '\n' and cleaned[pos - 1] != '\n':
            boundaries.append(pos)
    # Unless the text already ends on a boundary newline, close the final
    # section at the end of the text.
    if not (boundaries and boundaries[-1] == text_length - 1):
        boundaries.append(text_length)

    # Every section starts where the previous one ended; the first starts at 0.
    span_starts = [0] + boundaries[:-1]
    passages = []
    for begin, end in zip(span_starts, boundaries):
        passage = concrete.Section()
        passage.uuid = concrete.util.generate_UUID()
        passage.kind = "Passage"
        passage.textSpan = concrete.TextSpan(begin, end)
        passages.append(passage)
    communication.sectionList = passages

    if concrete.validate.validate_communication(communication):
        return communication
    return None
def test_validate_minimal_communication_with_uuid():
    """Check that a Communication with id, metadata, type and uuid validates."""
    minimal = Communication()
    minimal.id = "myID"

    # Metadata is stamped with the current time in whole seconds.
    stamp = int(time.time())
    minimal.metadata = AnnotationMetadata(tool="TEST", timestamp=stamp)

    minimal.type = "Test Communication"
    minimal.uuid = generate_UUID()

    is_valid = validate_communication(minimal)
    assert is_valid
def test_validate_minimal_communication_with_uuid():
    """Validation should accept a Communication that has only the id,
    metadata, type and uuid fields filled in."""
    candidate = Communication()
    candidate.id = "myID"
    # The timestamp is the current time truncated to whole seconds.
    now_seconds = int(time.time())
    annotation = AnnotationMetadata(tool="TEST", timestamp=now_seconds)
    candidate.metadata = annotation
    candidate.type = "Test Communication"
    candidate.uuid = generate_UUID()

    outcome = validate_communication(candidate)
    assert outcome
def createComm(fn):
    """Read a UTF-8 text file and turn it into a sectioned Communication.

    The communication's ``id`` is the path itself. Its ``type`` is
    "QUORA ANSWER" when the last '/'-separated component of the path starts
    with "answer", otherwise "QUORA QUESTION".

    The text is normalised before sectioning: the characters U+00A0 and
    U+00C2 become spaces, and every run of whitespace containing a newline
    is collapsed to a single newline. One "Passage" section is created per
    newline-delimited chunk. Every section after the first begins at the
    index of the newline that ended the previous one, so its span includes
    that newline.

    Args:
        fn: path of the file to read.

    Returns:
        The populated Communication, or None when the normalised text is
        blank or ``concrete.validate.validate_communication`` returns a
        falsy result.
    """
    with codecs.open(fn, 'r', 'utf-8') as handle:
        raw_text = handle.read()

    communication = Communication()
    communication.id = fn
    communication.uuid = concrete.util.generate_UUID()

    # The file's base name decides whether this is an answer or a question.
    base_name = fn.split('/')[-1]
    if base_name.startswith("answer"):
        communication.type = "QUORA ANSWER"
    else:
        communication.type = "QUORA QUESTION"

    # Normalise odd characters first, then squeeze whitespace around newlines.
    cleaned = re.sub(r'\s*\n\s*', '\n', re.sub('[\xa0\xc2]', ' ', raw_text))
    if not cleaned.strip():
        return None

    communication.text = cleaned
    communication.metadata = create_dummy_annotation()

    # Section boundaries: each newline (not at index 0) that follows a
    # non-newline character.
    text_length = len(cleaned)
    boundaries = []
    for pos in range(1, text_length):
        if cleaned[pos] == '\n' and cleaned[pos - 1] != '\n':
            boundaries.append(pos)
    # Unless the text already ends on a boundary newline, close the final
    # section at the end of the text.
    if not (boundaries and boundaries[-1] == text_length - 1):
        boundaries.append(text_length)

    # Every section starts where the previous one ended; the first starts at 0.
    span_starts = [0] + boundaries[:-1]
    passages = []
    for begin, end in zip(span_starts, boundaries):
        passage = concrete.Section()
        passage.uuid = concrete.util.generate_UUID()
        passage.kind = "Passage"
        passage.textSpan = concrete.TextSpan(begin, end)
        passages.append(passage)
    communication.sectionList = passages

    if concrete.validate.validate_communication(communication):
        return communication
    return None