def test_CommunicationWriterTGZ_single_file_default_name(output_file, login_info):
    """Write one Communication with CommunicationWriterTGZ and inspect the archive.

    The Communication read from ``tests/testdata/simple_1.concrete`` is written
    without an explicit member name, then the resulting tar file is reopened and
    its single member is checked for name, type, mtime, size, mode and ownership.

    Args:
        output_file: Path handed to both ``writer.open`` and ``tarfile.open``.
        login_info: Mapping providing the expected ``"uid"``, ``"username"``,
            ``"gid"`` and ``"groupname"`` values for the archive member.
    """
    source_path = "tests/testdata/simple_1.concrete"
    comm = read_communication_from_file(source_path)

    writer = CommunicationWriterTGZ()
    try:
        writer.open(output_file)
        writer.write(comm)
    finally:
        writer.close()

    assert tarfile.is_tarfile(output_file)

    f = tarfile.open(output_file)
    # Close the archive even when an assertion below fails, so a failing
    # test does not leak the open file handle.
    try:
        tarinfo = f.next()
        assert tarinfo is not None

        # With no name supplied to write(), the member is named after the UUID.
        assert comm.uuid.uuidString + ".concrete" == tarinfo.name
        assert tarinfo.isreg()
        assert tarinfo.mtime > time.time() - TIME_MARGIN
        assert os.stat(source_path).st_size == tarinfo.size
        # 0o644 (rw-r--r--): this octal spelling parses on Python 2.6+ and 3.
        assert 0o644 == tarinfo.mode
        assert login_info["uid"] == tarinfo.uid
        assert login_info["username"] == tarinfo.uname
        assert login_info["gid"] == tarinfo.gid
        assert login_info["groupname"] == tarinfo.gname

        # TarFile.next() returns None once the members are exhausted.
        tarinfo = f.next()
        assert tarinfo is None
    finally:
        f.close()
def test_CommunicationWriterTGZ_single_file_default_name(output_file):
    """Write one Communication with CommunicationWriterTGZ and inspect the archive.

    The Communication read from ``tests/testdata/simple_1.concrete`` is written
    without an explicit member name, then the resulting tar file is reopened and
    its single member is checked for name, type, mtime, size, mode and ownership.
    Expected ownership is taken from the current process via ``os``, ``pwd``
    and ``grp``.

    Args:
        output_file: Path handed to both ``writer.open`` and ``tarfile.open``.
    """
    source_path = 'tests/testdata/simple_1.concrete'
    comm = read_communication_from_file(source_path)

    writer = CommunicationWriterTGZ()
    # Always close the writer, even if open()/write() raises.
    try:
        writer.open(output_file)
        writer.write(comm)
    finally:
        writer.close()

    assert tarfile.is_tarfile(output_file)

    f = tarfile.open(output_file)
    # Close the archive even when an assertion below fails, so a failing
    # test does not leak the open file handle.
    try:
        tarinfo = f.next()
        assert tarinfo is not None

        # With no name supplied to write(), the member is named after the UUID.
        assert comm.uuid.uuidString + '.concrete' == tarinfo.name
        assert tarinfo.isreg()
        assert tarinfo.mtime > time.time() - TIME_MARGIN
        assert os.stat(source_path).st_size == tarinfo.size
        # 0o644 (rw-r--r--): this octal spelling parses on Python 2.6+ and 3.
        assert 0o644 == tarinfo.mode
        assert os.getuid() == tarinfo.uid
        assert pwd.getpwuid(os.getuid()).pw_name == tarinfo.uname
        assert os.getgid() == tarinfo.gid
        assert grp.getgrgid(os.getgid()).gr_name == tarinfo.gname

        # TarFile.next() returns None once the members are exhausted.
        tarinfo = f.next()
        assert tarinfo is None
    finally:
        f.close()
# Convert a gzipped, tab-separated input of (id, label, text) rows into one
# Communication per row, written through CommunicationWriterTGZ.
ofd = CommunicationWriterTGZ(options.output)
# Close the writer even if a row fails part-way through; otherwise the
# close() call is skipped and the output archive is left unfinalised.
try:
    with reader(gzip.open(options.input)) as ifd:
        for i, line in enumerate(ifd):
            toks = line.strip().split("\t")
            if len(toks) != 3:
                # Rows without exactly three tab-separated fields are skipped.
                continue
            cid, label, text = toks

            # A fresh UUID generator and a single timestamp are used per
            # Communication; every annotation below shares that timestamp.
            g = ugf.create()
            t = int(time())

            # UUIDs are drawn in the same order as before:
            # communication, then tagging, then section.
            comm_uuid = g.next()
            gold_tagging = CommunicationTagging(
                uuid=g.next(),
                metadata=AnnotationMetadata(
                    tool="Gold labeling",
                    timestamp=t,
                    kBest=1,
                ),
                taggingType=options.tag_type,
                tagList=[label],
                confidenceList=[1.0],
            )
            ingest_metadata = AnnotationMetadata(
                tool="text_to_concrete.py ingester",
                timestamp=t,
                kBest=1,
            )
            # One section spanning the whole text.
            content_section = Section(
                uuid=g.next(),
                textSpan=TextSpan(start=0, ending=len(text)),
                kind="content",
            )

            comm = Communication(
                id=cid,
                uuid=comm_uuid,
                type="Text document",
                text=text,
                communicationTaggingList=[gold_tagging],
                metadata=ingest_metadata,
                sectionList=[content_section],
            )
            ofd.write(comm)
finally:
    ofd.close()