Example #1
0
 def test_to_sdp(self):
     """Convert the standard3 passage to SDP lines and back, comparing both with stored fixtures."""
     source = convert.from_standard(TestUtil.load_xml("test_files/standard3.xml"))
     sdp_lines = convert.to_sdp(source)
     with open("test_files/standard3.sdp", encoding="utf-8") as fixture:
         # Fixture regeneration hint kept from the original: f.write("\n".join(converted))
         expected_lines = fixture.read().splitlines()
         # The converter output ends with an empty line that splitlines() does not produce
         expected_lines.append("")
         self.assertSequenceEqual(sdp_lines, expected_lines)
     restored = next(convert.from_sdp(sdp_lines, source.ID))
     # Fixture regeneration hint: ioutil.passage2file(converted_passage, "test_files/standard3.sdp.xml")
     expected_passage = convert.from_standard(TestUtil.load_xml("test_files/standard3.sdp.xml"))
     matches = restored.equals(expected_passage)
     self.assertTrue(matches)
Example #2
0
 def test_to_export(self):
     """Convert the standard3 passage to the export format and back, comparing both with stored fixtures."""
     passage = convert.from_standard(TestUtil.load_xml('test_files/standard3.xml'))
     converted = convert.to_export(passage)
     # Read the fixture with an explicit encoding so the comparison does not depend on the
     # platform's default locale encoding
     with open('test_files/standard3.export', encoding='utf-8') as f:
         # f.write("\n".join(converted))
         self.assertSequenceEqual(converted, f.read().splitlines())
     converted_passage = next(convert.from_export(converted, passage.ID))
     # ioutil.passage2file(converted_passage, 'test_files/standard3.export.xml')
     ref = convert.from_standard(TestUtil.load_xml('test_files/standard3.export.xml'))
     self.assertTrue(converted_passage.equals(ref))
Example #3
0
 def test_to_conll(self):
     """Convert the standard3 passage to CoNLL lines and back, comparing both with stored fixtures.

     Also feeds the converted lines twice to from_conll and checks every passage it yields.
     """
     passage = convert.from_standard(TestUtil.load_xml("test_files/standard3.xml"))
     converted = convert.to_conll(passage)
     with open("test_files/standard3.conll", encoding="utf-8") as f:
         # f.write("\n".join(converted))
         self.assertSequenceEqual(converted, f.read().splitlines() + [""])
     converted_passage = next(convert.from_conll(converted, passage.ID))
     # ioutil.passage2file(converted_passage, "test_files/standard3.conll.xml")
     ref = convert.from_standard(TestUtil.load_xml("test_files/standard3.conll.xml"))
     self.assertTrue(converted_passage.equals(ref))
     # Put the same sentence twice and try converting again.
     # The assertion is inside the loop so that every yielded passage is verified,
     # not only the last one; the reference loaded above is reused.
     for converted_passage in convert.from_conll(converted * 2, passage.ID):
         self.assertTrue(converted_passage.equals(ref))
Example #4
0
def test_preannotate_passage(create, as_array, convert_and_back, partial, monkeypatch):
    """Check that annotation values set on a passage beforehand are still there after textutil.annotate.

    :param create: callable returning a fresh passage
    :param as_array: store the values in the layer-0 "doc" array instead of per-terminal extra
    :param convert_and_back: if true, annotate a copy converted to the standard format and back
    :param partial: if true, blank out the ENT_TYPE value so the pre-annotation is incomplete
    :param monkeypatch: pytest fixture, used to replace textutil.get_nlp when not partial
    """
    if not partial:
        monkeypatch.setattr(textutil, "get_nlp", assert_spacy_not_loaded)
    passage = create()
    l0 = passage.layer(layer0.LAYER_ID)
    attr_values = list(range(10, 10 + len(textutil.Attr)))
    if partial:
        attr_values[textutil.Attr.ENT_TYPE.value] = ""

    # Plant the values, either as one array per paragraph or as extra entries per terminal
    if as_array:
        paragraphs = textutil.break2paragraphs(passage, return_terminals=True)
        l0.extra["doc"] = [[attr_values] * len(paragraph) for paragraph in paragraphs]
    else:
        for terminal in l0.all:
            for attr, value in zip(textutil.Attr, attr_values):
                if not value:
                    continue
                terminal.extra[attr.key] = value

    # The round trip is always computed; the flag only selects which passage is used
    round_tripped = convert.from_standard(convert.to_standard(passage))
    passage = (passage, round_tripped)[convert_and_back]

    mode = dict(as_array=as_array, as_extra=not as_array)
    if not partial:
        assert textutil.is_annotated(passage, **mode), \
            "Passage %s is not pre-annotated" % passage.ID
    textutil.annotate(passage, **mode)
    assert textutil.is_annotated(passage, **mode), \
        "Passage %s is not annotated" % passage.ID

    # Note: this iterates the layer obtained before the optional round trip
    for terminal in l0.all:
        for i, (attr, value) in enumerate(zip(textutil.Attr, attr_values)):
            if not value:
                continue
            assert (terminal.tok[i] if as_array else terminal.extra.get(attr.key)) == value, \
                "Terminal %s has wrong %s" % (terminal, attr.name)
Example #5
0
 def __init__(self, f):
     """Use an existing Node as root, or build the tree from a standard-format XML source.

     :param f: a Node to adopt as the root, otherwise a source accepted by ET.parse
     """
     if isinstance(f, Node):
         self.root = f
         return
     print("Reading '%s'..." % f)
     passage = convert.from_standard(ET.parse(f).getroot())
     self.root = Node('ROOT')
     # Only units without incoming edges become children of the artificial root
     children = []
     for layer in passage.layers:
         for unit in layer.all:
             if unit.incoming:
                 continue
             children.append(self.build(unit))
     self.root.set_children_binarized(children)
Example #6
0
def visualize():
    """Draw the passage sent in the request body and return the image as text.

    The request data is parsed as standard-format XML, drawn onto a new figure,
    rendered to PNG, base64-encoded and URL-quoted.

    :return: Response whose body is the quoted base64 PNG data
    """
    xml = request.get_data()
    passage = from_standard(fromstring(xml))
    print("Visualizing passage %s: %s" % (passage.ID, passage.layer(layer1.LAYER_ID).heads[0]))
    figure = plt.figure()
    try:
        canvas = FigureCanvasAgg(figure)
        draw(passage)
        image = BytesIO()
        canvas.print_png(image)
    finally:
        # pyplot keeps every figure it creates alive until it is closed explicitly;
        # without this, each request would leave one more figure in memory
        plt.close(figure)
    data = b64encode(image.getvalue()).decode()
    return Response(quote(data.rstrip("\n")))
Example #7
0
def test_annotate_passage(create, as_array):
    """Annotate a fresh passage and check the annotation on it and on a standard-format round trip.

    :param create: callable returning a fresh passage
    :param as_array: passed through to textutil.annotate and textutil.is_annotated
    """
    passage = create()
    textutil.annotate(passage, as_array=as_array)
    round_tripped = convert.from_standard(convert.to_standard(passage))
    expected_length = len(textutil.Attr)
    for p in (passage, round_tripped):
        assert textutil.is_annotated(p, as_array=as_array), "Passage %s is not annotated" % passage.ID
        terminals = p.layer(layer0.LAYER_ID).all
        for terminal in terminals:
            if not as_array:
                # Every attribute must have its own key in the terminal's extra dict
                for attr in textutil.Attr:
                    assert attr.key in terminal.extra, "Terminal %s has no %s" % (terminal, attr.name)
                continue
            assert terminal.tok is not None, "Terminal %s has no annotation" % terminal
            assert len(terminal.tok) == expected_length
Example #8
0
File: ioutil.py Project: borgr/ucca
def file2passage(filename):
    """Opens a file and returns its parsed Passage object
    Tries to read both as a standard XML file and as a binary pickle
    :param filename: file name to read from
    :return: the result of from_standard for XML input, otherwise the unpickled object
    :raise Exception: the error from the XML attempt, if the pickle attempt fails too
    """
    try:
        # Hand the file name to the parser so it reads the bytes itself and honours the
        # encoding declared in the XML, instead of decoding with the locale's default
        etree = ElementTree().parse(filename)
        return from_standard(etree)
    except Exception as e:
        try:
            # NOTE: pickle.load can execute arbitrary code; only use this on trusted files
            with open(filename, 'rb') as h:
                return pickle.load(h)
        except Exception:
            # Neither format worked: report the XML error, as the primary format
            raise e
Example #9
0
def test_annotate_all(as_array, convert_and_back):
    """Annotate all PASSAGES with textutil.annotate_all and check every terminal is annotated.

    :param as_array: passed through to textutil.annotate_all and textutil.is_annotated
    :param convert_and_back: if true, check a copy converted to the standard format and back
    """
    passages = [create() for create in PASSAGES]
    # First pass with default arguments; the results are consumed and discarded
    list(textutil.annotate_all(passages))
    paired = ((p, p) for p in passages)
    expected_length = len(textutil.Attr)
    for passage, compare in textutil.annotate_all(paired, as_array=as_array, as_tuples=True):
        # In tuple mode the second element must come back untouched
        assert passage is compare
        round_tripped = convert.from_standard(convert.to_standard(passage))
        p = (passage, round_tripped)[convert_and_back]
        assert textutil.is_annotated(p, as_array=as_array), "Passage %s is not annotated" % passage.ID
        for terminal in p.layer(layer0.LAYER_ID).all:
            if not as_array:
                for attr in textutil.Attr:
                    assert attr.key in terminal.extra, "Terminal %s in passage %s has no %s" % (
                        terminal, passage.ID, attr.name)
                continue
            assert terminal.tok is not None, "Terminal %s in passage %s has no annotation" % (terminal, passage.ID)
            assert len(terminal.tok) == expected_length
Example #10
0
 def test_from_standard(self):
     """The standard3 passage must equal, in order, the one loaded from the site3 file."""
     from_standard_format = convert.from_standard(TestUtil.load_xml("test_files/standard3.xml"))
     from_site_format = convert.from_site(TestUtil.load_xml("test_files/site3.xml"))
     same = from_standard_format.equals(from_site_format, ordered=True)
     self.assertTrue(same)
Example #11
0
def loaded_valid():
    """Load test_files/standard3_valid.xml and convert it from the standard format."""
    xml_root = load_xml("test_files/standard3_valid.xml")
    return convert.from_standard(xml_root)
Example #12
0
def loaded():
    """Load test_files/standard3.xml and convert it from the standard format."""
    xml_root = load_xml("test_files/standard3.xml")
    return convert.from_standard(xml_root)
Example #13
0
                    x.append(par)
            if I == []:
                x.append(par)
        y.append(x)

    return (y)


index = list(range(0, 100))

for t in index:
    f1 = open('UCCAannotated_source/%s.xml' % t)
    xml_string1 = f1.read()
    f1.close()
    xml_object1 = fromstring(xml_string1)
    P1 = convert.from_standard(xml_object1)  #for semi-automatic SAMSA
    L1 = get_num_scenes(P1)
    L2 = get_num_sentences('%s.txt' % t)
    M1 = get_cmrelations(P1)
    A1 = get_cparticipants(P1)

    #print(L1)
    #print(L2)
    #print(M1)
    #print(A1)
    if L1 < L2:
        score = 0

    elif L1 == L2:
        f1 = open(
            'scene_sentence_alignment_output/a%s.txt' % t
Example #14
0
 def test_to_site(self):
     """Converting the standard3 passage to the site format and back must give an equal passage."""
     xml_root = TestUtil.load_xml("test_files/standard3.xml")
     original = convert.from_standard(xml_root)
     site_root = convert.to_site(original)
     restored = convert.from_site(site_root)
     self.assertTrue(original.equals(restored))
Example #15
0
    return (output)


def get_sentences(P, dirpath='/Mypath/System_output'):
    """
    Return all the sentences of a simplification-system output file.

    :param P: file id(s) of the system output, passed to PlaintextCorpusReader
    :param dirpath: directory holding the system output, located with nltk.data.find;
                    defaults to the path that was previously hard-coded
    :return: a full-length slice of the reader's sentences
    """
    folder = nltk.data.find(dirpath)
    corpusReader = nltk.corpus.PlaintextCorpusReader(folder, P)
    # Request the sentences once and reuse them, instead of building them a second time
    sentences = corpusReader.sents()
    return sentences[:len(sentences)]


index = list(range(0, 100))

# For each numbered passage: read its UCCA XML, extract the scenes and the system-output
# sentences, and write both (one per line) to s<N>.txt
for t in index:
    # Context managers close the files even if reading, converting or writing fails
    with open('UCCAannotated_source/%s.xml' % t) as f1:
        xml_string1 = f1.read()
    xml_object1 = fromstring(xml_string1)
    P1 = convert.from_standard(
        xml_object1)  #from_site for the semi-automatic version of SAMSA
    L1 = get_scenes(P1)
    L2 = get_sentences('%s.txt' % t)
    with open('s%s.txt' % t, 'w') as s:
        s.write('%s\n' % L1)
        s.write('%s\n' % L2)
Example #16
0
 def test_to_site(self):
     """Converting the standard3 passage to the site format and back must give an equal passage."""
     xml_root = self._load_xml('./standard3.xml')
     original = convert.from_standard(xml_root)
     site_root = convert.to_site(original)
     restored = convert.from_site(site_root)
     self.assertTrue(original.equals(restored))
Example #17
0
 def test_from_standard(self):
     """The standard3 passage must equal, in order, the one loaded from the site3 file."""
     from_standard_format = convert.from_standard(self._load_xml('./standard3.xml'))
     from_site_format = convert.from_site(self._load_xml('./site3.xml'))
     same = from_standard_format.equals(from_site_format, ordered=True)
     self.assertTrue(same)
def file2passage(filename):
    """Opens a standard xml file and returns its parsed Passage object

    :param filename: name of the XML file to read
    :return: the result of convert.from_standard on the parsed XML root
    """
    # Hand the file name to the parser so it reads the bytes itself and honours the
    # encoding declared in the XML, instead of decoding with the locale's default
    etree = ElementTree().parse(filename)
    return convert.from_standard(etree)
Example #19
0
def download():
    """Return the submitted XML either unchanged or converted to the requested format.

    Reads "input" (the XML) and "format" from the request values. For any format other
    than "xml", the matching TO_FORMAT converter is applied and its output joined with newlines.

    :return: Response with the Content-Type registered for the format, or text/plain
    """
    xml = request.values["input"]
    out_format = request.values["format"]
    print("Converting to " + out_format)
    if out_format == "xml":
        out = xml
    else:
        converter = TO_FORMAT[out_format]
        lines = converter(from_standard(fromstring(xml)))
        out = "\n".join(lines)
    content_type = CONTENT_TYPES.get(out_format, "text/plain")
    return Response(out, headers={"Content-Type": content_type})
Example #20
0
 def test_to_text(self):
     """Check convert.to_text on the standard3 passage with the second argument False and True."""
     passage = convert.from_standard(TestUtil.load_xml("test_files/standard3.xml"))
     # With False, the first returned element is the whole text
     joined = convert.to_text(passage, False)
     self.assertEqual(joined[0], "1 2 3 4 . 6 7 8 9 10 . 12 13 14 15")
     # With True, the text comes back in three pieces
     pieces = convert.to_text(passage, True)
     expected_pieces = ["1 2 3 4 .", "6 7 8 9 10 .", "12 13 14 15"]
     self.assertSequenceEqual(pieces, expected_pieces)
Example #21
0
 def test_to_site(self):
     """Converting the standard3 passage to the site format and back must give an equal passage."""
     xml_root = TestUtil.load_xml("test_files/standard3.xml")
     original = convert.from_standard(xml_root)
     site_root = convert.to_site(original)
     restored = convert.from_site(site_root)
     self.assertTrue(original.equals(restored))
Example #22
0
 def test_to_text(self):
     """Check convert.to_text on the standard3 passage with the second argument False and True."""
     passage = convert.from_standard(self._load_xml('./standard3.xml'))
     # With False, the first returned element is the whole text
     joined = convert.to_text(passage, False)
     self.assertEqual(joined[0], '1 2 3 4 . 6 7 8 9 10 . 12 13 14 15')
     # With True, the text comes back in three pieces
     pieces = convert.to_text(passage, True)
     expected_pieces = ['1 2 3 4 .', '6 7 8 9 10 .', '12 13 14 15']
     self.assertSequenceEqual(pieces, expected_pieces)
Example #23
0
 def test_from_standard(self):
     """The standard3 passage must equal, in order, the one loaded from the site3 file."""
     from_standard_format = convert.from_standard(TestUtil.load_xml("test_files/standard3.xml"))
     from_site_format = convert.from_site(TestUtil.load_xml("test_files/site3.xml"))
     same = from_standard_format.equals(from_site_format, ordered=True)
     self.assertTrue(same)
Example #24
0
 def __init__(self, *args, **kwargs):
     """Initialise the test case, instantiate Config and load the standard3 passage.

     All arguments are forwarded unchanged to the parent initialiser.
     """
     super(ParserTests, self).__init__(*args, **kwargs)
     # Config is instantiated for its side effects; the instance itself is not kept
     Config("", "-m", "test")
     xml_root = TestUtil.load_xml('test_files/standard3.xml')
     self.passage = convert.from_standard(xml_root)
Example #25
0
    starts = [0, len(tokens)]
    return [
        ' '.join(tokens[starts[i]:starts[i + 1]])
        for i in range(len(starts) - 1)
    ]


index = list(range(0, 1500))

for t in index:
    f1 = open('test.en.tupa_parsed/newsdiscusstest2015-enfr-src_%s.xml' % t)
    xml_string1 = f1.read()
    f1.close()
    xml_object1 = fromstring(xml_string1)
    P1 = convert.from_standard(xml_object1)
    L1 = get_Hscenes(P1)
    L2 = get_EAscenes(P1)[0]
    C2 = get_EAscenes(P1)[1]
    T = get_passage(P1)
    D = to_word_text(P1)

    split12 = []
    for h in L1:
        D1 = get_difference(h, L2, C2)
        split12.append(D1)

    S1 = sum(split12, [])
    s = open('12r%s.txt' % t, 'w')
    if S1 != []:
        s.write('%s\n' % S1)