Esempio n. 1
0
def insert_inf_elt(src, defn):
    """
    Insert an information element definition into DRL source text.

    The source is lexed and scanned from the last interval backwards (index 0
    is never tested) for a closing tag. ``defn``, surrounded by
    ``os.linesep``, is inserted at the offset of that interval, or at the
    offset of the first interval when no closing tag was found.

    :param src: DRL source text
    :param defn: source of the element to insert
    :return: source text with ``defn`` inserted
    """
    intervals = xmllexer.lex(src)
    last = len(intervals) - 1
    # Highest index >= 1 holding a closing tag; otherwise fall back to 0
    # (or to -1 for an empty interval list, exactly as a plain countdown would).
    anchor = next(
        (
            idx
            for idx in range(last, 0, -1)
            if not (intervals[idx].int_type != xmllexer.IntervalType.closetag)
        ),
        min(last, 0),
    )
    cut = intervals[anchor].offs
    head, tail = src[:cut], src[cut:]
    return head + os.linesep + defn + os.linesep + tail
Esempio n. 2
0
def insert_inf_elt(src, defn):
    """
    Put ``defn`` into ``src`` in front of the last closing tag the lexer reports.

    Intervals are examined from the end towards index 1; if none of them is a
    closing tag, the offset of interval 0 is used as the insertion point.
    The inserted text is wrapped in ``os.linesep`` on both sides.

    :param src: DRL source text
    :param defn: source of the element to insert
    :return: source text with ``defn`` inserted
    """
    tokens = xmllexer.lex(src)
    pos = len(tokens) - 1
    while True:
        if pos <= 0:
            # ran out of candidates: keep whatever index we stopped at
            break
        if tokens[pos].int_type != xmllexer.IntervalType.closetag:
            pos -= 1
            continue
        break
    split_at = tokens[pos].offs
    before = src[:split_at]
    after = src[split_at:]
    return before + os.linesep + defn + os.linesep + after
Esempio n. 3
0
    def __init__(self, fileName):
        """
        Load a UTF-8 source file, normalise its lines and compute markup zones.

        Every line has ``\\r`` and ``\\n`` removed, a single ``\\n`` appended and
        each TAB replaced by four spaces. ``self.offsets`` receives the start
        offset of every normalised line plus one final entry equal to the
        total text length. When the module-level flag
        ``write_reformatted_sources`` is set, the normalised text is written to
        ``<fileName>.reformatted``.

        :param fileName: path of the file to read
        """
        global write_reformatted_sources, checkmarkup

        self.elementZones = []
        self.fileName = fileName
        self.text = None
        self.offsets = []

        # drop CR/LF, expand TAB to 4 spaces (Clone Miner treats TAB as 4 spaces)
        cleanup = {ord('\r'): None, ord('\n'): None, ord('\t'): '    '}

        position = 0
        with open(fileName, encoding='utf-8') as ifs:
            self.lines = []
            for raw_line in ifs:
                self.offsets.append(position)
                # leading blanks are kept; '\n' is the only separator we need
                normalised = raw_line.translate(cleanup) + '\n'
                self.lines.append(normalised)
                position += len(normalised)
            self.offsets.append(position)
            self.text = "".join(self.lines)

        if write_reformatted_sources:
            with open(fileName + ".reformatted", 'w+', encoding='utf-8', newline='\n') as ofs:
                ofs.write(self.text)

        # then calculate XML zones
        marker = XMLZoneMarker(self)

        if not checkmarkup:
            # -cmup no and -cmup shrink only need the URL zones
            marker.discoverURLs()
        else:
            marker.discover()
            self.zones, self.rzones = marker.zones, marker.rzones
            self.textzoneoffsets = marker.textzoneoffsets
            self.textzoneends = marker.textzoneends
            self.textzones = marker.textzones
        self.urlzones = marker.urlzones

        # calculate tag coordinates using the xmllexer module
        self.lexintervals = xmllexer.lex(self.text)
Esempio n. 4
0
def insert_dict_entry(src, defn):
    """
    Insert a dictionary entry definition into DRL source text.

    If the lexed source already contains an opening tag of the
    ``doc_clone_finder`` dictionary, ``defn`` is inserted at the ``end``
    offset of the last such tag in lexer order. Otherwise a new dictionary
    element wrapping ``defn`` is inserted at the offset of the last closing
    tag reported by the lexer (index 0 is used when none is found).

    :param src: DRL source text
    :param defn: source of the entry to insert
    :return: source text with the entry inserted
    """
    elts = xmllexer.lex(src)

    dict_tag = """<d:Dictionary id="doc_clone_finder">"""
    # The os.linesep padding only formats the text we insert.
    dopentag = os.linesep + dict_tag + os.linesep
    dclosetag = os.linesep + "</d:Dictionary>" + os.linesep

    offs = -1
    for e in reversed(elts):
        # Compare against the bare tag, not the padded dopentag: a padded
        # comparison would only succeed if srepr itself carried the line
        # separators, so an existing dictionary would be missed and a second
        # one with the same id added.
        # NOTE(review): assumes srepr is the tag's own markup; confirm against xmllexer.
        if e.int_type == xmllexer.IntervalType.opentag and e.srepr.strip() == dict_tag:
            offs = e.end
            break

    if offs != -1:
        # dictionary already present: append the entry inside it
        return src[:offs] + os.linesep + defn + src[offs:]

    # no dictionary yet: create one in front of the last closing tag
    li = len(elts) - 1
    while li > 0 and elts[li].int_type != xmllexer.IntervalType.closetag:
        li -= 1
    offs = elts[li].offs

    return src[:offs] + dopentag + defn + dclosetag + src[offs:]
Esempio n. 5
0
def insert_dict_entry(src, defn):
    """
    Add the entry ``defn`` to the ``doc_clone_finder`` dictionary of a DRL source.

    The lexed intervals are searched from the end for the dictionary's
    opening tag. When found, ``defn`` is inserted at that tag's ``end``
    offset. When not found, a complete dictionary element containing ``defn``
    is inserted at the offset of the last closing tag (or of interval 0 if
    the lexer reported no closing tag after index 0).

    :param src: DRL source text
    :param defn: source of the entry to insert
    :return: source text with the entry inserted
    """
    elts = xmllexer.lex(src)

    bare_opentag = """<d:Dictionary id="doc_clone_finder">"""
    # line separators around the tags are formatting for inserted text only
    dopentag = os.linesep + bare_opentag + os.linesep
    dclosetag = os.linesep + "</d:Dictionary>" + os.linesep

    offs = -1
    for e in reversed(elts):
        if e.int_type != xmllexer.IntervalType.opentag:
            continue
        # Match on the tag text without surrounding whitespace. Comparing
        # with the linesep-padded dopentag would miss a tag whose srepr is
        # just its markup, producing a duplicate dictionary on every call.
        # NOTE(review): presumes srepr holds the tag markup; verify in xmllexer.
        if e.srepr.strip() == bare_opentag:
            offs = e.end
            break

    if offs == -1:
        li = len(elts) - 1
        while li > 0 and elts[li].int_type != xmllexer.IntervalType.closetag:
            li -= 1
        offs = elts[li].offs

        return src[:offs] + dopentag + defn + dclosetag + src[offs:]

    return src[:offs] + os.linesep + defn + src[offs:]
Esempio n. 6
0
def create_reuse_entry(tinput: "str", clone_desc: "str", drl_elt_type: "str") -> "tuple(str, str)":
    """
    Converts clone instances to dictionary entry / information element references

    :param tinput: input DRL source
    :param clone_desc: clone group descriptor
    :param drl_elt_type: kind of DRL element to produce, "dict" or "infelt"
    :return: tuple of output text and definition element source
    :raises KeyError: if drl_elt_type is neither "dict" nor "infelt"
    :raises AssertionError: if a variative group is requested as anything but "infelt"
    """
    cd = CandidateDescriptor(clone_desc)

    # (definition template, reference template) per element kind
    drl_elts = {
        "dict": ("""<d:Entry id="%s">%s</d:Entry>""", """<d:DictRef entryid="%s" dictid="doc_clone_finder" />%s"""),
        "infelt": ("""<d:InfElement id="%s">%s</d:InfElement>""", """<d:InfElemRef infelemid="%s">%s</d:InfElemRef>"""),
    }
    drl_defs, drl_refs = drl_elts[drl_elt_type]

    if drl_elt_type != "infelt" and cd.variative:
        message = "Requested to refactor variative unit, but not information element"
        logging.fatal(message)
        # Raised explicitly: a bare `assert False` is removed under `python -O`,
        # which would let the function go on and emit an invalid element.
        raise AssertionError(message)

    def xs(xel):
        # concatenated source representation of a sequence of lexer intervals
        return "".join([el.srepr for el in xel])

    eid = str(uuid.uuid4())

    whole_instances = cd.get_whole_instances(tinput)
    # outer balancing using outer instance #0
    outer_def_prepend, outer_def_append, outer_ref_prepend, outer_ref_append = xmlfixup.balance_unbalanced_text(
        xmllexer.lex(whole_instances[0])
    )

    # prepend and appends for extension points
    vps = cd.get_variative_parts(tinput)
    # balancing using variative part #0
    vps_pa = [xmlfixup.balance_unbalanced_text(xmllexer.lex(vp[0])) for vp in vps]

    # definition: one numbered <d:Nest/> placeholder per balanced variative part
    vardefs = [
        xs(inner_ref_prepend) + ("""<d:Nest id="%s.%d"/>""" % (eid, cnt)) + xs(inner_ref_append)
        for cnt, (_, _, inner_ref_prepend, inner_ref_append) in enumerate(vps_pa, start=1)
    ]

    condefs = cd.get_constant_parts(tinput)

    # interleave constant parts with placeholders; the extra "" pairs with the
    # constant part that follows the last placeholder
    bodydef = "".join([const + var for const, var in zip(condefs, vardefs + [""])])

    complete_def = drl_defs % (eid, xs(outer_def_prepend) + bodydef + xs(outer_def_append))

    # reference
    # NOTE(review): vps is walked here once per produced reference while vps_pa
    # holds one entry per element of vps; confirm the shape returned by
    # get_variative_parts.
    vrefs = []
    for vpv in vps:
        replace_nests = []
        for cnt, (vp, pa) in enumerate(zip(vpv, vps_pa), start=1):
            _, _, inner_ref_prepend, inner_ref_append = pa
            replace_nests.append(
                ("""<d:Replace-Nest nestid="%s.%d">""" % (eid, cnt))
                + xs(inner_ref_prepend)
                + vp
                + xs(inner_ref_append)
                + """</d:Replace-Nest>"""
            )
        vrefs.append(xs(outer_ref_prepend) + drl_refs % (eid, "".join(replace_nests)) + xs(outer_ref_append))

    if not vrefs:
        # non-variative one
        vrefs = [xs(outer_ref_prepend) + (drl_refs % (eid, "")) + xs(outer_ref_append)]

    # instance intervals
    intervals = cd.get_whole_instance_coordinates(tinput)

    restext = replace_str_intervals_with(tinput, vrefs, intervals)

    return restext, complete_def
Esempio n. 7
0
    # going left
    stack = []
    back_intervals = list(covered_intervals)
    back_intervals.reverse()
    for i in back_intervals:
        if i.int_type == xmllexer.IntervalType.closetag:
            stack.append(i)
        elif i.int_type == xmllexer.IntervalType.opentag and len(stack):
            stack.pop()

    for i in stack:
        elem_prepend.append(i.create_opposite_tag())
        ref_prepend.insert(0, i)

    return elem_prepend, elem_append, ref_prepend, ref_append


# just a test
if __name__ == '__main__':
    # Manual smoke test: balance a fragment with two stray closing tags and
    # two unclosed opening tags, then print the suggested wrappers.
    def _markup(intervals):
        # concatenated source representation of the given intervals
        return "".join([iv.srepr for iv in intervals])

    sample = """t0</a>t1</b>t2<c>t3<d>t4"""
    elem_pre, elem_post, ref_pre, ref_post = balance_unbalanced_text(xmllexer.lex(sample))
    print(_markup(elem_pre))
    print(sample)
    print(_markup(elem_post))
    print("============")
    print(_markup(ref_pre) + "<REF/>" + _markup(ref_post))
Esempio n. 8
0
    for i in stack:
        elem_append.insert(0, i.create_opposite_tag())
        ref_append.append(i)

    # going left
    stack = []
    back_intervals = list(covered_intervals)
    back_intervals.reverse()
    for i in back_intervals:
        if i.int_type == xmllexer.IntervalType.closetag:
            stack.append(i)
        elif i.int_type == xmllexer.IntervalType.opentag and len(stack):
            stack.pop()

    for i in stack:
        elem_prepend.append(i.create_opposite_tag())
        ref_prepend.insert(0, i)

    return elem_prepend, elem_append, ref_prepend, ref_append

# just a test
if __name__ == '__main__':
    # Ad-hoc check of balance_unbalanced_text on an unbalanced fragment:
    # prints the element prepend, the fragment, the element append, and then
    # the reference prepend/append around a <REF/> marker.
    fragment = """t0</a>t1</b>t2<c>t3<d>t4"""
    lexed = xmllexer.lex(fragment)
    balanced = balance_unbalanced_text(lexed)
    def_pre, def_post, ref_pre, ref_post = (
        "".join([part.srepr for part in group]) for group in balanced
    )
    for out_line in (def_pre, fragment, def_post, "============", ref_pre + "<REF/>" + ref_post):
        print(out_line)
Esempio n. 9
0
def create_reuse_entry(tinput: 'str', clone_desc: 'str',
                       drl_elt_type: 'str') -> 'tuple(str, str)':
    """
    Converts clone instances to dictionary entry / information element references

    :param tinput: input DRL source
    :param clone_desc: clone group descriptor
    :param drl_elt_type: kind of DRL element to produce, "dict" or "infelt"
    :return: tuple of output text and definition element source
    :raises KeyError: if drl_elt_type is neither "dict" nor "infelt"
    :raises AssertionError: if a variative group is requested as anything but "infelt"
    """
    cd = CandidateDescriptor(clone_desc)

    # element kind -> (definition template, reference template)
    drl_elts = {
        "dict": ("""<d:Entry id="%s">%s</d:Entry>""",
                 """<d:DictRef entryid="%s" dictid="doc_clone_finder" />%s"""),
        "infelt": ("""<d:InfElement id="%s">%s</d:InfElement>""",
                   """<d:InfElemRef infelemid="%s">%s</d:InfElemRef>""")
    }
    drl_defs, drl_refs = drl_elts[drl_elt_type]

    if drl_elt_type != 'infelt' and cd.variative:
        reason = "Requested to refactor variative unit, but not information element"
        logging.fatal(reason)
        # An explicit raise keeps this guard alive under `python -O`, where
        # `assert False` would be compiled away and processing would continue.
        raise AssertionError(reason)

    def xs(xel):
        # join the source representations of a sequence of lexer intervals
        return ''.join([el.srepr for el in xel])

    eid = str(uuid.uuid4())

    whole_instances = cd.get_whole_instances(tinput)
    # outer balancing using outer instance #0
    outer_def_prepend, outer_def_append, outer_ref_prepend, outer_ref_append = \
        xmlfixup.balance_unbalanced_text(xmllexer.lex(whole_instances[0]))

    # prepend and appends for extension points
    vps_pa = []
    vps = cd.get_variative_parts(tinput)
    for vp in vps:
        # balancing using variative part #0
        vps_pa.append(xmlfixup.balance_unbalanced_text(xmllexer.lex(vp[0])))

    # definition: a numbered <d:Nest/> placeholder for every balanced part
    vardefs = []
    for cnt, pa in enumerate(vps_pa, start=1):
        _, _, inner_ref_prepend, inner_ref_append = pa
        vardefs.append(
            xs(inner_ref_prepend) + ("""<d:Nest id="%s.%d"/>""" % (eid, cnt)) +
            xs(inner_ref_append))

    condefs = cd.get_constant_parts(tinput)

    # constant parts alternate with placeholders; the trailing '' pairs with
    # the constant part after the last placeholder
    bodydef = ''.join([cv[0] + cv[1] for cv in zip(condefs, vardefs + [''])])

    complete_def = drl_defs % (eid, xs(outer_def_prepend) + bodydef +
                               xs(outer_def_append))

    # reference
    # NOTE(review): vps is iterated once per produced reference here, while
    # vps_pa was built with one entry per element of vps; verify the shape
    # returned by get_variative_parts.
    vrefs = []
    for vpv in vps:
        replace_nests = []
        for cnt, (vp, pa) in enumerate(zip(vpv, vps_pa), start=1):
            _, _, inner_ref_prepend, inner_ref_append = pa
            replace_nests.append(("""<d:Replace-Nest nestid="%s.%d">""" %
                                  (eid, cnt)) + xs(inner_ref_prepend) + vp +
                                 xs(inner_ref_append) +
                                 """</d:Replace-Nest>""")
        vrefs.append(
            xs(outer_ref_prepend) + drl_refs % (eid, ''.join(replace_nests)) +
            xs(outer_ref_append))

    if not vrefs:
        # non-variative one
        vrefs = [
            xs(outer_ref_prepend) + (drl_refs % (eid, '')) +
            xs(outer_ref_append)
        ]

    # instance intervals
    intervals = cd.get_whole_instance_coordinates(tinput)

    restext = replace_str_intervals_with(tinput, vrefs, intervals)

    return restext, complete_def