Ejemplo n.º 1
0
def file_dist(rep_out_path, conv_dir1, conv_dir2):
    """Sum clone metrics per pair of source directories.

    Loads the clone report at ``rep_out_path``, resolves each clone's
    (diff file stem, start line) back to a source-file name through the
    ``diff2file`` tables built by walking ``conv_dir1`` / ``conv_dir2``, and
    accumulates the integer metric of every resolvable clone pair under a
    ``(src_dir1, src_dir2)`` key.

    Args:
        rep_out_path: path passed to ``RepertoireOutput.loadFromFile``.
        conv_dir1: directory walked to resolve the first clone of each pair.
        conv_dir2: directory walked to resolve the second clone of each pair.

    Returns:
        dict mapping ``(src_dir1, src_dir2)`` to the summed metric, where each
        directory is built from the first (up to) three ``_``-separated parts
        of the source-file name joined with ``os.sep``.

    Raises:
        ValueError: if the leading part of a metric (text before the first
            ``:``, with leading ``(`` stripped) is not an integer.
    """
    rep_out = RepertoireOutput()
    convDir1 = DiffToFileMapping(conv_dir1)
    convDir2 = DiffToFileMapping(conv_dir2)

    convDir1.walk_dir()
    convDir2.walk_dir()

    fileDist = {}
    rep_out.loadFromFile(rep_out_path, 1)

    for _clone_idx, (clone1, clone2, metric) in rep_out.getCloneIter():
        fidx1, start1, _end1 = clone1
        fidx2, start2, _end2 = clone2

        # diff2file is keyed by (diff file name without extension, start line
        # as a string).
        diff_file1 = os.path.splitext(
            os.path.basename(rep_out.getFilePath(fidx1)))[0]
        diff_file2 = os.path.splitext(
            os.path.basename(rep_out.getFilePath(fidx2)))[0]

        src_file1 = convDir1.diff2file.get((diff_file1, str(start1)), -1)
        src_file2 = convDir2.diff2file.get((diff_file2, str(start2)), -1)

        # Skip clone pairs where either side cannot be mapped to a source file.
        if src_file1 == -1 or src_file2 == -1:
            continue

        # Take the directory up to depth 3. Slicing (rather than indexing
        # [0], [1], [2]) keeps names with fewer than three parts from raising
        # IndexError; names with three or more parts yield the same key as
        # before.
        src_dir1 = os.sep.join(src_file1.split("_")[:3])
        src_dir2 = os.sep.join(src_file2.split("_")[:3])

        key = (src_dir1, src_dir2)

        # The metric text looks like "(<int>:..."; keep only the leading int.
        m = metric.partition(':')[0].lstrip('(')

        # dict.get replaces the Python-2-only dict.has_key check.
        fileDist[key] = fileDist.get(key, 0) + int(m)

    return fileDist
Ejemplo n.º 2
0
def convert_ccfx_output(pb, proj, lang, is_new):
    """Rewrite a ccfx clone report so it indexes into filter-output files.

    For every file in the project's filter-output directory a ``CCFXMetaData``
    record is registered. Each record's line-map ("conv") file and ccfx prep
    file are then read to build:

    * ``meta.prepIdx2OrigIdx`` -- 1-based prep line number -> original line
      (``-1`` when the prep line has no entry in the line map);
    * ``meta.line2op`` -- original line -> operation string from the line map.

    Finally the ccfx output is loaded and every clone's start/end is
    translated through those tables.

    Args:
        pb: path builder providing the ``get*Path`` / file-name helpers used
            below.
        proj: project selector forwarded to the path builder.
        lang: language selector forwarded to the path builder.
        is_new: flag forwarded to the path builder.

    Returns:
        A ``RepertoireOutput`` populated through ``loadFromData(files,
        clones)``, where each clone is a pair of
        ``(fidx, start, end, [(line, op), ...])`` tuples ordered by file index.

    Raises:
        Exception: if the ccfx output mentions a path with no registered meta
            record.
    """
    metaDB = CCFXMetaMapping()
    # maps from ccfx input paths to meta objects representing the files
    filter_path = pb.getFilterOutputPath(proj, lang)
    conv_path = pb.getLineMapPath(proj, lang, is_new)
    ccfx_i_path = pb.getCCFXInputPath(proj, lang, is_new)
    ccfx_p_path = pb.getCCXFPrepPath(proj, lang, is_new)
    print("filter_path = " + filter_path)
    print("conv_path = " + conv_path)
    print("ccfx_i_path = " + ccfx_i_path)
    print("ccfx_p_path = " + ccfx_p_path)
    for name in os.listdir(filter_path):
        meta = CCFXMetaData(
            ccfx_i_path + name,
            ccfx_p_path + pb.findPrepFileFor(ccfx_p_path, name),
            conv_path + pb.makeLineMapFileName(name),
            filter_path + name)
        metaDB.addFile(meta)

    print(metaDB)
    # we have our files, now map line numbers in the prep files to input files
    for meta in metaDB.getMetas():

        # Debug output only when debugging is enabled (the check used to be
        # inverted: ``config.DEBUG is False``).
        if config.DEBUG is True:
            print("prep file = " + meta.ccfxPrep)
            print("conv file = " + meta.filterConv)

        # ``with`` closes the handles even if reading raises.
        with open(meta.ccfxPrep, 'r') as prep_handle:
            prep = prep_handle.readlines()
        with open(meta.filterConv, 'r') as conv_handle:
            conv = conv_handle.readlines()

        input2orig = {}
        pidx2orig = {}
        origline2op = {}
        # build a map of line numbers in ccfx_input to filtered diff line;
        # the first two lines of the conv file are skipped
        for cline in conv[2:]:
            if cline.rstrip().startswith('"'):  # filename --> skip the line
                continue

            # Each remaining row is "dst,src,op,changeId"; the last field is
            # unpacked only to enforce the four-column shape.
            dstIdx, srcIdx, op, _change_id = cline.split(',')
            src = int(srcIdx)
            input2orig[int(dstIdx)] = src
            origline2op[src] = op

        # ccfx numbers from 1, so enumerate the prep lines from 1 as well.
        for pidx, pline in enumerate(prep, 1):
            # The text before the first "." is the ccfx-input line, in hex.
            inputIdx = int(pline.partition(".")[0], 16)
            pidx2orig[pidx] = input2orig.get(inputIdx, -1)
        meta.prepIdx2OrigIdx = pidx2orig
        meta.line2op = origline2op

    ccfx_out_path = pb.getCCFXOutputPath() + pb.getCCFXOutputFileName(
        lang, is_new, is_tmp=False)
    ccfx_out = RepertoireOutput()
    ccfx_out.loadFromFile(ccfx_out_path)

    files = {}
    for fileIdx, path in ccfx_out.getFileIter():
        print(fileIdx)
        print(path)
        if not metaDB.hasInputPath(path):
            raise Exception(
                "Couldn't find meta information for file: {0}".format(path))
        print(">>>>>>> " + path)
        files[fileIdx] = metaDB.getMetaForPath(path).filterOutput

    clones = {}

    for cloneIdx, (clone1, clone2) in ccfx_out.getCloneIter():
        fidx1, start1, end1 = clone1
        fidx2, start2, end2 = clone2
        meta1 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx1))
        meta2 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx2))

        # Translate prep indices to original lines. A failed lookup yields -1
        # and is passed through unchanged.
        start1 = meta1.prepIdx2OrigIdx.get(start1 + 1, -1)
        end1 = meta1.prepIdx2OrigIdx.get(end1, -1)
        start2 = meta2.prepIdx2OrigIdx.get(start2 + 1, -1)
        end2 = meta2.prepIdx2OrigIdx.get(end2, -1)

        # Lines with no recorded operation are tagged "X".
        op1 = [(i, meta1.line2op.get(i, "X"))
               for i in range(start1, end1 + 1)]
        op2 = [(i, meta2.line2op.get(i, "X"))
               for i in range(start2, end2 + 1)]

        clone1 = (fidx1, start1, end1, op1)
        clone2 = (fidx2, start2, end2, op2)
        # The clone with the strictly smaller file index goes first.
        if fidx1 < fidx2:
            clones[cloneIdx] = (clone1, clone2)
        else:
            clones[cloneIdx] = (clone2, clone1)

    rep_out = RepertoireOutput()
    rep_out.loadFromData(files, clones)
    return rep_out
def convert_ccfx_output(pb, proj, lang, is_new):
    """Rewrite a ccfx clone report so it indexes into filter-output files.

    Registers one ``CCFXMetaData`` record per file found in the project's
    filter-output directory, then reads each record's line-map ("conv") file
    and ccfx prep file to attach two lookup tables to it:

    * ``meta.prepIdx2OrigIdx`` -- 1-based prep line number -> original line
      (``-1`` when the prep line has no entry in the line map);
    * ``meta.line2op`` -- original line -> operation string from the line map.

    The ccfx output is then loaded and every clone's start/end is translated
    through those tables.

    Args:
        pb: path builder providing the ``get*Path`` / file-name helpers used
            below.
        proj: project selector forwarded to the path builder.
        lang: language selector forwarded to the path builder.
        is_new: flag forwarded to the path builder.

    Returns:
        A ``RepertoireOutput`` populated through ``loadFromData(files,
        clones)``, where each clone is a pair of
        ``(fidx, start, end, [(line, op), ...])`` tuples ordered by file index.

    Raises:
        Exception: if the ccfx output mentions a path with no registered meta
            record.
    """
    metaDB = CCFXMetaMapping()
    # maps from ccfx input paths to meta objects representing the files
    filter_path = pb.getFilterOutputPath(proj, lang)
    conv_path = pb.getLineMapPath(proj, lang, is_new)
    ccfx_i_path = pb.getCCFXInputPath(proj, lang, is_new)
    ccfx_p_path = pb.getCCXFPrepPath(proj, lang, is_new)
    print("filter_path = " + filter_path)
    print("conv_path = " + conv_path)
    print("ccfx_i_path = " + ccfx_i_path)
    print("ccfx_p_path = " + ccfx_p_path)
    for name in os.listdir(filter_path):
        meta = CCFXMetaData(
            ccfx_i_path + name,
            ccfx_p_path + pb.findPrepFileFor(ccfx_p_path, name),
            conv_path + pb.makeLineMapFileName(name),
            filter_path + name)
        metaDB.addFile(meta)

    print(metaDB)
    # we have our files, now map line numbers in the prep files to input files
    for meta in metaDB.getMetas():

        # Debug output only when debugging is enabled (the check used to be
        # inverted: ``config.DEBUG is False``).
        if config.DEBUG is True:
            print("prep file = " + meta.ccfxPrep)
            print("conv file = " + meta.filterConv)

        # ``with`` closes the handles even if reading raises.
        with open(meta.ccfxPrep, 'r') as prep_handle:
            prep = prep_handle.readlines()
        with open(meta.filterConv, 'r') as conv_handle:
            conv = conv_handle.readlines()

        input2orig = {}
        pidx2orig = {}
        origline2op = {}
        # build a map of line numbers in ccfx_input to filtered diff line;
        # the first two lines of the conv file are skipped
        for cline in conv[2:]:
            if cline.rstrip().startswith('"'):  # filename --> skip the line
                continue

            # Each remaining row is "dst,src,op,changeId"; the last field is
            # unpacked only to enforce the four-column shape.
            dstIdx, srcIdx, op, _change_id = cline.split(',')
            src = int(srcIdx)
            input2orig[int(dstIdx)] = src
            origline2op[src] = op

        # ccfx numbers from 1, so enumerate the prep lines from 1 as well.
        for pidx, pline in enumerate(prep, 1):
            # The text before the first "." is the ccfx-input line, in hex.
            inputIdx = int(pline.partition(".")[0], 16)
            pidx2orig[pidx] = input2orig.get(inputIdx, -1)
        meta.prepIdx2OrigIdx = pidx2orig
        meta.line2op = origline2op

    ccfx_out_path = pb.getCCFXOutputPath() + pb.getCCFXOutputFileName(
        lang, is_new, is_tmp=False)
    ccfx_out = RepertoireOutput()
    ccfx_out.loadFromFile(ccfx_out_path)

    files = {}
    for fileIdx, path in ccfx_out.getFileIter():
        print(fileIdx)
        print(path)
        if not metaDB.hasInputPath(path):
            raise Exception(
                "Couldn't find meta information for file: {0}".format(path))
        print(">>>>>>> " + path)
        files[fileIdx] = metaDB.getMetaForPath(path).filterOutput

    clones = {}

    for cloneIdx, (clone1, clone2) in ccfx_out.getCloneIter():
        fidx1, start1, end1 = clone1
        fidx2, start2, end2 = clone2
        meta1 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx1))
        meta2 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx2))

        # Translate prep indices to original lines. A failed lookup yields -1
        # and is passed through unchanged.
        start1 = meta1.prepIdx2OrigIdx.get(start1 + 1, -1)
        end1 = meta1.prepIdx2OrigIdx.get(end1, -1)
        start2 = meta2.prepIdx2OrigIdx.get(start2 + 1, -1)
        end2 = meta2.prepIdx2OrigIdx.get(end2, -1)

        # Lines with no recorded operation are tagged "X".
        op1 = [(i, meta1.line2op.get(i, "X"))
               for i in range(start1, end1 + 1)]
        op2 = [(i, meta2.line2op.get(i, "X"))
               for i in range(start2, end2 + 1)]

        clone1 = (fidx1, start1, end1, op1)
        clone2 = (fidx2, start2, end2, op2)
        # The clone with the strictly smaller file index goes first.
        if fidx1 < fidx2:
            clones[cloneIdx] = (clone1, clone2)
        else:
            clones[cloneIdx] = (clone2, clone1)

    rep_out = RepertoireOutput()
    rep_out.loadFromData(files, clones)
    return rep_out
Ejemplo n.º 4
0
def convert_ccfx_output(pb, lang, is_new, debug=False):
    """Rewrite a ccfx clone report so it indexes into filter-output files.

    Registers one ``CCFXMetaData`` record per filter-output file of both
    ``PathBuilder.PROJ0`` and ``PathBuilder.PROJ1``, builds per-file line
    translation tables from the line-map ("conv") and ccfx prep files, then
    loads the ccfx output, translates every clone pair, splits it into hunks
    and collects the hunks into a new ``RepertoireOutput``.

    Args:
        pb: path builder providing the ``get*Path`` / file-name helpers used
            below.
        lang: language selector forwarded to the path builder.
        is_new: flag forwarded to the path builder.
        debug: when true, print progress and translation failures; also
            forwarded to ``split_clone_into_hunks``.

    Returns:
        A ``RepertoireOutput`` populated through ``loadFromData(files,
        clones)``; ``clones`` is keyed by consecutive integers starting at 0.
        Clone pairs with any untranslatable start/end index are dropped.

    Raises:
        Exception: if the ccfx output mentions a path with no registered meta
            record.
    """
    metaDB = CCFXMetaMapping()
    # maps from ccfx input paths to meta objects representing the files
    for proj in [PathBuilder.PROJ0, PathBuilder.PROJ1]:
        filter_path = pb.getFilterOutputPath(proj, lang)
        conv_path = pb.getLineMapPath(proj, lang, is_new)
        ccfx_i_path = pb.getCCFXInputPath(proj, lang, is_new)
        ccfx_p_path = pb.getCCXFPrepPath(proj, lang, is_new)
        for name in os.listdir(filter_path):
            meta = CCFXMetaData(
                ccfx_i_path + name,
                ccfx_p_path + pb.findPrepFileFor(ccfx_p_path, name),
                conv_path + pb.makeLineMapFileName(name),
                filter_path + name)
            metaDB.addFile(meta)

    # we have our files, now map line numbers in the prep files to input files
    for meta in metaDB.getMetas():

        if config.DEBUG is True:
            print("prep file = " + meta.ccfxPrep)
            print("conv file = " + meta.filterConv)

        # ``with`` closes the handles even if reading raises.
        with open(meta.ccfxPrep, 'r') as prep_handle:
            prep = prep_handle.readlines()
        with open(meta.filterConv, 'r') as conv_handle:
            conv = conv_handle.readlines()

        input2orig = {}
        pidx2orig = {}
        origline2op = {}
        # build a map of line numbers in ccfx_input to filtered diff line;
        # the first two lines of the conv file are skipped
        last_dst = last_src = 0
        for cline in conv[2:]:
            if cline.rstrip().startswith('"'):  # filename --> skip the line
                continue

            # Each remaining row is "dst,src,op,changeId"; the last field is
            # unpacked only to enforce the four-column shape.
            dstIdx, srcIdx, op, _change_id = cline.split(',')
            dst = int(dstIdx)
            src = int(srcIdx)
            input2orig[dst] = src
            origline2op[src] = op
            last_dst = dst + 1
            last_src = src + 1
        # ccfx cares about the end of file, which isn't represented by our
        # mappings, so add one synthetic entry just past the last mapped line
        input2orig[last_dst] = last_src
        origline2op[last_src] = "NOCHANGE"

        for pidx, pline in enumerate(prep):
            # The text before the first "." is the ccfx-input line, in hex.
            inputIdx = int(pline.partition(".")[0], 16)
            # ccfx output has numbers like 0-131, meaning that pidx
            # is meant to be taken from 0
            orig_idx = input2orig.get(inputIdx, -1)
            pidx2orig[pidx] = orig_idx
            if debug and orig_idx == -1:
                print("failed to translate from pidx to original: {0} -> {1}".format(pidx, inputIdx))
                print("    file: " + meta.ccfxInput)

        meta.prepIdx2OrigIdx = pidx2orig
        meta.line2op = origline2op

    ccfx_out_path = pb.getCCFXOutputPath() + pb.getCCFXOutputFileName(
        lang, is_new, is_tmp=False)
    ccfx_out = RepertoireOutput()
    if debug:
        print('loading from ccfx output file: {0}'.format(ccfx_out_path))
    ccfx_out.loadFromFile(ccfx_out_path)
    if debug:
        print("finished loading ccfx output.")

    files = {}
    for fileIdx, path in ccfx_out.getFileIter():
        if not metaDB.hasInputPath(path):
            raise Exception(
                "Couldn't find meta information for file: {0}".format(path))
        files[fileIdx] = metaDB.getMetaForPath(path).filterOutput

    clones = {}

    # rewrite the line numbers to index into filter_output files
    for _clone_idx, clone_pair in ccfx_out.getCloneIter():
        # NOTE(review): op1/op2 are the op containers carried by the loaded
        # clones and are extended in place below -- confirm the loader hands
        # out fresh (empty) lists.
        fidx1, start1, end1, op1 = clone_pair.clone1
        fidx2, start2, end2, op2 = clone_pair.clone2
        metric = clone_pair.metric
        meta1 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx1))
        meta2 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx2))

        # NOTE(review): prepIdx2OrigIdx is keyed from 0 above, yet the start
        # index is looked up at start + 1 -- confirm this offset is intended.
        start1 = meta1.prepIdx2OrigIdx.get(start1 + 1, -1)
        end1 = meta1.prepIdx2OrigIdx.get(end1, -1)
        start2 = meta2.prepIdx2OrigIdx.get(start2 + 1, -1)
        end2 = meta2.prepIdx2OrigIdx.get(end2, -1)

        if -1 in (start1, start2, end1, end2):
            if debug:
                print('line translation failed for ' + str(clone_pair))
            # don't even try to translate a clone with bad indices
            # this usually means we somehow dumped an empty file on
            # ccfx and we can't translate the eof token correctly
            # enabling debug should verify this
            continue

        # Lines with no recorded operation are tagged "X".
        for i in range(start1, end1 + 1):
            op1.append(Operation(i, meta1.line2op.get(i, "X")))

        for i in range(start2, end2 + 1):
            op2.append(Operation(i, meta2.line2op.get(i, "X")))

        clone1 = Clone(fidx1, start1, end1, op1)
        clone2 = Clone(fidx2, start2, end2, op2)
        # The clone with the strictly smaller file index goes first.
        if clone1.fidx < clone2.fidx:
            unsplit_clone = ClonePair(clone1, clone2, metric)
        else:
            unsplit_clone = ClonePair(clone2, clone1, metric)

        # split into hunks, add those hunks into our final output; a distinct
        # loop name avoids shadowing the outer ``clone_pair``
        for hunk_pair in split_clone_into_hunks(unsplit_clone, debug):
            clones[len(clones)] = hunk_pair

    rep_out = RepertoireOutput()
    rep_out.loadFromData(files, clones)
    return rep_out
Ejemplo n.º 5
0
#    print fileIdx
#    print path
    if not metaDB.hasInputPath(path):
        print "Couldn't find meta information for file: ", path
	sys.exit(-1)
    meta = metaDB.getMetaForPath(path)
    files[fileIdx] = meta.filterOutput

clones = {}

# Translate each clone pair reported by ccfx from prep-file indices to
# original line numbers and collect the per-line operations for both sides.
for cloneIdx, (clone1, clone2) in ccfx_out.getCloneIter():
    op1 = []
    op2 = []
    fidx1, start1, end1 = clone1
    fidx2, start2, end2 = clone2
    meta1 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx1))
    meta2 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx2))

    # A failed lookup yields -1; the start index is looked up at start + 1.
    start1 = meta1.prepIdx2OrigIdx.get(start1+1, -1)
    end1 = meta1.prepIdx2OrigIdx.get(end1, -1)
    start2 = meta2.prepIdx2OrigIdx.get(start2+1, -1)
    # NOTE(review): the doubled "end2 = end2 =" is redundant but harmless.
    end2 = end2 = meta2.prepIdx2OrigIdx.get(end2, -1)

    # Record (line, op) for every line in the translated range; lines with no
    # recorded operation are tagged "X".
    for i in range(start1,end1+1):
        op = meta1.line2op.get(i, "X")
        op1.append((i,op))

    for i in range(start2,end2+1):
        op = meta2.line2op.get(i, "X")
        op2.append((i,op))