Example #1
0
    def process_clone(self):
        """Fill the file and clone tree widgets from the Repertoire output.

        Loads ``self.repOutputPath`` into a ``RepertoireOutput``.  Every
        (index, file name) pair yielded by ``getFileIter`` has its name
        appended to ``self.fileList`` and is shown as a row under
        ``self.ui.fileList``.  Every clone pair yielded by ``getCloneIter``
        whose metric converts to a non-zero integer is shown as a row under
        ``self.ui.cloneList`` with its index, both ``file.start-end`` ranges
        and the metric.
        """
        rep_output = RepertoireOutput()
        print(self.repOutputPath)
        rep_output.loadFromFile(self.repOutputPath, 1)

        for index, fileName in rep_output.getFileIter():
            if config.DEBUG == 2:
                print("fileName =" + fileName)
            self.fileList.append(fileName)
            # The tree widget is passed as parent of the new item.
            listItem = QtGui.QTreeWidgetItem(self.ui.fileList)
            listItem.setText(0, str(index))
            listItem.setText(1, fileName)

        for indx, (cl1, cl2, metric) in rep_output.getCloneIter():
            fidx1, start1, end1 = cl1
            fidx2, start2, end2 = cl2
            metric = int(metric)

            if config.DEBUG == 2:
                # Use formatting rather than "+": indx and the clone sides
                # are not strings.  The earlier `print line` referenced a
                # name that does not exist in this method and was dropped.
                print("{0} {1} {2}".format(indx, cl1, cl2))
                print(metric)

            # Clones with a zero metric are not listed.
            if metric:
                listItem = QtGui.QTreeWidgetItem(self.ui.cloneList)
                listItem.setText(0, str(indx))
                listItem.setText(1, "{0}.{1}-{2}\t{3}.{4}-{5}".format(
                    fidx1, start1, end1, fidx2, start2, end2))
                listItem.setText(2, str(metric))
Example #2
0
    def generateDB(self, vcs1, vcs2):
        """Assemble a ``RepDB`` from the Repertoire output files on disk.

        For every (is_new, lang) combination the output file path is built
        from ``self.pb``; combinations whose file does not exist are skipped.
        Each loaded output is handed to ``self.firstPass`` together with the
        shared accumulators, then ``populateDB`` is called on both VCS
        objects, and finally each clone receives the commit ids of its two
        sides.

        Returns:
            ``RepDB(commits, clones)``.

        Raises:
            KeyError: when a clone side's file id is missing from the
                fidx -> commit id map (a diagnostic line is attempted first).
        """
        # ccfx input file path -> file idx; indices are rewritten because
        # many output files are merged here
        file2fidx = {}
        # fidx -> number of ported lines in that file
        fidx2numports = {}
        # list of CloneMeta, re-indexed for the same reason
        clones = []

        candidates = [(lang, is_new)
                      for is_new in (False, True)
                      for lang in ('cxx', 'hxx', 'java')]

        output_tuples = []
        for lang, is_new in candidates:
            out_dir = self.pb.getRepertoireOutputPath(lang, is_new)
            out_name = self.pb.getRepertoireOutputFileName(lang, is_new)
            ccfx_output_path = out_dir + out_name
            if os.path.exists(ccfx_output_path):
                output = RepertoireOutput()
                output.loadFromFile(ccfx_output_path, True)
                output_tuples.append(OutputTuple(lang, is_new, output))

        for loaded in output_tuples:
            self.firstPass(loaded.output, clones, file2fidx, fidx2numports)

        # commit id -> CommitMeta
        commits = {}
        fidx2commitid = {}
        for vcs in (vcs1, vcs2):
            vcs.populateDB(commits, fidx2commitid, file2fidx, fidx2numports)

        # Clones are complete except for the commit ids of the files involved.
        for clone in clones:
            for side in (clone.lhs, clone.rhs):
                if side.fileId not in fidx2commitid:
                    # NOTE(review): file2fidx is described above as
                    # path -> fidx but is indexed with a file id here;
                    # confirm this lookup is intended.
                    print("Going down looking for fidx {0} ({1})".format(
                        side.fileId, file2fidx[side.fileId]))

            clone.lhsCommitId = fidx2commitid[clone.lhs.fileId]
            clone.rhsCommitId = fidx2commitid[clone.rhs.fileId]

        return RepDB(commits, clones)
Example #3
0
def dumpRepOut(rep_out_path, pickle_output_path):
    """Pickle the clones of a Repertoire output file as ``CloneMeta`` objects.

    Args:
        rep_out_path: path handed to ``RepertoireOutput.loadFromFile``.
        pickle_output_path: file that receives the pickled list; it is
            opened in binary write mode.
    """
    repOut = RepertoireOutput()
    repOut.loadFromFile(rep_out_path, 1)
    clones = []

    for cloneIdx, (clone1, clone2, metric) in repOut.getCloneIter():
        fidx1, start1, end1 = clone1
        fidx2, start2, end2 = clone2

        lhs = SideOfClone(fidx1, start1, end1)
        rhs = SideOfClone(fidx2, start2, end2)

        # For the time being the file index doubles as the commit id.
        lhs_commit_id = fidx1
        rhs_commit_id = fidx2

        clones.append(
            CloneMeta(cloneIdx, lhs, lhs_commit_id, rhs, rhs_commit_id,
                      metric))

    # Context manager so the handle is flushed and closed even when
    # pickling raises.
    with open(pickle_output_path, "wb") as pickle_file:
        pickle.dump(clones, pickle_file)
Example #4
0
    def process_clone(self):
        """Fill the file and clone tree widgets from the Repertoire output.

        Loads ``self.repOutputPath`` into a ``RepertoireOutput``.  Every
        (index, file name) pair yielded by ``getFileIter`` has its name
        appended to ``self.fileList`` and is shown as a row under
        ``self.ui.fileList``.  Every clone pair yielded by ``getCloneIter``
        whose metric converts to a non-zero integer is shown as a row under
        ``self.ui.cloneList`` with its index, both ``file.start-end`` ranges
        and the metric.
        """
        rep_output = RepertoireOutput()
        print(self.repOutputPath)
        rep_output.loadFromFile(self.repOutputPath, 1)

        for index, fileName in rep_output.getFileIter():
            if config.DEBUG == 2:
                print("fileName =" + fileName)
            self.fileList.append(fileName)
            # The tree widget is passed as parent of the new item.
            listItem = QtGui.QTreeWidgetItem(self.ui.fileList)
            listItem.setText(0, str(index))
            listItem.setText(1, fileName)

        for indx, (cl1, cl2, metric) in rep_output.getCloneIter():
            fidx1, start1, end1 = cl1
            fidx2, start2, end2 = cl2
            metric = int(metric)

            if config.DEBUG == 2:
                # Use formatting rather than "+": indx and the clone sides
                # are not strings.  The earlier `print line` referenced a
                # name that does not exist in this method and was dropped.
                print("{0} {1} {2}".format(indx, cl1, cl2))
                print(metric)

            # Clones with a zero metric are not listed.
            if metric:
                listItem = QtGui.QTreeWidgetItem(self.ui.cloneList)
                listItem.setText(0, str(indx))
                listItem.setText(
                    1,
                    "{0}.{1}-{2}\t{3}.{4}-{5}".format(fidx1, start1, end1,
                                                      fidx2, start2, end2))
                listItem.setText(2, str(metric))
Example #5
0
def file_dist(rep_out_path, conv_dir1, conv_dir2):
    """Sum clone metrics per pair of source directories.

    Each clone in the Repertoire output is mapped back from its diff files
    to source files through the two ``DiffToFileMapping`` objects; clones
    for which either side has no mapping are ignored.  The source file names
    are reduced to their first three "_"-separated components, joined with
    ``os.sep``, and the leading integer of the metric is added to the total
    for that directory pair.

    Args:
        rep_out_path: path handed to ``RepertoireOutput.loadFromFile``.
        conv_dir1: directory given to the first ``DiffToFileMapping``.
        conv_dir2: directory given to the second ``DiffToFileMapping``.

    Returns:
        dict mapping ``(src_dir1, src_dir2)`` to the summed integer metric.

    Raises:
        IndexError: if a mapped source file name has fewer than three
            "_"-separated components.
    """
    rep_out = RepertoireOutput()
    convDir1 = DiffToFileMapping(conv_dir1)
    convDir2 = DiffToFileMapping(conv_dir2)

    convDir1.walk_dir()
    convDir2.walk_dir()

    fileDist = {}
    rep_out.loadFromFile(rep_out_path, 1)

    for cloneIdx, (clone1, clone2, metric) in rep_out.getCloneIter():
        fidx1, start1, end1 = clone1
        fidx2, start2, end2 = clone2

        # diff2file is keyed by (diff file name without extension,
        # start line as a string).
        diff_file1 = os.path.basename(rep_out.getFilePath(fidx1))
        diff_file1 = os.path.splitext(diff_file1)[0]
        diff_file2 = os.path.basename(rep_out.getFilePath(fidx2))
        diff_file2 = os.path.splitext(diff_file2)[0]

        src_file1 = convDir1.diff2file.get((diff_file1, str(start1)), -1)
        src_file2 = convDir2.diff2file.get((diff_file2, str(start2)), -1)
        if src_file1 == -1 or src_file2 == -1:
            continue

        # Keep the directory only up to depth 3.
        parts1 = src_file1.split("_")
        src_dir1 = parts1[0] + os.sep + parts1[1] + os.sep + parts1[2]
        parts2 = src_file2.split("_")
        src_dir2 = parts2[0] + os.sep + parts2[1] + os.sep + parts2[2]

        key = (src_dir1, src_dir2)

        # The metric text starts with "(<int>:"; keep only that integer.
        m = metric.partition(':')[0].lstrip('(')

        # dict.get replaces the deprecated has_key() check-then-init.
        fileDist[key] = fileDist.get(key, 0) + int(m)

    return fileDist
Example #6
0
def convert_ccfx_output(pb, proj, lang, is_new):
    """Convert ccfx clone output into a ``RepertoireOutput`` over diff lines.

    The work happens in three stages:

    1. One ``CCFXMetaData`` is registered per entry of the filter output
       directory of ``proj``/``lang``.
    2. For each meta object its prep file and line-map ("conv") file are
       read and two maps are attached: ``meta.prepIdx2OrigIdx`` (1-based
       prep line -> original line, ``-1`` when unknown) and
       ``meta.line2op`` (original line -> operation string).
    3. The ccfx output is loaded, file indices are mapped to filter output
       paths, and each clone's range is translated and annotated with the
       per-line operations.

    Args:
        pb: path builder providing the ``get*Path`` helpers used below.
        proj: project selector forwarded to ``pb``.
        lang: language selector forwarded to ``pb``.
        is_new: flag forwarded to ``pb``.

    Returns:
        A ``RepertoireOutput`` populated via ``loadFromData(files, clones)``.

    Raises:
        Exception: if a file listed in the ccfx output has no meta entry.
    """
    metaDB = CCFXMetaMapping()
    filter_path = pb.getFilterOutputPath(proj, lang)
    conv_path = pb.getLineMapPath(proj, lang, is_new)
    ccfx_i_path = pb.getCCFXInputPath(proj, lang, is_new)
    ccfx_p_path = pb.getCCXFPrepPath(proj, lang, is_new)
    print("filter_path = " + filter_path)
    print("conv_path = " + conv_path)
    print("ccfx_i_path = " + ccfx_i_path)
    print("ccfx_p_path = " + ccfx_p_path)
    for name in os.listdir(filter_path):
        meta = CCFXMetaData(
            ccfx_i_path + name,
            ccfx_p_path + pb.findPrepFileFor(ccfx_p_path, name),
            conv_path + pb.makeLineMapFileName(name),
            filter_path + name)
        metaDB.addFile(meta)

    print(metaDB)
    # we have our files, now map line numbers in the prep files to input files
    for meta in metaDB.getMetas():

        # NOTE(review): this prints only when DEBUG is exactly False, which
        # looks inverted -- confirm before changing.
        if config.DEBUG is False:
            print("prep file = " + meta.ccfxPrep)
            print("conv file = " + meta.filterConv)

        # Context managers close the handles even if reading raises.
        with open(meta.ccfxPrep, 'r') as prep_file:
            prep = prep_file.readlines()
        with open(meta.filterConv, 'r') as conv_file:
            conv = conv_file.readlines()

        input2orig = {}
        pidx2orig = {}
        origline2op = {}
        # build a map of line numbers in ccfx_input to filtered diff line
        for i, cline in enumerate(conv):
            if i < 2:
                # the first two lines carry no mapping entries
                continue
            if cline.rstrip().startswith('"'):
                # a quoted line is a file name, not a mapping
                continue

            dstIdx, srcIdx, op, _change_id = cline.split(',')
            input2orig[int(dstIdx)] = int(srcIdx)
            origline2op[int(srcIdx)] = op
        for pidx, pline in enumerate(prep):
            # the text before the first "." is a hexadecimal input line number
            inputIdx = int(pline.partition(".")[0], 16)
            # ccfx numbers from 1, but pidx is from 0
            pidx2orig[pidx + 1] = input2orig.get(inputIdx, -1)
        meta.prepIdx2OrigIdx = pidx2orig
        meta.line2op = origline2op

    ccfx_out_path = pb.getCCFXOutputPath() + pb.getCCFXOutputFileName(
        lang, is_new, is_tmp=False)
    ccfx_out = RepertoireOutput()
    ccfx_out.loadFromFile(ccfx_out_path)

    files = {}
    for fileIdx, path in ccfx_out.getFileIter():
        print(fileIdx)
        print(path)
        if not metaDB.hasInputPath(path):
            raise Exception(
                "Couldn't find meta information for file: {0}".format(path))
        print(">>>>>>> " + path)
        meta = metaDB.getMetaForPath(path)
        files[fileIdx] = meta.filterOutput

    clones = {}

    for cloneIdx, (clone1, clone2) in ccfx_out.getCloneIter():
        op1 = []
        op2 = []
        fidx1, start1, end1 = clone1
        fidx2, start2, end2 = clone2
        meta1 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx1))
        meta2 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx2))

        # NOTE(review): a failed lookup yields -1 and is not filtered out
        # here, so the ranges below may start or end at -1.
        start1 = meta1.prepIdx2OrigIdx.get(start1 + 1, -1)
        end1 = meta1.prepIdx2OrigIdx.get(end1, -1)
        start2 = meta2.prepIdx2OrigIdx.get(start2 + 1, -1)
        end2 = meta2.prepIdx2OrigIdx.get(end2, -1)

        # "X" marks a line without a recorded operation.
        for i in range(start1, end1 + 1):
            op1.append((i, meta1.line2op.get(i, "X")))

        for i in range(start2, end2 + 1):
            op2.append((i, meta2.line2op.get(i, "X")))

        clone1 = (fidx1, start1, end1, op1)
        clone2 = (fidx2, start2, end2, op2)
        # Store the side with the smaller file index first.
        if clone1[0] < clone2[0]:
            clone = (clone1, clone2)
        else:
            clone = (clone2, clone1)
        clones[cloneIdx] = clone

    rep_out = RepertoireOutput()
    rep_out.loadFromData(files, clones)
    return rep_out
def convert_ccfx_output(pb, proj, lang, is_new):
    """Convert ccfx clone output into a ``RepertoireOutput`` over diff lines.

    The work happens in three stages:

    1. One ``CCFXMetaData`` is registered per entry of the filter output
       directory of ``proj``/``lang``.
    2. For each meta object its prep file and line-map ("conv") file are
       read and two maps are attached: ``meta.prepIdx2OrigIdx`` (1-based
       prep line -> original line, ``-1`` when unknown) and
       ``meta.line2op`` (original line -> operation string).
    3. The ccfx output is loaded, file indices are mapped to filter output
       paths, and each clone's range is translated and annotated with the
       per-line operations.

    Args:
        pb: path builder providing the ``get*Path`` helpers used below.
        proj: project selector forwarded to ``pb``.
        lang: language selector forwarded to ``pb``.
        is_new: flag forwarded to ``pb``.

    Returns:
        A ``RepertoireOutput`` populated via ``loadFromData(files, clones)``.

    Raises:
        Exception: if a file listed in the ccfx output has no meta entry.
    """
    metaDB = CCFXMetaMapping()
    filter_path = pb.getFilterOutputPath(proj, lang)
    conv_path = pb.getLineMapPath(proj, lang, is_new)
    ccfx_i_path = pb.getCCFXInputPath(proj, lang, is_new)
    ccfx_p_path = pb.getCCXFPrepPath(proj, lang, is_new)
    print("filter_path = " + filter_path)
    print("conv_path = " + conv_path)
    print("ccfx_i_path = " + ccfx_i_path)
    print("ccfx_p_path = " + ccfx_p_path)
    for name in os.listdir(filter_path):
        meta = CCFXMetaData(
                ccfx_i_path + name,
                ccfx_p_path + pb.findPrepFileFor(ccfx_p_path, name),
                conv_path + pb.makeLineMapFileName(name),
                filter_path + name)
        metaDB.addFile(meta)

    print(metaDB)
    # we have our files, now map line numbers in the prep files to input files
    for meta in metaDB.getMetas():

        # NOTE(review): this prints only when DEBUG is exactly False, which
        # looks inverted -- confirm before changing.
        if config.DEBUG is False:
            print("prep file = " + meta.ccfxPrep)
            print("conv file = " + meta.filterConv)

        # Context managers close the handles even if reading raises.
        with open(meta.ccfxPrep, 'r') as prep_file:
            prep = prep_file.readlines()
        with open(meta.filterConv, 'r') as conv_file:
            conv = conv_file.readlines()

        input2orig = {}
        pidx2orig = {}
        origline2op = {}
        # build a map of line numbers in ccfx_input to filtered diff line
        for i, cline in enumerate(conv):
            if i < 2:
                # the first two lines carry no mapping entries
                continue
            if cline.rstrip().startswith('"'):
                # a quoted line is a file name, not a mapping
                continue

            dstIdx, srcIdx, op, _change_id = cline.split(',')
            input2orig[int(dstIdx)] = int(srcIdx)
            origline2op[int(srcIdx)] = op
        for pidx, pline in enumerate(prep):
            # the text before the first "." is a hexadecimal input line number
            inputIdx = int(pline.partition(".")[0], 16)
            # ccfx numbers from 1, but pidx is from 0
            pidx2orig[pidx + 1] = input2orig.get(inputIdx, -1)
        meta.prepIdx2OrigIdx = pidx2orig
        meta.line2op = origline2op

    ccfx_out_path = pb.getCCFXOutputPath() + pb.getCCFXOutputFileName(
            lang, is_new, is_tmp=False)
    ccfx_out = RepertoireOutput()
    ccfx_out.loadFromFile(ccfx_out_path)

    files = {}
    for fileIdx, path in ccfx_out.getFileIter():
        print(fileIdx)
        print(path)
        if not metaDB.hasInputPath(path):
            raise Exception(
                    "Couldn't find meta information for file: {0}".format(
                        path))
        print(">>>>>>> " + path)
        meta = metaDB.getMetaForPath(path)
        files[fileIdx] = meta.filterOutput

    clones = {}

    for cloneIdx, (clone1, clone2) in ccfx_out.getCloneIter():
        op1 = []
        op2 = []
        fidx1, start1, end1 = clone1
        fidx2, start2, end2 = clone2
        meta1 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx1))
        meta2 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx2))

        # NOTE(review): a failed lookup yields -1 and is not filtered out
        # here, so the ranges below may start or end at -1.
        start1 = meta1.prepIdx2OrigIdx.get(start1 + 1, -1)
        end1 = meta1.prepIdx2OrigIdx.get(end1, -1)
        start2 = meta2.prepIdx2OrigIdx.get(start2 + 1, -1)
        end2 = meta2.prepIdx2OrigIdx.get(end2, -1)

        # "X" marks a line without a recorded operation.
        for i in range(start1, end1 + 1):
            op1.append((i, meta1.line2op.get(i, "X")))

        for i in range(start2, end2 + 1):
            op2.append((i, meta2.line2op.get(i, "X")))

        clone1 = (fidx1, start1, end1, op1)
        clone2 = (fidx2, start2, end2, op2)
        # Store the side with the smaller file index first.
        if clone1[0] < clone2[0]:
            clone = (clone1, clone2)
        else:
            clone = (clone2, clone1)
        clones[cloneIdx] = clone

    rep_out = RepertoireOutput()
    rep_out.loadFromData(files, clones)
    return rep_out
def convert_ccfx_output(pb, lang, is_new, debug=False):
    """Convert ccfx clone output into a ``RepertoireOutput`` split into hunks.

    The work happens in three stages:

    1. One ``CCFXMetaData`` is registered per entry of the filter output
       directory of both ``PathBuilder.PROJ0`` and ``PathBuilder.PROJ1``.
    2. For each meta object its prep file and line-map ("conv") file are
       read and two maps are attached: ``meta.prepIdx2OrigIdx`` (0-based
       prep line -> original line, ``-1`` when unknown) and
       ``meta.line2op`` (original line -> operation string).  One extra
       entry past the last mapped line stands for the end of file.
    3. The ccfx output is loaded; each clone pair whose four line numbers
       translate successfully is annotated with per-line operations, split
       with ``split_clone_into_hunks`` and stored under consecutive keys.

    Args:
        pb: path builder providing the ``get*Path`` helpers used below.
        lang: language selector forwarded to ``pb``.
        is_new: flag forwarded to ``pb``.
        debug: when true, progress and translation failures are printed.

    Returns:
        A ``RepertoireOutput`` populated via ``loadFromData(files, clones)``.

    Raises:
        Exception: if a file listed in the ccfx output has no meta entry.
    """
    metaDB = CCFXMetaMapping()
    # maps from ccfx input paths to meta objects representing the files
    for proj in [PathBuilder.PROJ0, PathBuilder.PROJ1]:
        filter_path = pb.getFilterOutputPath(proj, lang)
        conv_path = pb.getLineMapPath(proj, lang, is_new)
        ccfx_i_path = pb.getCCFXInputPath(proj, lang, is_new)
        ccfx_p_path = pb.getCCXFPrepPath(proj, lang, is_new)
        for name in os.listdir(filter_path):
            meta = CCFXMetaData(
                    ccfx_i_path + name,
                    ccfx_p_path + pb.findPrepFileFor(ccfx_p_path, name),
                    conv_path + pb.makeLineMapFileName(name),
                    filter_path + name)
            metaDB.addFile(meta)

    # we have our files, now map line numbers in the prep files to input files
    for meta in metaDB.getMetas():

        if config.DEBUG is True:
            print("prep file = " + meta.ccfxPrep)
            print("conv file = " + meta.filterConv)

        # Context managers close the handles even if reading raises.
        with open(meta.ccfxPrep, 'r') as prep_file:
            prep = prep_file.readlines()
        with open(meta.filterConv, 'r') as conv_file:
            conv = conv_file.readlines()

        input2orig = {}
        pidx2orig = {}
        origline2op = {}
        # build a map of line numbers in ccfx_input to filtered diff line
        last_dst = last_src = 0
        for i, cline in enumerate(conv):
            if i < 2:
                # the first two lines carry no mapping entries
                continue
            if cline.rstrip().startswith('"'):
                # a quoted line is a file name, not a mapping
                continue

            dstIdx, srcIdx, op, _change_id = cline.split(',')
            input2orig[int(dstIdx)] = int(srcIdx)
            origline2op[int(srcIdx)] = op
            last_dst = int(dstIdx) + 1
            last_src = int(srcIdx) + 1
        # ccfx cares about the end of file, which isn't represented by our
        # mappings
        input2orig[last_dst] = last_src
        origline2op[last_src] = "NOCHANGE"
        for pidx, pline in enumerate(prep):
            # the text before the first "." is a hexadecimal input line number
            inputIdx = int(pline.partition(".")[0], 16)
            # ccfx output has numbers like 0-131, meaning that pidx
            # is meant to be taken from 0
            orig_idx = input2orig.get(inputIdx, -1)
            pidx2orig[pidx] = orig_idx
            if debug and orig_idx == -1:
                print("failed to translate from pidx to original: {0} -> {1}".format(pidx, inputIdx))
                print("    file: " + meta.ccfxInput)

        meta.prepIdx2OrigIdx = pidx2orig
        meta.line2op = origline2op

    ccfx_out_path = pb.getCCFXOutputPath() + pb.getCCFXOutputFileName(
            lang, is_new, is_tmp=False)
    ccfx_out = RepertoireOutput()
    if debug:
        print('loading from ccfx output file: {0}'.format(ccfx_out_path))
    ccfx_out.loadFromFile(ccfx_out_path)
    if debug:
        print("finished loading ccfx output.")

    files = {}
    for fileIdx, path in ccfx_out.getFileIter():
        if not metaDB.hasInputPath(path):
            raise Exception(
                    "Couldn't find meta information for file: {0}".format(
                        path))
        meta = metaDB.getMetaForPath(path)
        files[fileIdx] = meta.filterOutput

    clones = {}

    # rewrite the line numbers to index into filter_output files
    for clone_idx, clone_pair in ccfx_out.getCloneIter():
        # op1/op2 are the op containers carried by the loaded clones; the
        # loops below append to them in place.
        fidx1, start1, end1, op1 = clone_pair.clone1
        fidx2, start2, end2, op2 = clone_pair.clone2
        metric = clone_pair.metric
        meta1 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx1))
        meta2 = metaDB.getMetaForPath(ccfx_out.getFilePath(fidx2))

        start1 = meta1.prepIdx2OrigIdx.get(start1 + 1, -1)
        end1 = meta1.prepIdx2OrigIdx.get(end1, -1)
        start2 = meta2.prepIdx2OrigIdx.get(start2 + 1, -1)
        end2 = meta2.prepIdx2OrigIdx.get(end2, -1)

        if (start1 == -1 or start2 == -1 or
                end1 == -1 or end2 == -1):
            if debug:
                print('line translation failed for ' + str(clone_pair))
            # don't even try to translate a clone with bad indices;
            # this usually means we somehow dumped an empty file on
            # ccfx and we can't translate the eof token correctly.
            # enabling debug should verify this
            continue

        # "X" marks a line without a recorded operation.
        for i in range(start1, end1 + 1):
            op1.append(Operation(i, meta1.line2op.get(i, "X")))

        for i in range(start2, end2 + 1):
            op2.append(Operation(i, meta2.line2op.get(i, "X")))

        clone1 = Clone(fidx1, start1, end1, op1)
        clone2 = Clone(fidx2, start2, end2, op2)
        # Store the side with the smaller file index first.
        if clone1.fidx < clone2.fidx:
            unsplit_clone = ClonePair(clone1, clone2, metric)
        else:
            unsplit_clone = ClonePair(clone2, clone1, metric)

        # split into hunks, add those hunks into our final output; a
        # distinct loop name avoids shadowing the outer clone_pair
        for hunk_pair in split_clone_into_hunks(unsplit_clone, debug):
            clones[len(clones)] = hunk_pair

    rep_out = RepertoireOutput()
    rep_out.loadFromData(files, clones)
    return rep_out
Example #9
0
        # ccfx numbers from 1, but pidx is from 0
        pidx2orig[pidx + 1] = input2orig.get(inputIdx, -1)
    meta.prepIdx2OrigIdx = pidx2orig
    meta.line2op = origline2op

# Progress marker: the per-file line-number maps above have been built.
print 'map line number finished'
# write all keys in metaDB to a new file, one key per line
# (the file is created in the current working directory)
metaDB_key_file = 'metaDB_key_file.txt'
with open(metaDB_key_file, 'w') as f:
    for key in metaDB.name2meta.keys():
        f.write(key+'\n')
print 'write key finished'


# Load the clone data from a fixed, relative file name and report how many
# files and clones were parsed.
ccfx_out_path = 'cross_file.txt'
ccfx_out = RepertoireOutput()
print 'load clone data from file starts...'
ccfx_out.loadFromFile(ccfx_out_path)
print 'load clone data from file ends...'
print 'parsed files: ', len(ccfx_out.files)
print 'parsed clones: ', len(ccfx_out.clones)


files = {}
for fileIdx, path in ccfx_out.getFileIter():
#    print fileIdx
#    print path
    if not metaDB.hasInputPath(path):
        print "Couldn't find meta information for file: ", path
	sys.exit(-1)
    meta = metaDB.getMetaForPath(path)