def use_diff(): for j in (0, 1): for source_line in statements[j].getSourceLines(): indent1 = re.findall('^\s*', source_line)[0] indent2 = indent1.replace('\t', 4 * ' ') source_line = re.sub( '^' + indent1, indentations[j].index(indent2) * ' ', source_line) source_lines[j].append(source_line) d = diff_highlight([('\n'.join(source_lines[j])) for j in [0, 1]]) d = [ format_line_code(d[i].replace('\n', '<BR>\n')) for i in [0, 1] ] d = [ d[i].replace(very_strange_const, ' ') for i in (0, 1) ] u = anti_unification.Unifier(statements[0], statements[1]) return d, u
def writeReport(self, file_name): # TODO REWRITE! This function code was created in a hurry eclipse_start = '\n<!--ECLIPSE START-->' eclipse_end = '\n<!--ECLIPSE END-->' def format_line_code(s): s = s.replace('\t', ' ') s = s.replace(' ', ' ') return '<span style="font-family: monospace;">%s</span>' % (s, ) errors_info = "\n".join([ '<P> <FONT COLOR=RED> %s </FONT> </P>' % (error_info.replace('\n', '<BR>'), ) for error_info in self._error_info ]) very_strange_const = 'VERY_STRANGE_CONST' clone_descriptions = [] for clone_i in range(len(self._clones)): try: clone = self._clones[clone_i] s = '<P>' s += '<B>Clone # %d</B><BR>' % (clone_i + 1, ) # s = '<P> Clone detected in source files "%s" and "%s" <BR>\n' % (sequences[0].getSourceFile().getFileName(), sequences[1].getSourceFile().getFileName()) s += 'Distance between two fragments = %d <BR>' % ( clone.calcDistance()) s += 'Clone size = ' + str( max([ len(set(clone[i].getCoveredLineNumbers())) for i in [0, 1] ])) s += '<TABLE NOWRAP WIDTH=100% BORDER=1>' s += eclipse_start s += '<TR>' for j in [0, 1]: s += '<TD> <a href="clone://%s?%d&%d"> Go to this fragment in Eclipse </a> </TD>' % ( clone[j].getSourceFile().getFileName(), min(clone[j][0].getCoveredLineNumbers()), max(clone[j][-1].getCoveredLineNumbers())) if j == 0: s += '<TD></TD>' s += '</TR>' s += eclipse_end for j in [0, 1]: s += '<TD>' s += 'Source file "%s"<BR>' % ( clone[j].getSourceFile().getFileName(), ) if clone[j][0].getCoveredLineNumbers() == []: # TODO remove after... pdb.set_trace() s += 'The first line is %d' % ( min(clone[j][0].getCoveredLineNumbers()) + 1, ) s += '</TD>' if j == 0: s += '<TD></TD>' s += '</TR>' for i in range(clone[0].getLength()): s += '<TR>\n' t = [] statements = [clone[j][i] for j in [0, 1]] #print statements[0].__class__ def diff_highlight(seqs): s = difflib.SequenceMatcher(lambda x: x == '<BR>\n') s.set_seqs(seqs[0], seqs[1]) blocks = s.get_matching_blocks() if not ((blocks[0][0] == 0) and (blocks[0][1] == 0)): blocks = [(0, 0, 0)] + blocks r = ['', ''] for i in range(len(blocks)): block = blocks[i] for j in [0, 1]: r[j] += escape(seqs[j][block[j]:block[j] + block[2]]) if (i < (len(blocks) - 1)): nextblock = blocks[i + 1] for j in [0, 1]: r[j] += '<span'+very_strange_const+'style="color:rgb(255,0,0);">%s</span>'%\ (escape(seqs[j][block[j]+block[2]:nextblock[j]]),) return r # preparation of indentation indentations = (set(), set()) for j in (0, 1): for source_line in statements[j].getSourceLines(): indentations[j].add( re.findall('^\s*', source_line)[0].replace( '\t', 4 * ' ')) indentations = (list(indentations[0]), list(indentations[1])) indentations[0].sort() indentations[1].sort() source_lines = ([], []) def use_diff(): for j in (0, 1): for source_line in statements[j].getSourceLines(): indent1 = re.findall('^\s*', source_line)[0] indent2 = indent1.replace('\t', 4 * ' ') source_line = re.sub( '^' + indent1, indentations[j].index(indent2) * ' ', source_line) source_lines[j].append(source_line) d = diff_highlight([('\n'.join(source_lines[j])) for j in [0, 1]]) d = [ format_line_code(d[i].replace('\n', '<BR>\n')) for i in [0, 1] ] d = [ d[i].replace(very_strange_const, ' ') for i in (0, 1) ] u = anti_unification.Unifier(statements[0], statements[1]) return d, u if arguments.use_diff: (d, u) = use_diff() #print u.getSize() else: try: def rec_correct_as_string(t1, t2, s1, s2): def highlight(s): return '<span style="color: rgb(255, 0, 0);">' + s + '</span>' class NewAsString: def __init__(self, s): self.s = highlight(s) def __call__(self): return self.s def set_as_string_node_parent(t): if not isinstance(t, AbstractSyntaxTree): t = t.getParent() n = NewAsString(t.ast_node.as_string()) t.ast_node.as_string = n if (t1 in s1) or (t2 in s2): for t in (t1, t2): set_as_string_node_parent(t) return assert (len(t1.getChilds()) == len( t2.getChilds())) for i in range(len(t1.getChilds())): c1 = t1.getChilds()[i] c2 = t2.getChilds()[i] rec_correct_as_string(c1, c2, s1, s2) (s1, s2) = (statements[0], statements[1]) u = anti_unification.Unifier(s1, s2) rec_correct_as_string( s1, s2, u.getSubstitutions()[0].getMap().values(), u.getSubstitutions()[1].getMap().values()) d = [None, None] for j in (0, 1): d[j] = statements[j].ast_node.as_string() lines = d[j].split('\n') for ii in range(len(lines)): temp_line = '' jj = 0 try: while lines[ii][jj] == ' ': temp_line += ' ' jj += 1 except IndexError: # suppress errors if line has no leading spaces pass temp_line += lines[ii][jj:] lines[ii] = temp_line d[j] = '\n'.join(lines) d[j] = d[j].replace('\n', '<BR>\n') except: print 'The following error occured during highlighting of differences on the AST level:' traceback.print_exc() print 'using diff highlight' (d, u) = use_diff() for j in [0, 1]: t.append('<TD>\n' + d[j] + '</TD>\n') if u.getSize() > 0: color = 'RED' else: color = 'AQUA' s += t[0] + '<TD style="width: 10px;" BGCOLOR=%s> </TD>' % ( color, ) + t[1] s += '</TR>\n' s += '</TABLE> </P> <HR>' clone_descriptions.append(s) except: print "Clone info can't be written to the report. " traceback.print_exc() descr = """<P>Source files: %d</P> <a href = "javascript:unhide('files');">Click here to show/hide file names</a><div id="files" class="hidden"><P><B>Source files:</B><BR>%s</P></div> <P>Clones detected: %d</P> <P>%d of %d lines are duplicates (%.2f%%) </P> <P> <B>Parameters<BR> </B> clustering_threshold = %d<BR> distance_threshold = %d<BR> size_threshold = %d<BR> hashing_depth = %d<BR> clusterize_using_hash = %s<BR> clusterize_using_dcup = %s<BR> </P> """ % (len(self._file_names), ', <BR>'.join( self._file_names), len(self._clones), self.covered_source_lines_count, self.all_source_lines_count, (not self.all_source_lines_count and 100) or 100 * self.covered_source_lines_count / float(self.all_source_lines_count), arguments.clustering_threshold, arguments.distance_threshold, arguments.size_threshold, arguments.hashing_depth, str(arguments.clusterize_using_hash), str(arguments.clusterize_using_dcup)) if arguments.print_time: timings = '' timings += '<B>Time elapsed</B><BR>' timings += '<BR>\n'.join( ['%s : %.2f seconds' % (i[0], i[1]) for i in self._timers]) timings += '<BR>\n Total time: %.2f' % (self.getTotalTime()) timings += '<BR>\n Started at: ' + self._timers[0][2] timings += '<BR>\n Finished at: ' + self._timers[-1][2] else: timings = '' marks_report = '' if self._mark_to_statement_hash: marks_report += '<P>Top 20 statement marks:' marks = self._mark_to_statement_hash.keys() marks.sort(lambda y, x: cmp(len(self._mark_to_statement_hash[x]), len(self._mark_to_statement_hash[y]))) counter = 0 for mark in marks[:20]: counter += 1 marks_report += '<BR>' + str( len(self._mark_to_statement_hash[mark]) ) + ':' + str( mark.getUnifierTree() ) + "<a href=\"javascript:unhide('stmt%d');\">show/hide representatives</a> " % ( counter, ) marks_report += '<div id="stmt%d" class="hidden"> <BR>' % ( counter, ) for statement in self._mark_to_statement_hash[mark]: marks_report += str(statement) + '<BR>' marks_report += '</div>' marks_report += '</P>' warnings = '' if arguments.use_diff: warnings += '<P>(*) Warning: the highlighting of differences is based on diff and doesn\'t reflect the tree-based clone detection algorithm.</P>' save_to = eclipse_start + '<b><a href="file://%s">Save this report</a></b>' % ( file_name, ) + eclipse_end HTML_code = """ <HTML> <HEAD> <TITLE> CloneDigger Report </TITLE> <script type="text/javascript"> function unhide(divID) { var item = document.getElementById(divID); if (item) { item.className=(item.className=='hidden')?'unhidden':'hidden'; } } </script> <style type="text/css"> .hidden { display: none; } .unhidden { display: block; } .preformatted { border: 1px dashed #3c78b5; font-size: 11px; font-family: Courier; margin: 10px; line-height: 13px; } .preformattedHeader { background-color: #f0f0f0; border-bottom: 1px dashed #3c78b5; padding: 3px; text-align: center; } .preformattedContent { background-color: #f0f0f0; padding: 3px; } <!-- <div class="preformatted"><div class="preformattedContent"> <pre>Clone Digger </pre> </div></div> --> </style> </HEAD> <BODY> %s %s %s %s %s %s %s <HR> Clone Digger is aimed to find software clones in Python and Java programs. It is provided under the GPL license and can be downloaded from the site <a href="http://clonedigger.sourceforge.net">http://clonedigger.sourceforge.net</a> </BODY> </HTML>""" % (errors_info, save_to, descr, timings, '<BR>\n'.join(clone_descriptions), marks_report, warnings) f = open(file_name, 'w') f.write(re.sub(eclipse_start + '.*?' + eclipse_end, '', HTML_code)) f.close() if arguments.eclipse_output: f = open(arguments.eclipse_output, 'w') f.write(HTML_code) f.close()
def calcDistance(self): import anti_unification trees = [s.constructTree() for s in self] unifier = anti_unification.Unifier(trees[0], trees[1]) return unifier.getSize()