def fnProcessLine(mysLine,mysType):
    """Classify one line of text and update the counters held in ``g``.

    Phase 1 sets the per-line comment flags on the module-level ``g``
    using regular expressions selected by file type.  Phase 2 increments
    the line, blank, short and comment counters based on those flags.

    As the pattern set is deliberately simple, it only handles fairly
    vanilla cases; comment markers inside string literals, for example,
    are not recognised as such.

    Args:
        mysLine: text of the line to classify.
        mysType: file type, matched case-insensitively against the known
            extensions ("c", "py", "sh", ...).  Any other value is
            handled as an unknown type with no comment detection.

    Returns:
        None.  All results are recorded on ``g``; progress is reported
        through ``TRC.trace``.
    """
    TRC.trace(3,"proc ProcessLine1 lines|%s| code|%s| blank|%s| short|%s| comment|%s|" % (g.getAll()))

    # A blank line is blank in any language.
    # NOTE(review): g.bBlank is not assigned in this function, yet phase 2
    # also calls g.incBlank() when it is set.  If something else sets it
    # for lines that match here, blanks are counted twice -- confirm.
    if re.match(r"^\s*\r?$", mysLine):
        g.incBlank()

    # ---- Phase 1: look for comment sequences ----
    # Test the line in language-specific ways.  All patterns are raw
    # strings so that backslashes reach the regex engine unchanged.

    # C, C++, headers, Java, Scala
    if re.match(r"^(c|cpp|h|hpp|java|scala)$", mysType, re.I):
        TRC.trace(5,"proc match line as C, Java, or Scala")
        g.bCommentOnly = re.match(r"^\s*(\/\*.*\*\/|\/\/.*)\s*$", mysLine)
        g.bCommentBegin = re.match(r"^.*(\/\*|\/\/).*$", mysLine)
        g.bCommentEnd = re.match(r"^.*(\*\/|\/\/).*$", mysLine)
        if g.bCommentBegin:
            g.bCodeCommentBegin = re.match(r"^\s*\S+.*(\/\*|\/\/).*$", mysLine)

    # sh, ksh
    elif re.match(r"^(sh|ksh)$", mysType, re.I):
        TRC.trace(5,"proc match line as Shell script")
        _fnSetHashCommentFlags(mysLine)
        TRC.trace(3,"proc s=|%s|%s| type=|%s| blank=|%s| short=|%s| commentonly=|%s| commentbegin=|%s| commentend=|%s| codebegin=|%s|" % (
            g.nLines, mysLine, mysType, g.bBlank, g.bShort,
            g.bCommentOnly, g.bCommentBegin, g.bCommentEnd,
            g.bCodeCommentBegin))

    # Perl, Awk, R
    elif re.match(r"^(pl|awk|r)$", mysType, re.I):
        TRC.trace(5,"proc match line as Perl or Awk")
        _fnSetHashCommentFlags(mysLine)

    # Python
    # NOTE(review): "pm" is given the Python patterns here, but phase 2
    # only routes "py" to the Python block-comment evaluator.
    elif re.match(r"^(py|pm)$", mysType, re.I):
        TRC.trace(5,"proc match line as Python")
        g.bShort = 0
        _fnSetHashCommentFlags(mysLine)
        TRC.trace(5,"proc hashcomment only|%s| begin|%s| end|%s|" % (tf(g.bCommentOnly),tf(g.bCommentBegin),tf(g.bCommentEnd)))

        # Experimental: try to find triple-quoted block comments, for
        # both the apostrophe (''') and the quote (""") delimiters.

        # A delimiter alone on the line: beginning or end of a block.
        g.bCommentBlockAloneApost = re.match(r"^\s*(\'{3})\s*$", mysLine)
        g.bCommentBlockAloneQuote = re.match(r"^\s*(\"{3})\s*$", mysLine)

        # Non-blank text before and/or after the delimiter.  These are
        # known to be imprecise: code on the same line could innocently
        # contain apostrophes or quotes.
        g.bCommentBlockTextAfterApost = re.match(r"^\s*(\'{3}).*[^\s\']+.*$", mysLine)
        g.bCommentBlockTextAfterQuote = re.match(r"^\s*(\"{3}).*[^\s\"]+.*$", mysLine)
        g.bCommentBlockTextBeforeApost = re.match(r"^.*[^\s\']+.*(\'{3})\s*$", mysLine)
        g.bCommentBlockTextBeforeQuote = re.match(r"^.*[^\s\"]+.*(\"{3})\s*$", mysLine)
        g.bCommentBlockTextBothApost = re.match(r"^\s*\S+.*(\'{3})\s*\S+.*$", mysLine)
        g.bCommentBlockTextBothQuote = re.match(r"^\s*\S+.*(\"{3})\s*\S+.*$", mysLine)

        # Block comment that begins and ends on the same line.  The text
        # between the delimiters must not contain the delimiter's own
        # quote character, so the quote variant excludes '"' (it
        # previously excluded "'" by mistake).
        g.bCommentBlockSingleApost = re.match(r"^\s*(\'{3})[^\']*(\'{3})\s*$", mysLine)
        g.bCommentBlockSingleQuote = re.match(r"^\s*(\"{3})[^\"]*(\"{3})\s*$", mysLine)

        # Parity of the number of delimiters on this line: 1 means an
        # odd number of that delimiter appears on the line.
        mIntrosApost = re.findall(r"(\'{3})", mysLine)
        g.nCommentIntroCountApost = len(mIntrosApost) % 2
        mIntrosQuote = re.findall(r"(\"{3})", mysLine)
        g.nCommentIntroCountQuote = len(mIntrosQuote) % 2

        TRC.trace(5,"proc singlequote pyblock1 alone|%s| txtafter|%s| txtbefore|%s| txtboth|%s| single|%s| count|%s| inblock|%s|" % (
            tf(g.bCommentBlockAloneApost),
            tf(g.bCommentBlockTextAfterApost),
            tf(g.bCommentBlockTextBeforeApost),
            tf(g.bCommentBlockTextBothApost),
            tf(g.bCommentBlockSingleApost),
            g.nCommentIntroCountApost,
            g.bInCommentRegionApost))
        TRC.trace(5,"proc doublequote pyblock1 alone|%s| txtafter|%s| txtbefore|%s| txtboth|%s| single|%s| count|%s| inblock|%s|" % (
            tf(g.bCommentBlockAloneQuote),
            tf(g.bCommentBlockTextAfterQuote),
            tf(g.bCommentBlockTextBeforeQuote),
            tf(g.bCommentBlockTextBothQuote),
            tf(g.bCommentBlockSingleQuote),
            g.nCommentIntroCountQuote,
            g.bInCommentRegionQuote))
        # End of experimental block-comment detection.

    # XML, XSL, HTM, HTML
    elif re.match(r"^(xsl|xml|htm|html)$", mysType, re.I):
        TRC.trace(5,"proc match line as XML or HTML")
        g.bShort = 0
        g.bCommentOnly = re.match(r"^\s*\<!--.*--\>\s*$", mysLine)
        g.bCommentBegin = re.match(r"^.*(<!--).*$", mysLine)
        g.bCommentEnd = re.match(r"^.*(-->).*$", mysLine)
        if g.bCommentBegin:
            g.bCodeCommentBegin = re.match(r"^\s*(\S+.*)(<!--).*$", mysLine)

    # BAT, CMD: only whole-line REM comments are recognised.
    elif re.match(r"^(bat|cmd)$", mysType, re.I):
        TRC.trace(5,"proc match line as Batch script")
        g.bShort = 0
        g.bCommentOnly = re.match(r"^\s*REM.*$", mysLine)
        g.bCommentBegin = re.match(r"^\s*REM.*$", mysLine)
        g.bCommentEnd = re.match(r"^\s*REM.*$", mysLine)
        g.bCodeCommentBegin = 0

    # Makefiles: a '#' preceded by a backslash is not a comment marker.
    # The character class must reach the regex engine as [^\\]; written
    # in a non-raw string it collapsed to [^\] and failed to compile.
    elif re.match(r"^(mak)$", mysType, re.I):
        TRC.trace(5,"proc match line as makefile")
        g.bShort = 0
        g.bCommentOnly = re.match(r"^\s*#.*$", mysLine)
        g.bCommentBegin = re.match(r"^[^\t].*[^\\]#.*$", mysLine)
        g.bCommentEnd = re.match(r"^.*[^\\]#.*$", mysLine)
        if g.bCommentBegin:
            g.bCodeCommentBegin = re.match(r"^\s*\S+.*([^\\]#).*$", mysLine)

    # sed scripts, properties files: whole-line '#' comments only.
    elif re.match(r"^(properties|sed)$", mysType, re.I):
        TRC.trace(5,"proc match line as sed script")
        g.bShort = 0
        g.bCommentOnly = re.match(r"^\s*#.*$", mysLine)
        g.bCommentBegin = re.match(r"^\s*#.*$", mysLine)
        g.bCommentEnd = re.match(r"^\s*#.*$", mysLine)
        g.bCodeCommentBegin = 0

    # INI files: whole-line comments starting with '#' or ';'.
    elif re.match(r"^(ini)$", mysType, re.I):
        TRC.trace(5,"proc match line as INI file")
        g.bShort = 0
        g.bCommentOnly = re.match(r"^\s*[#;].*$", mysLine)
        g.bCommentBegin = re.match(r"^\s*[#;].*$", mysLine)
        g.bCommentEnd = re.match(r"^\s*[#;].*$", mysLine)
        g.bCodeCommentBegin = 0

    # Anything else: no comment detection; the comment flags on g are
    # left exactly as they were.
    else:
        TRC.trace(5,"proc match line as other random stuff")
        g.bShort = 0

    # ---- Phase 2: classify and count lines ----
    g.incLines()
    if g.bBlank:
        g.incBlank()
    if g.bShort:
        g.incShort()
    # NOTE(review): an assignment to a function-local "bInCommentRegion"
    # used to sit here under "if g.bCommentEnd"; it never touched g and
    # has been dropped.  It is deliberately not redirected to
    # g.bInCommentRegion: the end patterns also match single-line markers
    # such as "//", so doing that would change the counts.

    # Python: the block-comment bookkeeping lives in its own routine.
    if re.match(r"^(py)$", mysType, re.I):
        fnnEvalPythonCommentBlocksNEW()

    # Not Python
    else:
        if g.bCommentOnly:
            # Comment line, not code line.
            g.incComment()
        elif g.bCommentBegin:
            # A comment opens on this line; a region only stays open if
            # the line does not also contain an end marker.
            g.bInCommentRegion = 0
            if not g.bCommentEnd:
                g.bInCommentRegion = 1
            # NOTE(review): confirm this test is meant to be independent
            # of the region test above rather than nested inside it.
            if not g.bCodeCommentBegin:
                # No code ahead of the comment, so count it as comment.
                g.incComment()
        elif g.bCommentEnd:
            # End marker without a begin marker: closes the region.
            g.bInCommentRegion = 0
            g.incComment()
        else:
            # Vanilla line: comment if inside a comment region, else code.
            if g.bInCommentRegion:
                g.incComment()

    TRC.trace(3,"proc ProcessLine2 lines|%s| code|%s| blank|%s| short|%s| comment|%s|" % (g.getAll()))
    return


def _fnSetHashCommentFlags(mysLine):
    """Set the comment flags on ``g`` for languages using '#' comments.

    g.bCodeCommentBegin is only refreshed when the line contains a '#';
    otherwise it keeps whatever value it already had.
    """
    g.bCommentOnly = re.match(r"^\s*#.*\s*$", mysLine)
    g.bCommentBegin = re.match(r"^.*#.*$", mysLine)
    g.bCommentEnd = re.match(r"^.*#.*$", mysLine)
    if g.bCommentBegin:
        g.bCodeCommentBegin = re.match(r"^\s*\S+.*(#).*$", mysLine)
return 1 else: return 0 if __name__ == "__main__": if len(argv) <= 1: print "Usage: python %s input-filespec" print " Line out = filename, filetype, total, code, comment, blank, short" print " Output one line to stdout." exit(0) sFilename = argv[1] mFileext = re.match("^.*\.([^\.]+)$",sFilename,re.I) sFileext = mFileext.group(1) TRC.tracef(3,"MAIN","proc fname|%s| match|%s| ext|%s|" % (sFilename,mFileext,sFileext)) g = G() # Instantiate all the global data. fnProcessFile(sFilename,sFileext) # Do all this crap to the file. TRC.trace(3,"proc afterfile lines|%s| code|%s| blank|%s| short|%s| comment|%s|" % (g.getAll())) # The conservative way to calculate code lines is to remove # everything that we saw that absolutely is not a line of code. # Blanks and comment-only lines are not code for sure. # Short lines are arguable, so we report them separately. (total,zerocode,blank,short,comment) = g.getAll() g.nCode = total - blank - short - comment # Code is what's left over. # Finally, the single line of output for this file. print "%s\t%s\t%d\t%d\t%d\t%d\t%d" % \ (sFilename,sFileext,total,g.nCode,comment,blank,short)