Beispiel #1
0
def codeMetricsTable(nameLabel, dataFrame, sourceFilesDirectory, storageConnection):
	"""Return the code-metrics table for ``nameLabel``, building it on a cache miss.

	The table is stored under the name ``nameLabel + '_codeMetrics'``. When a
	table with that name already exists in storage it is read back and
	returned without touching the working copy.

	Otherwise the source tree is switched to the revision ``nameLabel``,
	``collectMetrics`` is run with the ``ck`` directory located next to the
	current working directory (``<cwd>/../ck``) and ``sourceFilesDirectory``,
	the result is passed through ``dropUnusedMetrics``, written to storage
	and returned.

	Args:
		nameLabel: Revision name; also the prefix of the stored table name.
		dataFrame: Not used by this implementation.
		sourceFilesDirectory: Directory of the checked-out sources.
		storageConnection: Handle forwarded to the ``storage`` helpers.

	Returns:
		The stored table on a cache hit, otherwise the freshly computed
		metrics as returned by ``dropUnusedMetrics``.
	"""
	import storage
	import scmData as scm
	import dataUtilities

	cachedTable = nameLabel + '_codeMetrics'
	alreadyStored = storage.tableExists(cachedTable, storageConnection)

	if not alreadyStored:
		# The metrics must be computed against the requested revision.
		scm.gitlog.switchToRevision(sourceFilesDirectory, nameLabel)

		ckDirectory = os.path.join(os.getcwd(), "..", "ck")
		collected = collectMetrics(ckDirectory, sourceFilesDirectory)
		trimmed = dropUnusedMetrics(collected)

		storage.writeTable(cachedTable, storageConnection, trimmed)
		return trimmed

	return storage.readTable(cachedTable, storageConnection)
Beispiel #2
0
def makeCommitDateMapping(sourceDirectory,
                          filesToInspect,
                          storageConnection,
                          branchname='master'):
    """Return a table mapping abbreviated commit hashes to commit dates.

    The result is cached in storage under ``branchname + '_commitDates'``;
    note that the cache key does not include ``filesToInspect``. On a cache
    miss the working copy is switched to ``branchname`` and ``git log`` is
    run inside ``sourceDirectory`` for the given path specification.

    Args:
        sourceDirectory: Directory of the git working copy.
        filesToInspect: Path specification appended after ``--`` on the
            ``git log`` command line.
        storageConnection: Handle forwarded to the ``storage`` helpers.
        branchname: Revision to switch to; also the table-name prefix.

    Returns:
        A DataFrame with the string columns ``sha`` and ``date``, or the
        string ``"no commit date mapping available"`` when the git command
        exits with a non-zero status (nothing is stored in that case).
    """
    import storage

    tableName = branchname + '_commitDates'
    if storage.tableExists(tableName, storageConnection):
        return storage.readTable(tableName, storageConnection)

    switchToRevision(sourceDirectory, branchname)

    # NOTE(review): both arguments are interpolated unquoted into a shell
    # command line; callers must only pass trusted values.
    # "&&" instead of ";" so that a failing "cd" aborts the command rather
    # than running "git log" in whatever directory the process started in.
    command = "cd %s && git log --pretty=format:'%%h,%%ad' --date=short -- %s" % (
        sourceDirectory, filesToInspect)
    try:
        # universal_newlines=True yields text instead of bytes on Python 3,
        # which is what StringIO requires.
        rawLog = subprocess.check_output(command,
                                         shell=True,
                                         universal_newlines=True)
    except subprocess.CalledProcessError:
        return "no commit date mapping available"

    dates = pd.read_csv(StringIO(rawLog),
                        names=['sha', 'date'],
                        dtype={
                            'sha': str,
                            'date': str
                        })
    storage.writeTable(tableName, storageConnection, dates)

    return dates
def indentMetricsTable(nameLabel,
                       dataFrame,
                       sourceFilesDirectory,
                       storageConnection,
                       tabsize=4):
    """Return the indentation-metrics table for ``nameLabel``.

    The table is cached in storage under ``nameLabel + '_indentMetrics'``.
    On a cache miss the working copy is switched to the revision
    ``nameLabel`` and ``indentCountStats`` is run once for every value in
    ``dataFrame['entity']``; the per-file results are combined, passed
    through ``dataUtilities.formatEntityNames`` and written to storage.

    Args:
        nameLabel: Revision name; also the prefix of the stored table name.
        dataFrame: Table whose ``entity`` column lists the files to analyse.
        sourceFilesDirectory: Prefix that is string-concatenated with each
            entity name to form the file path.
        storageConnection: Handle forwarded to the ``storage`` helpers.
        tabsize: Forwarded unchanged to ``indentCountStats``.

    Returns:
        The stored or freshly computed table, or ``None`` when no rows were
        produced (nothing is stored in that case).
    """
    import storage
    import scmData as scm
    import dataUtilities

    tableName = nameLabel + '_indentMetrics'
    if storage.tableExists(tableName, storageConnection):
        return storage.readTable(tableName, storageConnection)

    scm.gitlog.switchToRevision(sourceFilesDirectory, nameLabel)

    # Collect the per-file frames and concatenate once; concatenating inside
    # the loop copies the accumulated data again on every iteration.
    # NOTE(review): assumes indentCountStats returns a DataFrame - confirm.
    frames = []
    for entityName in dataFrame['entity']:
        entityPath = sourceFilesDirectory + entityName
        nextFrame = indentCountStats(entityPath, tabsize)
        nextFrame['entity'] = entityPath
        frames.append(nextFrame)

    # pd.concat raises on an empty list, so handle "no entities" up front.
    if not frames:
        return None

    indentMetrics = pd.concat(frames)
    if indentMetrics.empty:
        return None

    indentMetrics = dataUtilities.formatEntityNames(indentMetrics,
                                                    sourceFilesDirectory)

    storage.writeTable(tableName, storageConnection, indentMetrics)
    return indentMetrics
def codeMetricsTable(nameLabel, dataFrame, sourceFilesDirectory, storageConnection):
	"""Return the per-file code-metrics table for ``nameLabel``.

	The table is cached in storage under ``nameLabel + '_codeMetrics'``. On a
	cache miss the working copy is switched to the revision ``nameLabel`` and
	``runAnalysisOnFile`` is called for every value in ``dataFrame['entity']``
	(each prefixed with ``sourceFilesDirectory``). The results are
	concatenated, passed through ``dropUnusedCMetrics`` and
	``dataUtilities.formatEntityNames``, written to storage and returned.

	Args:
		nameLabel: Revision name; also the prefix of the stored table name.
		dataFrame: Table whose ``entity`` column lists the files to analyse.
		sourceFilesDirectory: Prefix that is string-concatenated with each
			entity name to form the file path.
		storageConnection: Handle forwarded to the ``storage`` helpers.

	Returns:
		The stored or freshly computed table, or ``None`` when
		``dataFrame['entity']`` yields no entries.
	"""
	import storage
	import scmData as scm
	import dataUtilities

	cachedTable = nameLabel + '_codeMetrics'
	if storage.tableExists(cachedTable, storageConnection):
		return storage.readTable(cachedTable, storageConnection)

	scm.gitlog.switchToRevision(sourceFilesDirectory, nameLabel)

	perFileResults = [
		runAnalysisOnFile(sourceFilesDirectory + entity)
		for entity in dataFrame['entity']
	]
	if len(perFileResults) == 0:
		return None

	combined = dropUnusedCMetrics(pd.concat(perFileResults))
	combined = dataUtilities.formatEntityNames(combined, sourceFilesDirectory)

	storage.writeTable(cachedTable, storageConnection, combined)
	return combined
Beispiel #5
0
def makeGitLog(sourceDirectory, filesToInspect, lastTimestamp,
               previousTimestamp, storageConnection):
    """Store the git log between two revisions, unless it is already stored.

    The log is kept in storage under the name
    ``'gitLog_' + lastTimestamp + '_' + previousTimestamp``. When no entry
    with that name exists, ``git log lastTimestamp...previousTimestamp`` is
    run through the shell inside ``sourceDirectory`` with ``--numstat`` for
    the given path specification, and its raw output is written with
    ``storage.writeFile``.

    Args:
        sourceDirectory: Directory of the git working copy.
        filesToInspect: Path specification appended after ``--``.
        lastTimestamp: Left-hand revision of the ``...`` range.
        previousTimestamp: Right-hand revision of the ``...`` range.
        storageConnection: Handle forwarded to the ``storage`` helpers.

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: If the shell command exits non-zero.
    """
    import storage

    logName = 'gitLog_' + lastTimestamp + '_' + previousTimestamp

    if not storage.tableExists(logName, storageConnection):
        logArguments = (sourceDirectory, lastTimestamp, previousTimestamp,
                        filesToInspect)
        shellCommand = "cd %s; git log %s...%s --pretty=format:'[%%h] %%aN %%ad %%s' --date=short --date-order --numstat  -- %s" % logArguments
        logOutput = subprocess.check_output(shellCommand, shell=True)
        storage.writeFile(logName, storageConnection, logOutput)
Beispiel #6
0
def changeMetricsTable(lastTime, previousTime, logFileType, storageConnection):
    """Return the change-metrics table for ``lastTime``, building it if needed.

    The table is cached in storage under ``lastTime + '_changeMetrics'``. On
    a cache miss the stored git log named
    ``'gitLog_' + lastTime + '_' + previousTime`` is fetched as a temporary
    file and handed to ``runAnalysisList`` together with ``logFileType`` and
    the module-level ``entityAnalysisTypes``. Each returned result is read
    with ``pd.read_csv`` and the frames are merged one after another on the
    ``entity`` column. The duplicate ``n-revs_y`` column left by the merge is
    dropped and ``n-revs_x`` is renamed back to ``n-revs``.

    Args:
        lastTime: Newer revision label; also the table-name prefix.
        previousTime: Older revision label, used to locate the stored log.
        logFileType: Forwarded unchanged to ``runAnalysisList``.
        storageConnection: Handle forwarded to the ``storage`` helpers.

    Returns:
        The stored or freshly computed change-metrics table.
    """
    import storage

    metricsTable = lastTime + '_changeMetrics'
    if storage.tableExists(metricsTable, storageConnection):
        return storage.readTable(metricsTable, storageConnection)

    logPath = storage.readFile('gitLog_' + lastTime + '_' + previousTime,
                               storageConnection,
                               toTemporaryFile=True)
    resultFiles = runAnalysisList(logPath, logFileType, entityAnalysisTypes)

    # Start from the first analysis and join every further one by entity.
    merged = pd.read_csv(resultFiles[0])
    for resultFile in resultFiles[1:]:
        merged = merged.merge(pd.read_csv(resultFile), on='entity')

    # Collapse the suffixed n-revs columns produced by the merge.
    merged = merged.drop('n-revs_y', axis=1).rename(
        columns={'n-revs_x': 'n-revs'})

    storage.writeTable(metricsTable, storageConnection, merged)
    return merged