import json
import os
import shutil

# Helpers (importJson, find, getStats, ...) and constants (RESULT_JSON,
# GS_JSON_NAME, CONFIG_JSON, the STATUS_* codes, UNALTERED_GS_TAG, ...) are
# defined elsewhere in this module.


def updateGoldStandardJsonWithIds(gsCrawl):
    resultJson = os.path.join(gsCrawl, RESULT_JSON)
    if not os.path.exists(resultJson):
        print("Result JSON not found at : {0}".format(resultJson))
        return None
    resultJsonData = importJson(resultJson)
    gsJson = os.path.join(gsCrawl, 'gs', GS_JSON_NAME)
    if not os.path.exists(gsJson):
        print("GS JSON not found at : {0}".format(gsJson))
        return None
    gsJsonData = importJson(gsJson)
    if gsJsonData is None:
        print("Error importing Gold Standard JSON")
        return
    states = resultJsonData['states']
    gsStates = gsJsonData['states']
    noId = False
    # Copy state ids from the crawl result into any gold-standard states missing them.
    for state in states:
        if state in gsStates:
            if 'id' not in gsStates[state]:
                noId = True
                gsStates[state]['id'] = states[state]['id']
    if noId:
        # Back up the original file before overwriting it with the id-augmented version.
        shutil.copy2(gsJson, os.path.join(gsCrawl, 'gs', 'gsResults_noId.json'))
        with open(gsJson, "w") as write_file:
            json.dump(gsJsonData, write_file)
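# importJson is used throughout this module but not defined in this section.
# A minimal sketch of its assumed behaviour (inferred from call sites such as
# the None check above, not confirmed by the source); the module's actual
# helper may differ:
def importJson(jsonPath):
    """Load a JSON file, returning None on any IO or parse error."""
    try:
        with open(jsonPath) as json_file:
            return json.load(json_file)
    except (IOError, ValueError) as ex:
        print("Could not import JSON from {0}".format(jsonPath))
        print(ex)
        return None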
def getNewDiscoveries(jsonData, oldGsCrawl, crawl):
    newDiscoveries = []
    gsJson = os.path.join(oldGsCrawl, 'gs', GS_JSON_NAME)
    if not os.path.exists(gsJson):
        print("GS JSON not found at : {0}".format(gsJson))
        return None
    gsJsonData = importJson(gsJson)
    allGsBins = getAllBins(gsJsonData['states'])
    allCrawlBins = getAllBins(jsonData['states'])
    binRepresentatives = getBinRepresentatives(jsonData['states'])
    # A bin seen in the crawl but absent from the (unaltered) gold standard is a new discovery.
    for crawlBin in allCrawlBins:
        if crawlBin == "":
            continue
        if crawlBin not in allGsBins:
            newDiscoveries.append({crawlBin: binRepresentatives[crawlBin]})
    newDiscoveriesPath = os.path.join(crawl, 'comp_output', 'newDiscoveries.json')
    try:
        with open(newDiscoveriesPath, 'w') as write_file:
            json.dump(newDiscoveries, write_file)
    except Exception as ex:
        print("Could not write new discoveries file to {0}".format(newDiscoveriesPath))
        print(ex)
    return len(newDiscoveries)
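# getAllBins and getBinRepresentatives are assumed helpers over the
# classification JSON, where each state entry carries a 'bin' label (see the
# structure built in singleCrawlOutput below). Plausible sketches, not the
# confirmed implementations:
def getAllBins(states):
    # Collect the set of distinct bin labels across all states
    # ("" marks a state that has not been classified yet).
    return {states[state].get('bin', "") for state in states}


def getBinRepresentatives(states):
    # Map each bin label to the first state key that carries it.
    representatives = {}
    for state in states:
        representatives.setdefault(states[state].get('bin', ""), state)
    return representatives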
def testGetCoverage():
    gsJson = importJson("src/main/resources/GoldStandards/petclinic/crawl-petclinic-60min/gs/gsResults.json")
    states = gsJson['states']
    print(getNumBins(states))
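# getNumBins is assumed to count the distinct, non-empty bin labels in a
# states dict; a minimal sketch consistent with getAllBins above (assumption,
# not the confirmed implementation):
def getNumBins(states):
    bins = getAllBins(states)
    bins.discard("")  # an empty label means "not yet classified"
    return len(bins)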
def testGetSAF():
    testConfigData = importJson("../ALLCRAWLS/out/petclinic/petclinic_DEFAULT_-1.0_60mins/localhost/crawl0/config.json")
    getSAF(testConfigData)
def analyze(gsCrawl, crawl, appName, thresholdEntry, preDefinedSaveJsonLocation=None):
    status = None
    gsJson = os.path.join(gsCrawl, 'gs', GS_JSON_NAME)
    if not os.path.exists(gsJson):
        print("GS JSON not found at : {0}".format(gsJson))
        status = STATUS_JSON_NOT_FOUND
        return status, None
    gsJsonData = importJson(gsJson)
    saveJsonName = getCrawlFolderName(crawl) + "_" + VERIFIED_CLASSIFICATION_JSON_NAME
    verifClassificationJson = os.path.join(crawl, 'comp_output', VERIFIED_CLASSIFICATION_JSON_NAME)
    HTML_OUTPUT_PATH = os.path.join(os.path.abspath(crawl), "html_classification")
    # If a verified classification was saved elsewhere, try to recover it into the crawl folder.
    if (not os.path.exists(verifClassificationJson)) and (preDefinedSaveJsonLocation is not None) and (os.path.exists(HTML_OUTPUT_PATH)):
        saveJsons = find(saveJsonName, preDefinedSaveJsonLocation)
        if len(saveJsons) > 0:
            saveJson = saveJsons[0]
            try:
                shutil.copy2(saveJson, verifClassificationJson)
                shutil.move(saveJson, saveJson + "_backup")
                print("Moved classification_verified.json to {0}".format(verifClassificationJson))
            except Exception as ex:
                print(ex)
                print("Could not move classification_verified to the right location.")
    if not os.path.exists(verifClassificationJson):
        # No verified classification yet: generate the classification HTML for manual verification.
        classificationJson = os.path.join(crawl, 'comp_output', GENERATED_CLASSIFICATION_JSON_NAME)
        if not os.path.exists(classificationJson):
            print("Classification JSON not found at {0}".format(classificationJson))
            done = createClassificationJson(gsCrawl, crawl, appName)
            status = STATUS_CREATED_CLASSIFICATION_HTML
            return status, None
        jsonData = importJson(classificationJson)
        OUTPUT_HTML_NAME = "classification.html"
        TITLE = "Classification HTML"
        preLoadedBins = getAllBins(gsJsonData['states'])
        singleCrawlOutput(crawl, HTML_OUTPUT_PATH, OUTPUT_HTML_NAME, TITLE,
                          outputJson=jsonData, saveJsonName=saveJsonName,
                          preLoadedBins=preLoadedBins)
        status = STATUS_CREATED_CLASSIFICATION_HTML
        return status, None
    else:
        jsonData = importJson(verifClassificationJson)
        resultJson = os.path.join(crawl, RESULT_JSON)
        if not os.path.exists(resultJson):
            print("Result JSON not found at : {0}".format(resultJson))
            status = STATUS_JSON_NOT_FOUND
            return status, None
        configJson = os.path.join(crawl, CONFIG_JSON)
        if not os.path.exists(configJson):
            print("Config JSON not found at : {0}".format(configJson))
            status = STATUS_JSON_NOT_FOUND
            return status, None
        resultJsonData = importJson(resultJson)
        configJsonData = importJson(configJson)
        if 'newBins' in jsonData:
            toAdd, errored = addToGoldStandard(jsonData['newBins'], gsCrawl, crawl, gsJsonData, jsonData)
            if len(errored) > 0:
                print("Some bins could not be added to the Gold Standard. Investigate why.")
            toAddStates = toAdd['states']
            toAddPairs = toAdd['pairs']
            if len(toAddPairs) != 0:
                # Merge the verified new states and pairs into the gold standard.
                newStatesJson = gsJsonData['states']
                newPairsJson = gsJsonData['pairs']
                for state in toAddStates:
                    newStatesJson[state] = toAddStates[state]
                for pair in toAddPairs:
                    newPairsJson.append(pair)
                newGSJsonData = {'states': newStatesJson, 'pairs': newPairsJson}
                writeNewGoldStandardJsonAndHtml(newGSJsonData, gsCrawl)
                status = STATUS_GS_UPDATED
                return status, None
        stats = getStats(jsonData, gsJsonData, resultJsonData, configJsonData, crawl, thresholdEntry)
        # New discoveries are computed against the unaltered gold standard, if one exists.
        if os.path.exists(os.path.abspath(gsCrawl) + UNALTERED_GS_TAG):
            newDiscoveries = getNewDiscoveries(jsonData, os.path.abspath(gsCrawl) + UNALTERED_GS_TAG, crawl)
            stats["newDiscoveries"] = newDiscoveries
        else:
            stats["newDiscoveries"] = 0
        print(stats)
        status = STATUS_STATS_CREATED
        return status, stats
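# Example driver for analyze(). The paths reuse the petclinic locations from
# the tests above; passing thresholdEntry=None is an assumption, since its
# expected shape is not shown in this section:
def testAnalyze():
    gsCrawl = "src/main/resources/GoldStandards/petclinic/crawl-petclinic-60min"
    crawl = "../ALLCRAWLS/out/petclinic/petclinic_DEFAULT_-1.0_60mins/localhost/crawl0"
    status, stats = analyze(gsCrawl, crawl, "petclinic", thresholdEntry=None)
    if status == STATUS_STATS_CREATED:
        print(stats)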
def testNormalizeHyst():
    pDiffRawCSVs = find('*Hyst-raw.csv', '/Test/gt10/')
    totalNormalized = 0
    totalUncalculated = 0
    totalUpdated = 0
    totalSynced = 0
    appsToRerun = []
    appsNormalized = []
    allZeroApps = []
    for pDiffRawCSV in pDiffRawCSVs:
        print(pDiffRawCSV)
        path, file = os.path.split(pDiffRawCSV)
        folders = splitPathIntoFolders(pDiffRawCSV)
        appName = folders[2]
        crawl = folders[1]
        pixelSizeJson = os.path.join(os.path.abspath(path), '', 'pixelSizes.json')
        pixelSizes = importJson(pixelSizeJson)
        pDiffPairs = getAllPairsFromCSV(pDiffRawCSV)
        normalized, uncalculated, raw, needToRerun, allZero, pDiffPairsNormalized = normalizePDiff(pDiffPairs, pixelSizes)
        totalNormalized += normalized
        totalUncalculated += uncalculated
        if needToRerun:
            appsToRerun.append(appName)
        if raw:
            appsNormalized.append(appName)
        fieldnames = getFieldNames(pDiffRawCSV)
        dst = os.path.join(path, '', crawl + '-VISUAL-Hyst-normalized.csv')
        writeNormalizedCSV(pDiffPairsNormalized, fieldnames, dst)
        # Optional DB sync, currently disabled:
        # try:
        #     testdb = '/Test/DS.db'
        #     connectToDB(testdb)
        #     updatedPairs, ignoredPairs, sameValuePairs, errorPairs = updateDB(
        #         pDiffPairsNormalized, appName, crawl, str(ALGOS.VISUAL_PDIFF).split('.')[1])
        #     totalUpdated += updatedPairs
        #     totalSynced += updatedPairs
        #     totalSynced += sameValuePairs
        #     print("Updated : {0}, Ignored not present in db: {1}, Ignored same Value : {2}, Errored : {3} db records".format(
        #         updatedPairs, ignoredPairs, sameValuePairs, errorPairs))
        # except Exception as ex:
        #     print(ex)
        #     print("Encountered exception while updating records")
        # finally:
        #     closeDBConnection()
        if allZero:
            allZeroApps.append(appName)
    print("Normalized total {0} Hyst distances from {1} apps".format(totalNormalized, len(pDiffRawCSVs)))
    print("Found total {0} uncalculated Hyst distances from {1} apps".format(totalUncalculated, len(pDiffRawCSVs)))
    print("appsToRerun : " + str(len(appsToRerun)))
    print("rawApps : " + str(len(appsNormalized)))
    print("allZeroApps : " + str(len(allZeroApps)))
    print("intersection of appsToRerun and allZeroApps : " + str(len(list(set(appsToRerun) & set(allZeroApps)))))
    with open("Output.txt", "w") as text_file:
        for app in appsToRerun:
            print(app, file=text_file)
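# normalizePDiff is not defined in this section. The sketch below is one
# plausible reading of its six return values: raw per-pair pixel-diff counts
# are divided by the screenshot's pixel count to yield a [0, 1] distance.
# The field names ('state1', 'distance') and the needToRerun rule are
# assumptions, not confirmed by the source:
def normalizePDiff(pDiffPairs, pixelSizes):
    normalized = 0
    uncalculated = 0
    raw = False  # did any pair still carry an un-normalized (>1) value?
    pDiffPairsNormalized = []
    for pair in pDiffPairs:
        distance = float(pair['distance'])
        size = pixelSizes.get(pair['state1'])
        if distance < 0 or not size:
            uncalculated += 1  # distance never computed or pixel size unknown
        elif distance > 1:
            raw = True
            pair['distance'] = distance / float(size)
            normalized += 1
        pDiffPairsNormalized.append(pair)
    allZero = all(float(p['distance']) == 0 for p in pDiffPairsNormalized)
    # Assumption: a crawl needs rerunning when no usable distances survive.
    needToRerun = allZero or uncalculated > 0
    return normalized, uncalculated, raw, needToRerun, allZero, pDiffPairsNormalized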
def singleCrawlOutput(CRAWL_PATH, OUTPUT_PATH, OUTPUT_HTML_NAME, TITLE,
                      outputJson=None, saveJsonName="gsResults.json", preLoadedBins=None, overwrite=None,
                      RESOURCES=os.path.join(os.path.abspath("../HTMLStuff/resources/"), ''),
                      TEMPLATE_DIR=os.path.join(os.path.abspath("../HTMLStuff/"), ''),
                      TEMPLATE_HTML="gsTemplate.html"):
    TEMPLATE_HTML_PATH = os.path.abspath(os.path.join(TEMPLATE_DIR, TEMPLATE_HTML))
    IMAGES = "images"
    SCREENSHOTS = "screenshots"
    CRAWL_RESULT_JSON_FILE = os.path.abspath(os.path.join(CRAWL_PATH, "result.json"))
    outputPathConfirmed, OUTPUT_PATH = confirmOutputPath(OUTPUT_PATH, RESOURCES, overwrite=overwrite)
    if not outputPathConfirmed:
        return
    # If a prebuilt JSON is supplied, render it directly.
    if outputJson is not None:
        outputJsonString = json.dumps(outputJson)
        output_html_doc(TEMPLATE_DIR, TEMPLATE_HTML, os.path.join(OUTPUT_PATH, OUTPUT_HTML_NAME),
                        outputJsonString, TITLE, saveJsonName=saveJsonName, preLoadedBins=preLoadedBins)
        return
    # Otherwise build an empty classification JSON from the crawl's result.json:
    # one entry per state and one unlabelled pair per unordered state combination.
    resultJson = importJson(CRAWL_RESULT_JSON_FILE)
    states = resultJson['states']
    outputJson = {'states': {}, 'pairs': []}
    sortedStates = sorted(states.items(), key=lambda x: x[1]['id'])
    for state1 in sortedStates:
        i = state1[1]['id']
        stateOutput = {}
        stateOutput['name'] = state1[1]['name']
        stateOutput['bin'] = ""
        stateOutput['clones'] = []
        stateOutput['id'] = i
        if 'timeAdded' in state1[1]:
            stateOutput['timeAdded'] = state1[1]['timeAdded']
        else:
            stateOutput['timeAdded'] = i
        # Guard the url lookup; not every state entry is guaranteed to have one.
        if 'url' in state1[1]:
            stateOutput['url'] = state1[1]['url']
        outputJson['states'][state1[0]] = stateOutput
        for state2 in sortedStates:
            j = state2[1]['id']
            if j <= i:
                continue
            pair = {}
            pair['state1'] = state1[0]
            pair['state2'] = state2[0]
            pair['response'] = -1
            pair['inferred'] = 0
            pair['tags'] = []
            pair['comments'] = ""
            outputJson['pairs'].append(pair)
    outputJsonString = json.dumps(outputJson)
    print(outputJsonString)
    output_html_doc(TEMPLATE_DIR, TEMPLATE_HTML, os.path.join(OUTPUT_PATH, OUTPUT_HTML_NAME),
                    outputJsonString, TITLE, saveJsonName=saveJsonName, preLoadedBins=preLoadedBins)
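# Example invocation of singleCrawlOutput to build the empty classification
# page for a crawl. The crawl path is reused from testGetSAF above; the
# output folder and title mirror the values analyze() uses:
def testSingleCrawlOutput():
    crawl = "../ALLCRAWLS/out/petclinic/petclinic_DEFAULT_-1.0_60mins/localhost/crawl0"
    singleCrawlOutput(crawl, os.path.join(os.path.abspath(crawl), "html_classification"),
                      "classification.html", "Classification HTML")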