def getTrain4Test(self, indexCV=0):
    """Load the train and valid CSV splits for fold ``indexCV`` and return
    them concatenated as one (features, labels) pair for final testing.

    Assumed CSV row layout (from the parsing below): row[0] = identifier,
    row[1] = integer label, row[2:] = float feature values.

    Args:
        indexCV: cross-validation fold index used in the file names
            ("train<indexCV>.csv" / "valid<indexCV>.csv").

    Returns:
        tuple(np.ndarray, np.ndarray): feature matrix and label vector
        covering both the train and valid splits, train rows first.
    """

    def _load(prefix):
        # Shared loader for one split; the original duplicated this
        # code for train and valid. Returns (X, y) as numpy arrays.
        path = os.path.join(UtilPath.Datasets(), self.project,
                            self.typeDataset, self.release4Test,
                            prefix + str(indexCV) + ".csv")
        features, labels = [], []
        # newline="" is the documented way to open files for csv.reader.
        with open(path, newline="") as f:
            for row in csv.reader(f):
                features.append([float(x) for x in row[2:]])
                labels.append(int(row[1]))
        return np.array(features), np.array(labels)

    xTrain, yTrain = _load("train")
    print(xTrain.shape)
    xValid, yValid = _load("valid")
    print(xValid.shape)
    return (np.concatenate([xTrain, xValid]),
            np.concatenate([yTrain, yValid], 0))
def setUp(self):
    """Build the Dataset under test and a GitRepository fixture.

    Creates an Option for the cassandra20200615 repository in "train"
    mode, derives the Dataset from it, and opens the checked-in test
    repository under testDataset/<name>/repository.
    """
    repo_conf = {
        "name": "cassandra20200615",
        "url": "",
        "CommitTarget": "",
        "filterFile": "",
        "codeIssueJira": "",
        "projectJira": ""
    }
    option = Option({
        "name": "cassandra20200615",
        "mode": "train",
        "repositories": [repo_conf],
        "parameters": {}  #needless when to infer.
    })
    self.dataset = Dataset(option.getRepositorieImproved())
    self.repository = repo_conf
    repo_path = os.path.join(UtilPath.Test(), "testDataset",
                             self.repository["name"], "repository")
    print(repo_path)
    self.gr = GitRepository(repo_path)
def testCalculateLOC(self):
    """Check Data.calculateLOC against the expected values in loc.csv.

    Each data row of loc.csv names a file (column 18) and its expected
    LOC (column 0); every file is checked in its own subTest.
    """
    csv_path = os.path.join(UtilPath.Test(), "testDataset",
                            self.repository["name"], "loc.csv")
    with open(csv_path) as f:
        rows = list(csv.reader(f))
    # rows[0] is the header line; the remaining rows are test cases.
    for expected in rows[1:]:
        target_path = os.path.join(
            UtilPath.Test(), "testDataset", self.repository["name"],
            "repository", expected[18]).replace("/", "\\")
        with self.subTest(pathFile=target_path):
            data = Data(self.gr, target_path,
                        self.dataset.getCommitsBug(self.repository))
            self.assertEqual(data.calculateLOC(), int(expected[0]))
def __init__(self, project, typeDataset, release4Test, purpose):
    """Remember where this dataset lives and what it is used for.

    Args:
        project: project name (first path component under Datasets()).
        typeDataset: dataset variant (second path component).
        release4Test: release identifier held out for testing.
        purpose: intended use, e.g. "test" or "search".
    """
    self.project = project
    self.typeDataset = typeDataset
    self.release4Test = release4Test
    # Fix: the original only set the misspelled attribute "porpose".
    # Expose the correctly-spelled name, and keep the old one so any
    # existing reader of self.porpose still works.
    self.purpose = purpose
    self.porpose = purpose
    print(UtilPath.Datasets(), self.project, self.typeDataset,
          self.release4Test)
def do(self):
    """Run one experiment according to self.option["purpose"].

    "test": trains on train+valid, evaluates on the held-out test split
    using hyper-parameters loaded from option["pathHP"].
    "search": runs a hyper-parameter search on the train/valid splits.
    Results are written under ResultAction(idExperiment).
    """
    self.checkGPU()
    os.makedirs(UtilPath.ResultAction(self.option["idExperiment"]),
                exist_ok=True)
    modeler = Modeler(UtilPath.ResultAction(self.option["idExperiment"]))
    dataset = Dataset(self.option["project"], self.option["typeDataset"],
                      self.option["release4test"], self.option["purpose"])
    if self.option["purpose"] == "test":
        xTrain4Test, yTrain4Test = dataset.getTrain4Test()
        xTest4Test, yTest4Test = dataset.getTest4Test()
        # Fix: the original opened the hyper-parameter file without
        # ever closing it; a context manager releases the handle.
        with open(self.option["pathHP"], 'r') as json_open:
            hp = json.load(json_open)
        modeler.test(xTrain4Test, yTrain4Test, xTest4Test, yTest4Test,
                     self.option["modelAlgorithm"], hp)
    elif self.option["purpose"] == "search":
        xTrain4Search, yTrain4Search = dataset.getTrain4Search()
        xValid4Search, yValid4Search = dataset.getValid4Search()
        modeler.search(xTrain4Search, yTrain4Search, xValid4Search,
                       yValid4Search, self.option["modelAlgorithm"],
                       self.option["time2search"])
def getTest4Test(self, indexCV=0):
    """Load test.csv for the release under test.

    Assumed CSV row layout (from the parsing below): row[0] = identifier,
    row[1] = integer label, row[2:] = float feature values.

    Args:
        indexCV: unused for the test split; kept for interface symmetry
            with the fold-indexed loaders.

    Returns:
        tuple(np.ndarray, np.ndarray): feature matrix and label vector.
    """
    path = os.path.join(UtilPath.Datasets(), self.project,
                        self.typeDataset, self.release4Test, "test.csv")
    features, labels = [], []
    # newline="" is the documented way to open files for csv.reader.
    # (The original also collected row[0] ids but never used them.)
    with open(path, newline="") as f:
        for row in csv.reader(f):
            features.append([float(x) for x in row[2:]])
            labels.append(int(row[1]))
    xTest = np.array(features)
    yTest = np.array(labels)
    print(xTest.shape)
    return xTest, yTest
def getValid4Search(self, indexCV=0):
    """Load the validation split of fold ``indexCV`` for hyper-parameter
    search.

    Assumed CSV row layout (from the parsing below): row[0] = identifier,
    row[1] = integer label, row[2:] = float feature values.

    Args:
        indexCV: cross-validation fold index ("valid<indexCV>.csv").

    Returns:
        tuple(np.ndarray, np.ndarray): feature matrix and label vector.
    """
    path = os.path.join(UtilPath.Datasets(), self.project,
                        self.typeDataset, self.release4Test,
                        "valid" + str(indexCV) + ".csv")
    features, labels = [], []
    # newline="" is the documented way to open files for csv.reader.
    with open(path, newline="") as f:
        for row in csv.reader(f):
            features.append([float(x) for x in row[2:]])
            labels.append(int(row[1]))
    return np.array(features), np.array(labels)
def getTrain4Search(self, indexCV=0):
    """Load the training split of fold ``indexCV`` for hyper-parameter
    search.

    Assumed CSV row layout (from the parsing below): row[0] = identifier,
    row[1] = integer label, row[2:] = float feature values.

    Args:
        indexCV: cross-validation fold index ("train<indexCV>.csv").

    Returns:
        tuple(np.ndarray, np.ndarray): feature matrix and label vector.
    """
    path = os.path.join(UtilPath.Datasets(), self.project,
                        self.typeDataset, self.release4Test,
                        "train" + str(indexCV) + ".csv")
    features, labels = [], []
    # newline="" is the documented way to open files for csv.reader.
    with open(path, newline="") as f:
        for row in csv.reader(f):
            features.append([float(x) for x in row[2:]])
            labels.append(int(row[1]))
    return np.array(features), np.array(labels)
"""Entry script: run a DNN "test" experiment for the linuxtools project."""
from src.manager import Maneger
from src.utility import UtilPath
import datetime


def main():
    """Build the experiment option dict and hand it to the manager."""
    option = {
        "date": datetime.datetime.now().strftime('%Y%m%d%H%M%S'),
        "project": "linuxtools",
        "release4test": "2",
        "variableDependent": "isBuggy",
        "purpose": "test",
        "modelAlgorithm": "DNN",
        "pathModel": "",
        # Hyper-parameters produced by a previous "search" run.
        "pathHP": UtilPath.Datasets() + "/" + "linuxtools/isBuggy/2/hpDNN.json"
    }
    # Unique id used to name the result directory for this run.
    option["idExperiment"] = \
        option["project"] + "_" \
        + option["variableDependent"] + "_" \
        + option["release4test"] + "_" \
        + option["modelAlgorithm"] + "_" \
        + option["date"]
    maneger = Maneger(option)
    maneger.do()


# Fix: the original ran unconditionally at import time; guard the entry
# point so importing this module has no side effects.
if __name__ == "__main__":
    main()