def getAnalysisResult(self, mainClassPath):
    """Run the Pearson analysis for every sub-class found under ``mainClassPath``.

    Steps performed:

    1. Pick the properties to analyse from ``self.mainClassDict`` (keyed by
       the last ``/``-separated component of ``mainClassPath``), falling back
       to ``self.defaultSelectProperty``.
    2. Scan the directory for sub-class entries (``self.re_SubClass``) and
       for the single ``com_pheno`` workbook (``self.re_Com_Pheno``).
    3. For each sub-class, read ``pheno_<subClass>.csv`` and collect the
       lower-cased, non-empty values of the column identified by
       ``self.pheno_MappingColId`` (a 1-based column number or a header name).
    4. Call ``self.getSlnFiles`` once per sub-class and dump all of its
       results side by side into ``PearsonResult.csv``.
    5. Feed columns 4 and 5 of every result table (header row excluded) to
       ``pearsonr``.

    Args:
        mainClassPath: Directory of the main class, using ``/`` separators.

    Returns:
        An ``OrderedDict`` mapping each sub-class tag (header cell
        ``[0][0][2]`` of the sub-class result with its first 9 characters
        removed) to an ``OrderedDict`` that maps each selection tag (header
        cell ``[0][5]`` of a result table with its first 5 characters
        removed) to the value returned by ``pearsonr``.

    Raises:
        Exception: If no property selection is configured, if the number of
            ``com_pheno`` files is not exactly one, if a pheno CSV is empty,
            if the mapping column cannot be found, or if two result tables
            of one sub-class carry the same selection tag.
    """
    print("getAnalysisResult(" + mainClassPath + ")")
    pearsonResultPath = os.path.join(mainClassPath, "PearsonResult.csv").replace("\\", "/")
    dirEntries = os.listdir(mainClassPath)
    mainClass = mainClassPath.split("/")[-1]
    print("mainClass = " + mainClass)

    mainClassDictValue = self.mainClassDict.get(mainClass, None)
    if mainClassDictValue is None:
        mainClassDictValue = self.defaultSelectProperty
    print("mainClassDictValue = ", mainClassDictValue)
    if mainClassDictValue is None:
        raise Exception("getSlnFiles(" + mainClass + "): 未正確設定欲做為皮爾森分析的屬性,請正確設定 self.mainClassDict 或 self.defaultSelectProperty !!")

    subClassFolderSet = set()
    com_phenoSet = set()
    for entry in dirEntries:
        if self.re_SubClass.match(entry):
            subClassFolderSet.add(entry)
        if self.re_Com_Pheno.match(entry):
            com_phenoSet.add(entry)
    print("subClassCount = " + str(len(subClassFolderSet)))

    if not com_phenoSet:
        # The original message only described the "more than one" case,
        # which is misleading when nothing matched at all.
        raise Exception("No com_pheno file found under " + mainClassPath + " (search rule: com_pheno.*\\.[(xlsx)(xls)(csv)])!")
    if len(com_phenoSet) != 1:
        raise Exception("com_pheno檔案位於 " + mainClassPath + " 路徑下不只一個,請移除或重新命名非必要項目(搜尋規則:com_pheno.*\\.[(xlsx)(xls)(csv)])!")

    excelPath = os.path.join(mainClassPath, list(com_phenoSet)[0]).replace("\\", "/")
    print("Get Excel File: " + excelPath)
    excel = Excel(excelPath, self.blnShowExcel, self.blnForceRestartExcel)
    try:
        self.getPhoneSheet(excel)
        print("dataRecoderCount = " + str(excel.sheetRowCounts - 1))

        # sub-class name -> lower-cased mapping-column values of its pheno CSV
        randomPickDict = {}
        for subClassFolder in subClassFolderSet:
            phone_v_Path = os.path.join(mainClassPath, "pheno_" + str(subClassFolder) + ".csv").replace("\\", "/")
            print("Read phone_v_File(" + phone_v_Path + ")")
            list2D = CSVFile(phone_v_Path, decoding="utf-8").readTo2DList(",")
            if not list2D:
                raise Exception("getAnalysisResult(" + mainClass + "): pheno file is empty: " + phone_v_Path)

            if is_integer(self.pheno_MappingColId):
                # Configured as a 1-based column number.
                mappingColIdx = self.pheno_MappingColId - 1
            else:
                # Configured as a header name; compared case-insensitively.
                wantedName = self.pheno_MappingColId.strip().lower()
                mappingColIdx = None
                for idx, colName in enumerate(list2D[0]):
                    if colName.strip().lower() == wantedName:
                        mappingColIdx = idx  # last matching header wins
            if mappingColIdx is None:
                raise Exception("getAnalysisResult(" + mainClass + "): column '" + str(self.pheno_MappingColId) + "' (self.pheno_MappingColId) not found in " + phone_v_Path)

            # Skip the header row and ignore rows whose mapping cell is empty.
            randomPickDict[subClassFolder] = [
                line[mappingColIdx].lower()
                for line in list2D[1:]
                if line[mappingColIdx] != ""
            ]

        print ("randomPickDict = ", randomPickDict)
        for randomList in randomPickDict.values():
            print (len(randomList))
        colNamesString = list2str(excel.get_rowData(1))
        print("list2str=", colNamesString)

        # sub-class name -> list of result tables returned by getSlnFiles
        subClassesResult = {}
        for subClass in subClassFolderSet:
            subClassesResult[subClass] = self.getSlnFiles(excel, mainClassDictValue, mainClassPath, subClass, randomPickDict[subClass])
        # Parenthesised single-argument form prints identically on Python 2
        # and is also valid Python 3 syntax.
        print(subClassesResult)
    finally:
        # Release the workbook even when reading or analysis fails.
        excel.close()
    print("++++++++++++++++++++++++++++++++++++++++++++++++")

    # Dump every sub-class: one CSV line per table row (header + data rows),
    # with the tables of all selected properties laid out side by side.
    resultCsv = CSVFile(pearsonResultPath, "utf-8")
    sectionCount = len(mainClassDictValue)
    for (subClassName, slnFilesResult) in subClassesResult.items():
        classRecoderSize = len(randomPickDict[subClassName])
        for rowIdx in range(classRecoderSize + 1):
            resultCsv.writeLine("".join(
                list2str(slnFilesResult[sectionIdx][rowIdx]) + ",,"
                for sectionIdx in range(sectionCount)
            ))
        # Blank line between sub-classes.
        resultCsv.writeLine("")

    subClassResultDict = OrderedDict()
    for (subClassName, slnFilesResult) in subClassesResult.items():
        slnResultDict = OrderedDict()
        # Header cell looks like "subClass_v1" per the original note; keep
        # what follows the 9-character prefix.
        v_type = slnFilesResult[0][0][2][9:]
        for slnClassNum, slnFile in enumerate(slnFilesResult, 1):
            # Header cell looks like "type_9" per the original note; keep
            # what follows the 5-character prefix.
            select_Type = slnFile[0][5][5:]
            pearsonr_list1 = []
            pearsonr_list2 = []
            # Slice instead of pop(0) so the result table is not mutated.
            for lineData in slnFile[1:]:
                print("lineData = ", lineData)
                pearsonr_list1.append(lineData[4])
                pearsonr_list2.append(lineData[5])
            print("pearsonr_list1(v_type=" + str(v_type) + ", slnClassNum=" + str(slnClassNum) + ", select_Type=" + str(select_Type) + ") : " , pearsonr_list1)
            print("pearsonr_list2(v_type=" + str(v_type) + ", slnClassNum=" + str(slnClassNum) + ", select_Type=" + str(select_Type) + ") : " , pearsonr_list2)
            pearson_result = pearsonr(pearsonr_list1, pearsonr_list2)
            print("pearsonr_result(v_type=" + str(v_type) + ", slnClassNum=" + str(slnClassNum) + ", select_Type=" + str(select_Type) + ") : " + str(pearson_result))
            if str(select_Type) in slnResultDict:
                raise Exception("select_Type 重複,請檢查 self.mainClassDict 或 self.defaultSelectProperty 是否有重複設定!!")
            slnResultDict[str(select_Type)] = pearson_result
        subClassResultDict[str(v_type)] = slnResultDict
    return subClassResultDict
def getPhenoResult(self, mainClassPath):
    """Split the ``com_pheno`` workbook rows into per-sub-class CSV files.

    The data rows of the workbook are partitioned into as many groups as
    there are sub-classes by ``randomPickNGroup``. For the sub-class at
    position ``i`` two files are written into ``mainClassPath``:

    * ``pheno_<subClass>.csv`` - header row plus the rows of group ``i``;
    * ``pheno_except_<subClass>.csv`` - header row plus the rows of every
      other group, in group order.

    Cells in the columns listed in ``self.com_pheno_NotFloatCols`` (1-based
    column numbers or header names) are converted with ``int()``; a cell
    that cannot be converted is written as ``str()`` of its value after a
    warning is printed.

    Args:
        mainClassPath: Directory of the main class, using ``/`` separators.
            Its last component selects the sub-class list from
            ``self.mainClass_subClassMap`` (fallback:
            ``self.defaultSubClassList``).

    Raises:
        Exception: If the number of ``com_pheno`` files is not exactly one,
            or if a name in ``self.com_pheno_NotFloatCols`` matches no
            header of the workbook.
    """

    def resolveColIdx(colId, colNames):
        # Translate a 1-based column number or a header name (compared
        # case-insensitively) into a 0-based index; None when not found.
        if is_integer(colId):
            return colId - 1
        wantedName = str(colId).strip().lower()
        foundIdx = None
        for idx, colName in enumerate(colNames):
            if wantedName == colName.strip().lower():
                foundIdx = idx  # last matching header wins
        return foundIdx

    def writeRows(csvFile, workbook, rowNums, intColIdxs):
        # Write the given workbook rows to csvFile, forcing the configured
        # columns to int where possible.
        for num in rowNums:
            # num + 1 skips the header row of the sheet.
            rowData = list(workbook.get_rowData(num + 1))
            for colIdx in intColIdxs:
                try:
                    rowData[colIdx] = int(rowData[colIdx])
                except (TypeError, ValueError, OverflowError):
                    # Best effort: keep the cell as text when it is not a number.
                    print("Change Excel Value Warning(int(rowData[" + str(colIdx) + "])): " + str(rowData[colIdx]), "You can check self.com_pheno_NotFloatCols, ignore this warning if expected !!")
                    rowData[colIdx] = str(rowData[colIdx])
            csvFile.writeLine(list2str(rowData))

    print("getPhenoResult(" + mainClassPath + ")")
    dirEntries = os.listdir(mainClassPath)
    mainClass = mainClassPath.split("/")[-1]
    print("mainClass = " + mainClass)

    com_phenoSet = set(entry for entry in dirEntries if self.re_Com_Pheno.match(entry))

    subClassFolderList = self.mainClass_subClassMap.get(mainClass, None)
    if subClassFolderList is None:
        subClassFolderList = self.defaultSubClassList
    subClassCount = len(subClassFolderList)
    print("subClassCount = " + str(subClassCount))

    if not com_phenoSet:
        # The original message only described the "more than one" case,
        # which is misleading when nothing matched at all.
        raise Exception("No com_pheno file found under " + mainClassPath + " (search rule: com_pheno.*\\.[(xlsx)(xls)(csv)])!")
    if len(com_phenoSet) != 1:
        raise Exception("com_pheno檔案位於 " + mainClassPath + " 路徑下不只一個,請移除或重新命名非必要項目(搜尋規則:com_pheno.*\\.[(xlsx)(xls)(csv)])!")

    excelPath = os.path.join(mainClassPath, list(com_phenoSet)[0]).replace("\\", "/")
    print("Get Excel File: " + excelPath)
    excel = Excel(excelPath, self.blnShowExcel, self.blnForceRestartExcel)
    try:
        self.getPhoneSheet(excel)
        dataRecoderCount = excel.sheetRowCounts - 1
        print("dataRecoderCount = " + str(dataRecoderCount))

        randomPickLists = randomPickNGroup(dataRecoderCount, subClassCount)
        print ("randomPickList = ", randomPickLists)
        for randomList in randomPickLists:
            print (len(randomList))

        colNamesList = excel.get_rowData(1)
        colNamesString = list2str(colNamesList)
        print("list2str=", colNamesString)

        # Resolve the int-typed columns once; they do not depend on the row.
        intColIdxs = []
        for colNum in self.com_pheno_NotFloatCols:
            colIdx = resolveColIdx(colNum, colNamesList)
            if colIdx is None:
                raise Exception("getPhenoResult(" + mainClass + "): column '" + str(colNum) + "' from self.com_pheno_NotFloatCols not found in " + excelPath)
            intColIdxs.append(colIdx)

        for case, subClassFolder in enumerate(subClassFolderList):
            phenoPath = os.path.join(mainClassPath, "pheno_" + str(subClassFolder) + ".csv").replace("\\", "/")
            phenoExceptPath = os.path.join(mainClassPath, "pheno_except_" + str(subClassFolder) + ".csv").replace("\\", "/")

            # Rows picked for this sub-class.
            phenoCsv = CSVFile(phenoPath, "utf-8")
            phenoCsv.writeLine(colNamesString)
            print("len(randomPickLists[case] = ", len(randomPickLists[case]))
            writeRows(phenoCsv, excel, randomPickLists[case], intColIdxs)

            # Rows of every other group, concatenated in group order.
            exceptCsv = CSVFile(phenoExceptPath, "utf-8")
            exceptCsv.writeLine(colNamesString)
            differenceList = []
            for groupIdx, group in enumerate(randomPickLists):
                if groupIdx != case:
                    differenceList.extend(group)
            print("differenceList", differenceList)
            print("differenceList len = " + str(len(differenceList)))
            writeRows(exceptCsv, excel, differenceList, intColIdxs)
    finally:
        # Release the workbook even when splitting fails part-way.
        excel.close()