def getParetoInfo(clusterVector = None, extraData = None):
    """Tag every clustered element with its cluster name and Pareto flag.

    Each element of each cluster is looked up by id in column 0 of
    extraData. The matching row is extended with two values: the cluster
    name ("Cluster #<i>") and the result of stats.isParetoFront for the
    point [n, |X|] checked against all the points of the same cluster.

    Parameters:
        clusterVector: list of clusters, each one a list of element ids.
        extraData: list of rows; column 0 is read as the id, column 1 as n
            (the text "nan" in any case is kept as NaN, anything else is
            converted with int) and column 4 as X (converted with float).
            NOTE(review): the Pareto flag is read back from column 6, so
            rows are presumably 5 columns wide on entry -- confirm with
            the caller.

    Returns:
        A (paretoInfo, extraDataWithClusters) tuple. paretoInfo is a list
        of [id, paretoFlag] pairs. extraDataWithClusters is a shallow copy
        of extraData; because the copy is shallow, the row lists of
        extraData itself are the ones that get extended.
        If extraData is None the result is ([], []).
    """
    paretoInfo = []

    # The slice below used to run unconditionally and crashed with the
    # declared default extraData=None.
    if extraData is None:
        return paretoInfo, []

    extraDataWithClusters = extraData[:]
    if not (clusterVector and extraData):
        return paretoInfo, extraDataWithClusters

    # Column 0 (the ids) is never modified below (rows only grow at the
    # end), so it is extracted once instead of once per element.
    idColumn = stats.extractColumns(extraDataWithClusters, 0)

    for i, cluster in enumerate(clusterVector):
        clusterName = "Cluster #%i" % i
        clusterProvisionalList = []
        dataList = []

        for clusterElement in cluster:
            extraDataIndex = stats.firstIndex(idColumn, clusterElement)
            if extraDataIndex < 0:
                # -1 means the id is not in extraData; using it as an index
                # would silently read (and later tag) the last row.
                continue

            row = extraData[extraDataIndex]
            nValue = float("nan")
            if str(row[1]).lower() != "nan":
                nValue = int(row[1])
            xValue = float(row[4])

            # [row index, n, X, id]
            clusterProvisionalList.append([extraDataIndex, nValue, xValue, row[0]])
            dataList.append([nValue, abs(xValue)])

        # The flags are computed only after the whole cluster is collected,
        # because every point is compared against the complete dataList.
        for rowIndex, nValue, xValue, _ in clusterProvisionalList:
            dataPoint = [nValue, abs(xValue)]  # {n, X}
            taggedRow = extraDataWithClusters[rowIndex]
            taggedRow.extend([clusterName, stats.isParetoFront(dataPoint, dataList)])
            paretoInfo.append([taggedRow[0], taggedRow[6]])

    return paretoInfo, extraDataWithClusters
def filterNFDRorZ(lowerData, higherData, minN=2, maxN=1e6, minZ=0.0, maxFDR=sys.float_info.max, useFDR=True):
    """Select higher-level elements by n and then by FDR or by |Z|.

    First, every row of lowerData is converted with convertToStatsResult
    and its id2 is accepted when minN <= nij <= maxN; only the first
    accepted row of each id2 is kept. Then every row of higherData is
    converted and kept when its id1 was accepted and

      * useFDR is true:  FDRij is not "nan" and FDRij <= maxFDR
      * useFDR is false: Zij is not "nan" and abs(Zij) >= minZ

    Parameters:
        lowerData: rows accepted by convertToStatsResult; nij and id2 are read.
        higherData: rows accepted by convertToStatsResult; id1, Zij, FDRij,
            Xi and Xj are read.
        minN, maxN: inclusive bounds for nij.
        minZ: minimum absolute Zij (used only when useFDR is false).
        maxFDR: maximum FDRij (used only when useFDR is true).
        useFDR: chooses between the FDR and the Z criterion.

    Returns:
        A list of [id1, nij, Zij, FDRij, Xi - Xj] rows in higherData order,
        where nij is the int of the value stored for that id in the n filter.
    """
    # filter n
    # A single id -> nij mapping replaces the former pair of parallel lists
    # (one for searching, one for storing nij), so the nij is fetched by
    # key instead of by position.
    nijById = {}
    for row in lowerData:
        lowerElement = convertToStatsResult(row)
        if minN <= lowerElement.nij <= maxN and lowerElement.id2 not in nijById:
            nijById[lowerElement.id2] = lowerElement.nij

    # filter Z (or FDR)
    listNZok = []
    for row in higherData:
        higherElement = convertToStatsResult(row)

        if useFDR:
            criterion = higherElement.FDRij
        else:
            criterion = higherElement.Zij

        if str(criterion) == "nan" or higherElement.id1 not in nijById:
            continue

        if useFDR:
            accepted = criterion <= maxFDR
        else:
            accepted = abs(criterion) >= minZ

        if accepted:
            listNZok.append([
                higherElement.id1,
                int(nijById[higherElement.id1]),
                higherElement.Zij,
                higherElement.FDRij,
                higherElement.Xi - higherElement.Xj,
            ])

    return listNZok
def scalarProduct(vectorA, vectorB):
    """Count the elements of the shorter vector that appear in the longer one.

    The result is a plain count, not a fraction: 0 means the two vectors
    share no element, and len(shorter vector) means every element of the
    shorter vector is present in the longer one. Repeated elements of the
    shorter vector are counted once per occurrence.

    Parameters:
        vectorA, vectorB: sequences of elements comparable with ==.

    Returns:
        int, the number of matching elements.
    """
    # Iterate over the shorter vector; on equal lengths vectorB is iterated.
    if len(vectorA) < len(vectorB):
        vectorShort, vectorLong = vectorA, vectorB
    else:
        vectorShort, vectorLong = vectorB, vectorA

    try:
        # A set gives constant-time membership tests.
        members = set(vectorLong)
        return sum(1 for element in vectorShort if element in members)
    except TypeError:
        # Unhashable elements (e.g. lists): fall back to a linear search.
        return sum(1 for element in vectorShort if element in vectorLong)
def associateElements(inStats = "", uFile = "", relFile = ""):
    """Pair every higher-level element with its lower-level elements.

    The relations file gives (higher id, lower id) pairs in its first two
    columns. For each element listed in uFile, the lower ids related to it
    are collected, keeping only those that also appear in column 3 of the
    stats data file.

    Parameters:
        inStats: path passed to stats.loadStatsDataFile.
        uFile: path passed to stats.load2stringList; its rows are the
            higher-level elements. The first row may be one of the headers
            ['id', 'Z', 'n'], ['id', 'n', 'Z', 'FDR'] or
            ['id', 'n', 'Z', 'FDR', 'X']; any other first row is treated
            as data and only its first column (the id) is used.
        relFile: path passed to stats.loadRelationsFile.

    Returns:
        A (results, elementList, "") tuple.
        results holds one [id, lowerElementList] entry per higher element
        found in the relations, and [id, None] for an element not found
        whose id is not blank (blank ids that are not found are dropped).
        elementList holds the higher elements normalised to
        [id, n, Z, FDR, X], with NaN in the columns the file did not give.

    Exits the program through sys.exit when uFile yields no rows.
    """
    relations = stats.loadRelationsFile(relFile)
    # NOTE(review): presumably this orders the rows by higher id, so that
    # the first/last index found below delimit one contiguous run.
    relations = stats.sortByIndex(relations, 0)

    statsData = stats.loadStatsDataFile(inStats)
    # Only the ids in column 3 of the stats rows are needed here. The
    # theoretical/experimental distribution lists that used to be built
    # next to them were never read.
    experListFirstColumn = [element[3] for element in statsData]

    higherElements = stats.load2stringList(uFile, removeCommas = True)

    # WARNING! higherElements must be a list of lists
    # with each sublist being id, n, Z, FDR, X
    # begin: jmrc
    if not higherElements:
        sms = "ERROR: higherElements is empty. The higherElements must be a list of lists with each sublist being id, n, Z, FDR, X"
        sys.exit(sms)
    # end: jmrc

    header = higherElements[0]
    if header == ['id', 'Z', 'n']:
        # this means the list comes from SanXoTSqueezer,
        # so the header is dropped and the columns are switched to
        # id, n, Z, FDR, X
        elementList = [[element[0], element[2], element[1], float("nan"), float("nan")]
                       for element in higherElements[1:]]
    elif header == ['id', 'n', 'Z', 'FDR']:
        # this means it does not contain X, so a nan is put in its place
        elementList = [[element[0], element[1], element[2], element[3], float("nan")]
                       for element in higherElements[1:]]
    elif header == ['id', 'n', 'Z', 'FDR', 'X']:
        elementList = [[element[0], element[1], element[2], element[3], element[4]]
                       for element in higherElements[1:]]
    else:
        # no known header: every row (the first one included) is data
        # and only the id is taken
        elementList = [[element[0], float("nan"), float("nan"), float("nan"), float("nan")]
                       for element in higherElements]

    relationsFirstColumn = stats.extractColumns(relations, 0)
    relationsSecondColumn = stats.extractColumns(relations, 1)

    results = []
    for uElement in elementList:
        higherId = uElement[0]
        first = stats.firstIndex(relationsFirstColumn, higherId)

        if first > -1:  # -1 means it is not in the list
            last = stats.lastIndex(relationsFirstColumn, higherId)
            lowerElements = relationsSecondColumn[first:last + 1]  # "+1" is to include the last one
            # keep only the lower elements that are present in the stats file
            lowerElementList = [element for element in lowerElements
                                if stats.firstIndex(experListFirstColumn, element) > -1]
            lowerElementList = stats.sortByIndex(lowerElementList, 0)
            results.append([higherId, lowerElementList])
        elif higherId.strip():
            results.append([higherId, None])

    return results, elementList, ""