def dist_to_weights(net, epsilon=0.001):
    """
    Convert a distance matrix / network into a weight matrix / network.

    Each edge distance D is mapped to the weight
    W = 1 - D / (max(D) + epsilon), where max(D) is the largest edge
    distance found in net (never taken to be below 0.0).

    A pynet.SymmFullNet input produces a pynet.SymmFullNet of the same
    node count; any other input produces a pynet.SymmNet. Node
    properties are copied over with netext.copyNodeProperties.

    Returns the new matrix / network.
    """
    node_count = len(net._nodes)
    if isinstance(net, pynet.SymmFullNet):
        weights = pynet.SymmFullNet(node_count)
    else:
        weights = pynet.SymmNet()

    all_edges = list(net.edges)

    # Largest distance in the network; the leading 0.0 is the floor the
    # scan starts from.
    largest = max([0.0] + [item[2] for item in all_edges])

    # Epsilon trick: the denominator is slightly larger than the largest
    # distance, so the lowest weight is almost, but not entirely, zero.
    largest = largest + epsilon

    for item in all_edges:
        if item[2] == largest:
            # Such an edge would get weight zero, so it is left out.
            continue
        weights[item[0]][item[1]] = 1 - item[2] / largest

    netext.copyNodeProperties(net, weights)
    return weights
def getGroupwiseDistanceMatrix(self, groups, distance, groupNames=None):
    """
    Returns a distance matrix between groups of nodes.

    Parameters
    ----------
    groups : iterable
        Each element is itself an iterable containing the indices of
        the nodes belonging to one group.
    distance : string
        Name of the distance measure, case-insensitive. One of
        "goldstein", "goldstein_d1" or "fst".
    groupNames : sequence, optional
        Names used as node labels in the result, one per group and in
        the same order as groups. Defaults to 0, 1, ..., ngroups - 1.

    Returns
    -------
    For the Goldstein measures, a pynet.SymmFullNet with one node per
    group. For "fst", whatever AlleleFrequencyTable.getFST() returns.

    Raises
    ------
    NotImplementedError
        If distance is not one of the supported names.
    """
    distance = distance.lower()  # any case is ok
    grouplist = list(groups)
    ngroups = len(grouplist)

    # Identity test on purpose: "== None" is evaluated element-wise by
    # array-like name sequences and then cannot be used as a condition.
    if groupNames is None:
        groupNames = range(ngroups)

    if distance in ("goldstein", "goldstein_d1"):
        if distance == "goldstein":
            getGroupwiseDistance = self.getGroupwiseDistance_Goldstein
        else:
            getGroupwiseDistance = self.getGroupwiseDistance_Goldstein_D1

        # The matrix is only needed here; the other branches never use it.
        matrix = pynet.SymmFullNet(ngroups)
        for i in range(ngroups):
            for j in range(i + 1, ngroups):
                matrix[groupNames[i], groupNames[j]] = getGroupwiseDistance(
                    grouplist[i], grouplist[j])
        return matrix

    if distance == "fst":
        # Allele frequency table based distance.
        afTable = AlleleFrequencyTable()
        afTable.init_msData(self, grouplist, groupNames)
        return afTable.getFST()

    raise NotImplementedError("Distance '" + distance +
                              "' is not implemented.")
def get_distance_matrix(self, distance_function, node_names,
                        progressUpdater=None):
    """
    Computes the distance between every pair of the len(self.data)
    items and returns the result as a pynet.SymmFullNet.

    Parameters
    ----------
    distance_function : string
        "jaccard_distance" or "bc_dissimilarity"; selects
        self.get_jaccard_distance or self.get_bc_dissimilarity, which is
        then called with the two item indices.
    node_names : sequence or None
        Labels for the nodes of the returned matrix, indexed like the
        items. None means the integer indices are used as labels.
    progressUpdater : callable, optional
        Called now and then, between rows, with the fraction of pairs
        processed so far (a float).

    Raises
    ------
    KeyError
        If distance_function is not one of the names listed above.
    """
    size = len(self.data)
    updateInterval = 1000
    # size * (size - 1) is always even, so floor division is exact.
    totElems = size * (size - 1) // 2
    elementsAdded = 0
    lastUpdate = 0

    distance = {
        "jaccard_distance": self.get_jaccard_distance,
        "bc_dissimilarity": self.get_bc_dissimilarity
    }
    # Resolved once, so an unknown name fails before any work is done.
    getDistance = distance[distance_function]

    matrix = pynet.SymmFullNet(size)
    if node_names is None:
        node_names = range(0, size)

    for i, iName in enumerate(node_names):
        if progressUpdater is not None:
            if elementsAdded - lastUpdate > updateInterval:
                progressUpdater(float(elementsAdded) / float(totElems))
                # Without this the callback would fire on every row once
                # the first interval has passed.
                lastUpdate = elementsAdded
        # Row i contributes the pairs (i, i+1) ... (i, size-1).
        elementsAdded += size - i - 1
        for j in range(i + 1, size):
            matrix[iName, node_names[j]] = getDistance(i, j)
    return matrix
def getDistanceMatrix(self, distance="lm", nodeNames=None,
                      progressUpdater=None):
    """
    Computes the distance between each pair of nodes and returns the
    corresponding distance matrix as a pynet.SymmFullNet.

    Parameters
    ----------
    distance : string
        Selects the method used to compare two nodes:
        "lm" -> getMSDistance_linearManhattan (also used for any
        unrecognized value), "nsa" -> getMSDistance_nonsharedAlleles,
        "ap" -> getMSDistance_alleleParsimony,
        "hybrid" -> getMSDistance_hybrid,
        "czekanowski" -> get_czekanowski_dissimilarity.
    nodeNames : sequence, optional
        Labels for the nodes of the returned matrix, indexed like the
        specimens. Defaults to the integer indices.
    progressUpdater : callable, optional
        Called now and then, between rows, with the fraction of pairs
        processed so far (a float).
    """
    # Only the selected method is looked up on self, as before.
    defaultMeasure = "getMSDistance_linearManhattan"
    measureNames = {
        "lm": defaultMeasure,
        "nsa": "getMSDistance_nonsharedAlleles",
        "ap": "getMSDistance_alleleParsimony",
        "hybrid": "getMSDistance_hybrid",
        "czekanowski": "get_czekanowski_dissimilarity",
    }
    getMSDistance = getattr(self, measureNames.get(distance, defaultMeasure))

    numberOfSpecimens = len(self._alleles[0])
    minUpdateInterval = 1000
    minUpdateSteps = 30
    # n * (n - 1) is always even, so floor division is exact.
    totElems = numberOfSpecimens * (numberOfSpecimens - 1) // 2
    # Report at least roughly minUpdateSteps times on small inputs, but
    # never wait for more than minUpdateInterval pairs.
    updateInterval = max(
        min(minUpdateInterval, int(totElems / float(minUpdateSteps))), 1)
    elementsAdded = 0
    lastUpdate = 0

    matrix = pynet.SymmFullNet(numberOfSpecimens)
    if nodeNames is None:
        nodeNames = range(0, numberOfSpecimens)

    for i, iName in enumerate(nodeNames):
        if progressUpdater is not None:
            if elementsAdded - lastUpdate > updateInterval:
                progressUpdater(float(elementsAdded) / float(totElems))
                lastUpdate = elementsAdded
        # Row i contributes the pairs (i, i+1) ... (i, n-1); counting
        # n - i here would let the reported fraction exceed 1.0.
        elementsAdded += numberOfSpecimens - i - 1
        for j in range(i + 1, numberOfSpecimens):
            matrix[iName, nodeNames[j]] = getMSDistance(self.getNode(i),
                                                        self.getNode(j))
    return matrix
def getFST(self):
    """
    Computes a pairwise distance between all groups and returns it as a
    pynet.SymmFullNet whose nodes are labelled with self.groupNames.

    For each pair of groups a numerator and a denominator are summed
    over all loci in which both groups have a positive total frequency,
    and the distance is -log(1 - num / den). Pairs whose denominator is
    not positive get the distance 0.0 (the value is not defined).

    From Reynolds, J., Weir, B.S., and Cockerham, C.C. (1983)
    Estimation of the coancestry coefficient: basis for a short-term
    genetic distance. _Genetics_, 105:767-779, p. 769.
    """
    d = pynet.SymmFullNet(self.nGroups)
    for i in range(self.nGroups):
        for j in range(i):
            num = 0.0
            den = 0.0
            for locus in range(self.nLoci):
                ni = float(self.totalFreq(i, locus))
                nj = float(self.totalFreq(j, locus))
                if not (ni > 0 and nj > 0):
                    continue  # locus has no data for one of the groups

                fi = self.normalizedFreqs(i, locus)
                fj = self.normalizedFreqs(j, locus)
                summ = 0.0
                ai = 1.0
                aj = 1.0
                # Iterating the mappings directly yields their keys on
                # both Python 2 and 3 (iterkeys() exists only on 2).
                # NOTE(review): fi[l] / fj[l] are read for alleles seen in
                # either group, so the mappings presumably return 0 for
                # absent alleles -- confirm against normalizedFreqs.
                for l in set(chain(fi, fj)):
                    summ += (fi[l] - fj[l])**2
                    ai -= fi[l]**2
                    aj -= fj[l]**2

                # Terms shared by the numerator and the denominator.
                weighted = ni * ai + nj * aj
                scale = 4 * ni * nj * (ni + nj - 1)
                num += summ / 2. - ((ni + nj) * weighted) / scale
                den += summ / 2. + (
                    (4 * ni * nj - ni - nj) * weighted) / scale

            if den > 0:
                # NOTE(review): math.log raises ValueError if num >= den;
                # presumably that cannot happen for valid data -- confirm.
                d[self.groupNames[i]][
                    self.groupNames[j]] = -math.log(1 - num / den)
            else:
                d[self.groupNames[i]][self.groupNames[j]] = 0.0  # not defined
    return d
def loadNet_mat(input, mutualEdges=False, splitterChar=None, symmetricNet=True,
                nodeNames=[], type="square"):
    """
    Loads a network from a file which is in a weight matrix format.

    Row/column k of the matrix corresponds to the node nodeNames[k] if
    node names are given, and to the integer k otherwise. Diagonal
    elements are never stored in the network.

    Parameters
    ----------
    input : file-like object
        Iterated line by line. When nodeNames is empty the matrix size
        is inferred from the data, which additionally requires
        readline() and seek(0).
    mutualEdges : bool
        Not used by this function.
    splitterChar : string or None
        Field separator handed to str.split; None splits on whitespace.
    symmetricNet : bool
        If True the result is a pynet.SymmFullNet, otherwise a
        pynet.FullNet.
    nodeNames : sequence
        Node labels. If empty (or None), integer labels are used and
        the size is inferred from the input.
    type : string
        "square": Input is in a square matrix format. Weight of an edge
                  between node i and j is in row i and column j.
        "upperdiag": Same as the square matrix, but only the elements at
                  the diagonal and above are present.
        "lowerdiag": Only the elements at the diagonal and below are
                  present.
        "supperdiag": Strictly upper diagonal. The diagonal elements are
                  not present.
        "slowerdiag": Strictly lower diagonal. The diagonal elements are
                  not present.

    Returns
    -------
    The network that is loaded in a FullNet or in SymmFullNet format.

    Raises
    ------
    Exception
        If type is not one of the values above, if a row has the wrong
        number of fields, or if the number of rows does not fit the
        matrix size.
    """
    # Validate up front: previously an unknown type was only rejected when
    # the size had to be inferred, and silently ignored otherwise.
    if type not in ("square", "upperdiag", "supperdiag", "lowerdiag",
                    "slowerdiag"):
        raise Exception("Invalid type for the matrix: " + str(type))

    usenodelist = nodeNames is not None and len(nodeNames) > 0
    # For these layouts the first row is the longest one and fixes the size.
    sizeFromFirstRow = type in ("square", "upperdiag", "supperdiag")

    # Get the network size
    if usenodelist:
        netSize = len(nodeNames)
    else:
        # We have to infer the size from the longest row of the file.
        if sizeFromFirstRow:
            longestLine = input.readline()
        else:
            longestLine = ""
            for longestLine in input:
                pass  # it would be faster to read backwards
        input.seek(0)
        netSize = len(longestLine.strip().split(splitterChar))
        if type in ("supperdiag", "slowerdiag"):
            netSize += 1  # the diagonal is not present in the file

    if symmetricNet:
        newNet = pynet.SymmFullNet(netSize)
    else:
        newNet = pynet.FullNet(netSize)

    # In the strictly lower layout the first line of the file describes
    # matrix row 1; matrix row 0 has no elements and is not written.
    rowOffset = 1 if type == "slowerdiag" else 0
    lastRow = rowOffset - 1  # stays like this if the input is empty

    for lineIndex, line in enumerate(input):
        row = lastRow = lineIndex + rowOffset
        fields = line.strip().split(splitterChar)

        # Expected number of fields and the column of the first field.
        if type == "square":
            expectedLength, firstColumn = netSize, 0
        elif type == "upperdiag":
            expectedLength, firstColumn = netSize - row, row
        elif type == "supperdiag":
            expectedLength, firstColumn = netSize - row - 1, row + 1
        else:  # "lowerdiag" and "slowerdiag"
            expectedLength, firstColumn = lineIndex + 1, 0

        # Check that the matrix is of right form
        if len(fields) != expectedLength:
            if sizeFromFirstRow and usenodelist and row == 0:
                raise Exception(
                    "The length of the node label list does not macth the number of columns."
                )
            raise Exception("The length of row " + str(row) +
                            " does not match the previous rows.")

        # Now fill the row of the matrix
        for offset, element in enumerate(fields):
            column = firstColumn + offset
            if column == row:
                continue  # diagonal elements are not stored
            weight = float(element)
            if usenodelist:
                newNet[nodeNames[row], nodeNames[column]] = weight
            else:
                newNet[row, column] = weight

    # The strictly upper layout has no line for the last matrix row.
    unwrittenRows = 1 if type == "supperdiag" else 0
    if lastRow + 1 + unwrittenRows != netSize:
        raise Exception("Invalid number of rows: There are " +
                        str(lastRow + 1) + " rows and " + str(netSize) +
                        " columns.")
    return newNet