Example #1
0
def dist_to_weights(net, epsilon=0.001):
    '''Converts a distance matrix / network into a weight matrix / network.

    Every edge distance D becomes the weight 1 - D / (max(D) + epsilon),
    where max(D) is the largest edge distance in `net` (treated as 0.0
    when no edge distance is positive). An edge whose distance equals the
    shifted maximum is left out of the result.

    Returns the new matrix/network, built as a pynet.SymmFullNet when
    `net` is one and as a pynet.SymmNet otherwise.'''

    node_count = len(net._nodes)

    # Keep the full-matrix representation when the input uses it
    if isinstance(net, pynet.SymmFullNet):
        weights = pynet.SymmFullNet(node_count)
    else:
        weights = pynet.SymmNet()

    edge_list = list(net.edges)

    # Largest distance, seeded with 0.0 so an empty edge list still works.
    # epsilon trick: shifting the maximum upwards means the lowest weight
    # will be almost but not entirely zero
    ceiling = max([0.0] + [edge[2] for edge in edge_list]) + epsilon

    for edge in edge_list:
        dist = edge[2]
        if dist == ceiling:
            # would map to a weight of exactly zero; leave the edge out
            continue
        weights[edge[0]][edge[1]] = 1 - dist / ceiling

    netext.copyNodeProperties(net, weights)

    return weights
Example #2
0
    def getGroupwiseDistanceMatrix(self, groups, distance, groupNames=None):
        """
        Returns a distance matrix between groups of nodes.

        Parameters
        ----------
        groups : iterable
            Each element is itself an iterable containing the indices of
            the nodes belonging to one group.
        distance : string
            Name of the distance measure, in any letter case. Supported
            values are "goldstein", "goldstein_d1" and "fst".
        groupNames : sequence, optional
            Names used as node labels in the result, one per group. When
            omitted the groups are labelled 0 .. ngroups-1.

        Returns
        -------
        For "goldstein" and "goldstein_d1" a full network
        (pynet.SymmFullNet) with one node per group. For "fst" whatever
        AlleleFrequencyTable.getFST() returns.

        Raises
        ------
        NotImplementedError
            If the distance measure is not one of the supported names.
        """
        distance = distance.lower()  #any case is ok

        # Validate the measure before doing any work, so an unsupported
        # name fails fast and no matrix is allocated needlessly.
        if distance == "goldstein":
            getGroupwiseDistance = self.getGroupwiseDistance_Goldstein
        elif distance == "goldstein_d1":
            getGroupwiseDistance = self.getGroupwiseDistance_Goldstein_D1
        elif distance == "fst":
            getGroupwiseDistance = None  #computed from an allele frequency table
        else:
            raise NotImplementedError("Distance '" + distance +
                                      "' is not implemented.")

        grouplist = list(groups)
        ngroups = len(grouplist)
        if groupNames is None:
            groupNames = range(ngroups)

        if distance == "fst":  #allele frequency table based distances
            afTable = AlleleFrequencyTable()
            afTable.init_msData(self, grouplist, groupNames)
            return afTable.getFST()

        # Only the pairs i < j are computed and stored
        matrix = pynet.SymmFullNet(ngroups)
        for i in range(0, ngroups):
            for j in range(i + 1, ngroups):
                matrix[groupNames[i],
                       groupNames[j]] = getGroupwiseDistance(
                           grouplist[i], grouplist[j])
        return matrix
Example #3
0
    def get_distance_matrix(self,
                            distance_function,
                            node_names,
                            progressUpdater=None):
        """
        Computes the distance between every pair of the len(self.data)
        items and returns the result as a full network (pynet.SymmFullNet).

        Parameters
        ----------
        distance_function : string
            Either "jaccard_distance" or "bc_dissimilarity".
        node_names : sequence or None
            Labels for the nodes of the result, indexed like the items.
            When None the items are labelled 0 .. size-1.
        progressUpdater : callable, optional
            Called with the fraction (a float) of the matrix elements
            computed so far, at most once per outer row and only after
            more than 1000 new elements have been added.

        Raises
        ------
        KeyError
            If distance_function is not one of the supported names.
        """
        size = len(self.data)

        distance = {
            "jaccard_distance": self.get_jaccard_distance,
            "bc_dissimilarity": self.get_bc_dissimilarity
        }
        # Resolve the measure once instead of once per matrix element
        get_distance = distance[distance_function]

        updateInterval = 1000
        totElems = size * (size - 1) / 2
        elementsAdded = 0
        lastUpdate = 0

        matrix = pynet.SymmFullNet(size)
        if node_names is None:
            node_names = range(0, size)
        for i, iName in enumerate(node_names):
            if progressUpdater is not None:
                if elementsAdded - lastUpdate > updateInterval:
                    progressUpdater(float(elementsAdded) / float(totElems))
                    # remember the report, otherwise every later row
                    # would trigger another callback
                    lastUpdate = elementsAdded
                # row i contributes the pairs (i, i+1) .. (i, size-1)
                elementsAdded += max(size - i - 1, 0)
            for j in range(i + 1, size):
                matrix[iName, node_names[j]] = get_distance(i, j)
        return matrix
Example #4
0
    def getDistanceMatrix(self,
                          distance="lm",
                          nodeNames=None,
                          progressUpdater=None):
        """
        Computes the distance between each pair of nodes and returns the
        corresponding distance matrix as a full network (pynet.SymmFullNet).

        Parameters
        ----------
        distance : string
            Selects the distance method: "lm" (linear Manhattan), "nsa"
            (non-shared alleles), "ap" (allele parsimony), "hybrid" or
            "czekanowski". Any other value falls back to "lm".
        nodeNames : sequence, optional
            Labels for the nodes of the result, indexed like the
            specimens. When omitted the nodes are labelled
            0 .. numberOfSpecimens-1.
        progressUpdater : callable, optional
            Called with the fraction (a float) of the matrix elements
            computed so far, at most once per outer row.
        """
        # Method names rather than bound methods, so only the selected
        # method is looked up on self.
        methodNames = {
            "lm": "getMSDistance_linearManhattan",
            "nsa": "getMSDistance_nonsharedAlleles",
            "ap": "getMSDistance_alleleParsimony",
            "hybrid": "getMSDistance_hybrid",
            "czekanowski": "get_czekanowski_dissimilarity",
        }
        getMSDistance = getattr(
            self, methodNames.get(distance, "getMSDistance_linearManhattan"))

        numberOfSpecimens = len(self._alleles[0])

        minUpdateInterval = 1000
        minUpdateSteps = 30
        totElems = numberOfSpecimens * (numberOfSpecimens - 1) / 2

        # Report at least every minUpdateInterval elements, and for small
        # matrices roughly minUpdateSteps times in total
        updateInterval = max(
            min(minUpdateInterval, int(totElems / float(minUpdateSteps))), 1)

        elementsAdded = 0
        lastUpdate = 0

        matrix = pynet.SymmFullNet(numberOfSpecimens)
        if nodeNames is None:
            nodeNames = range(0, numberOfSpecimens)
        for i, iName in enumerate(nodeNames):
            if progressUpdater is not None:
                if elementsAdded - lastUpdate > updateInterval:
                    progressUpdater(float(elementsAdded) / float(totElems))
                    lastUpdate = elementsAdded
                # row i contributes the pairs (i, i+1) .. (i, n-1); counting
                # one more per row would push the fraction above 1.0
                elementsAdded += max(numberOfSpecimens - i - 1, 0)
            for j in range(i + 1, numberOfSpecimens):
                jName = nodeNames[j]
                matrix[iName, jName] = getMSDistance(self.getNode(i),
                                                     self.getNode(j))
        return matrix
Example #5
0
    def getFST(self):
        """ Returns a full network (pynet.SymmFullNet) holding, for every
        pair of groups, the value -log(1 - num / den), where num and den are
        sums over the loci at which both groups have a positive total
        frequency. Pairs whose denominator is not positive get 0.0.

        From Reynolds, J., Weir, B.S., and Cockerham, C.C. (1983) Estimation of the 
        coancestry coefficient: basis for a short-term genetic distance. _Genetics_, 
        105:767-779, p. 769.
        """
        result = pynet.SymmFullNet(self.nGroups)

        for a in range(self.nGroups):
            for b in range(a):
                numerator = 0.0
                denominator = 0.0
                for locus in range(self.nLoci):
                    na = float(self.totalFreq(a, locus))
                    nb = float(self.totalFreq(b, locus))
                    if not (na > 0 and nb > 0):
                        # locus carries no data for one of the groups
                        continue

                    fa = self.normalizedFreqs(a, locus)
                    fb = self.normalizedFreqs(b, locus)

                    sqDiff = 0.0  # sum of squared frequency differences
                    ha = 1.0  # 1 - sum of squared frequencies, group a
                    hb = 1.0  # same for group b
                    for allele in set(chain(fa.iterkeys(), fb.iterkeys())):
                        pa = fa[allele]
                        pb = fb[allele]
                        sqDiff += (pa - pb)**2
                        ha -= pa**2
                        hb -= pb**2

                    # Terms shared by the numerator and the denominator
                    nTotal = na + nb
                    fourNaNb = 4 * na * nb
                    within = na * ha + nb * hb
                    scale = fourNaNb * (nTotal - 1)

                    numerator += sqDiff / 2. - (nTotal * within) / scale
                    denominator += sqDiff / 2. + (
                        (fourNaNb - na - nb) * within) / scale

                # NOTE(review): math.log raises ValueError if
                # numerator / denominator reaches 1 -- confirm whether the
                # data can produce that (e.g. groups sharing no alleles).
                if denominator > 0:
                    value = -math.log(1 - numerator / denominator)
                else:
                    value = 0.0  #not defined
                result[self.groupNames[a]][self.groupNames[b]] = value

        return result
Example #6
0
def _expectedRowLength(matrixType, lineIndex, netSize):
    """Returns the number of fields that file line lineIndex must contain."""
    if matrixType == "square":
        return netSize
    if matrixType == "upperdiag":
        return netSize - lineIndex
    if matrixType == "supperdiag":
        return netSize - lineIndex - 1
    # "lowerdiag" and "slowerdiag": file line k holds k + 1 fields
    return lineIndex + 1


def _offDiagonalCells(matrixType, rowIndex, fields):
    """Returns the (columnIndex, element) pairs of one row, diagonal excluded."""
    if matrixType == "square":
        return [(column, element) for column, element in enumerate(fields)
                if column != rowIndex]
    if matrixType == "upperdiag":
        # the first field is the diagonal element
        return [(rowIndex + 1 + offset, element)
                for offset, element in enumerate(fields[1:])]
    if matrixType == "supperdiag":
        return [(rowIndex + 1 + offset, element)
                for offset, element in enumerate(fields)]
    if matrixType == "lowerdiag":
        # the last field is the diagonal element
        return list(enumerate(fields[:-1]))
    return list(enumerate(fields))  # "slowerdiag"


def loadNet_mat(input,
                mutualEdges=False,
                splitterChar=None,
                symmetricNet=True,
                nodeNames=[],
                type="square"):
    """
    Loads a network from a file which is in a weight matrix format. Row and
    column k of the matrix correspond to node nodeNames[k], or to node k when
    no node names are given. Diagonal elements are never stored.

    Parameters
    ----------
    input : file-like object
        Iterated line by line, one matrix row per line. When nodeNames is
        empty it must also support readline() and seek(0), because the
        network size is then inferred from the file before it is parsed.
    mutualEdges : bool
        Not used by this function.
    splitterChar : string or None
        Field separator handed to str.split; None splits on whitespace.
    symmetricNet : bool
        If True the result is a pynet.SymmFullNet, otherwise a pynet.FullNet.
    nodeNames : sequence
        Node labels. When non-empty, its length sets the network size. The
        default list is never modified.
    type : string
        "square": Input is in a square matrix format. Weight of an edge between node i an j 
        is in row i and column j of that row. 
        "upperdiag": Same as the square matrix, but now only the elements at the diagonal and above
        are present. 
        "lowerdiag": See upperdiag
        "supperdiag": Strictly upper diagonal. The diagonal elements are not present.
        "slowerdiag": see supperdiag.

    Returns
    -------
    The network that is loaded in a FullNet or in SymmFullNet format.

    Raises
    ------
    Exception
        If type is not one of the names above, if a row has the wrong
        number of fields, or if the number of rows does not fit the size.
    """
    # Reject an unknown layout up front; otherwise a call that supplies
    # nodeNames would silently return an empty network.
    if type not in ("square", "upperdiag", "supperdiag", "lowerdiag",
                    "slowerdiag"):
        raise Exception("Invalid type for the matrix: " + str(type))

    usenodelist = nodeNames is not None and len(nodeNames) > 0
    strictDiag = type in ("supperdiag", "slowerdiag")

    #Get the network size
    if usenodelist:
        netSize = len(nodeNames)
    else:  #we have to infer the size from the file
        if type in ("lowerdiag", "slowerdiag"):
            # the last row is the longest one
            line = ""  #keeps an empty file from leaving `line` unbound
            for line in input:
                pass  #it would be faster to read backwards
            input.seek(0)
            netSize = len(line.strip().split(splitterChar))
        else:
            # the first row is the longest one
            netSize = len(input.readline().strip().split(splitterChar))
            input.seek(0)
        if strictDiag:
            netSize += 1  #the diagonal is missing from the longest row

    if symmetricNet:
        newNet = pynet.SymmFullNet(netSize)
    else:
        newNet = pynet.FullNet(netSize)

    # Strictly triangular layouts have no line for the row that would be
    # empty, so they are one line shorter than the network size.
    if strictDiag:
        expectedRows = netSize - 1
    else:
        expectedRows = netSize

    rowCount = 0
    for lineIndex, line in enumerate(input):
        rowCount += 1
        fields = line.strip().split(splitterChar)

        # "slowerdiag" omits matrix row 0, so file line k is matrix row k + 1
        if type == "slowerdiag":
            rowIndex = lineIndex + 1
        else:
            rowIndex = lineIndex

        #Check that the matrix is of right form
        if len(fields) != _expectedRowLength(type, lineIndex, netSize):
            if (usenodelist and lineIndex == 0
                    and type in ("square", "upperdiag", "supperdiag")):
                raise Exception(
                    "The length of the node label list does not macth the number of columns."
                )
            raise Exception("The length of row " + str(rowIndex) +
                            " does not match the previous rows.")

        if rowCount > expectedRows:
            # Surplus row: keep counting so the error after the loop can
            # report the total, but do not index past the network.
            continue

        #Now fill the row of the matrix
        for columnIndex, element in _offDiagonalCells(type, rowIndex, fields):
            if usenodelist:
                newNet[nodeNames[rowIndex],
                       nodeNames[columnIndex]] = float(element)
            else:
                newNet[rowIndex, columnIndex] = float(element)

    if rowCount != expectedRows:
        raise Exception("Invalid number of rows: There are " +
                        str(rowCount) + " rows and " + str(netSize) +
                        " columns.")

    return newNet