Beispiel #1
0
 def test_2(self):
     """NMDS of the 19-row l19 table must reach a stress under .13.

     The rows are converted to a Euclidean distance matrix, which is
     then ordinated by NMDS without an explicit dimension argument.
     """
     l19_rows = [
         [7, 1, 0, 0, 0, 0, 0, 0, 0],
         [4, 2, 0, 0, 0, 1, 0, 0, 0],
         [2, 4, 0, 0, 0, 1, 0, 0, 0],
         [1, 7, 0, 0, 0, 0, 0, 0, 0],
         [0, 8, 0, 0, 0, 0, 0, 0, 0],
         [0, 7, 1, 0, 0, 0, 0, 0, 0],  # row index 5
         [0, 4, 2, 0, 0, 0, 2, 0, 0],
         [0, 2, 4, 0, 0, 0, 1, 0, 0],
         [0, 1, 7, 0, 0, 0, 0, 0, 0],
         [0, 0, 8, 0, 0, 0, 0, 0, 0],
         [0, 0, 7, 1, 0, 0, 0, 0, 0],  # row index 10
         [0, 0, 4, 2, 0, 0, 0, 3, 0],
         [0, 0, 2, 4, 0, 0, 0, 1, 0],
         [0, 0, 1, 7, 0, 0, 0, 0, 0],
         [0, 0, 0, 8, 0, 0, 0, 0, 0],
         [0, 0, 0, 7, 1, 0, 0, 0, 0],  # row index 15
         [0, 0, 0, 4, 2, 0, 0, 0, 4],
         [0, 0, 0, 2, 4, 0, 0, 0, 1],
         [0, 0, 0, 1, 7, 0, 0, 0, 0],
     ]
     abundances = array(l19_rows, 'float')
     ordination = NMDS(dist_euclidean(abundances), verbosity=0)
     stress = ordination.getStress()
     self.assertLessThan(stress, .13)
Beispiel #2
0
 def test_3(self):
     """NMDS of the l19 table must stay under stress .13 in 3 to 17 dims.

     The Euclidean distance matrix is computed once; NMDS is then run
     for every requested dimension from 3 up to and including 17.
     """
     l19_rows = [
         [7, 1, 0, 0, 0, 0, 0, 0, 0],
         [4, 2, 0, 0, 0, 1, 0, 0, 0],
         [2, 4, 0, 0, 0, 1, 0, 0, 0],
         [1, 7, 0, 0, 0, 0, 0, 0, 0],
         [0, 8, 0, 0, 0, 0, 0, 0, 0],
         [0, 7, 1, 0, 0, 0, 0, 0, 0],  # row index 5
         [0, 4, 2, 0, 0, 0, 2, 0, 0],
         [0, 2, 4, 0, 0, 0, 1, 0, 0],
         [0, 1, 7, 0, 0, 0, 0, 0, 0],
         [0, 0, 8, 0, 0, 0, 0, 0, 0],
         [0, 0, 7, 1, 0, 0, 0, 0, 0],  # row index 10
         [0, 0, 4, 2, 0, 0, 0, 3, 0],
         [0, 0, 2, 4, 0, 0, 0, 1, 0],
         [0, 0, 1, 7, 0, 0, 0, 0, 0],
         [0, 0, 0, 8, 0, 0, 0, 0, 0],
         [0, 0, 0, 7, 1, 0, 0, 0, 0],  # row index 15
         [0, 0, 0, 4, 2, 0, 0, 0, 4],
         [0, 0, 0, 2, 4, 0, 0, 0, 1],
         [0, 0, 0, 1, 7, 0, 0, 0, 0],
     ]
     distances = dist_euclidean(array(l19_rows, 'float'))
     for n_dims in range(3, 18):
         ordination = NMDS(distances, verbosity=0, dimension=n_dims)
         stress = ordination.getStress()
         self.assertLessThan(stress, .13)
Beispiel #3
0
    print "Removing species with less than two occurrences..."
    sp_io = np.where(~(sp > 0), sp, 1)
    column_sums = np.sum(sp_io, 0)
    to_remove = np.where(column_sums < 2)
    sp = np.delete(sp, to_remove, 1)
    colnames = np.delete(colnames, to_remove)
    
    print "Removing plots with less than two species..."
    pl_io = np.where(~(sp > 0), sp, 1)
    row_sums = np.sum(pl_io, 1)
    to_remove = np.where(row_sums < 2)
    sp = np.delete(sp, to_remove, 0)
    rownames = np.delete(rownames, to_remove)

    #print sp.shape, len(rownames)
    #print sp.shape, len(colnames)

    print "Normalizing species coverage data with McCune logarithm..."
    sp = log_mccune(sp)

    from cogent.cluster.nmds import NMDS, metaNMDS
    from cogent.maths.distance_transform import dist_bray_curtis
    
    print "Calculating distance matrix..."
    distmtx = dist_bray_curtis(sp)
    
    nmds = NMDS(distmtx, dimension = 3)
    print nmds.getPoints()
    print nmds.getStress()

    #nmds = NMDS()
def reduce_similarity_matrix(similarity_matrix):
    """Ordinate a similarity matrix with NMDS and return the points.

    Similarities are converted to dissimilarities as ``1 - similarity``
    before being passed to NMDS with its default settings.
    NOTE(review): this presumably expects similarities scaled to [0, 1]
    so the resulting distances are non-negative -- confirm with callers.
    """
    dissimilarity = 1 - similarity_matrix
    embedding = NMDS(dissimilarity)
    return embedding.getPoints()
Beispiel #5
0
 def setUp(self):
     """Prepare the shared fixtures: a 4x4 matrix and its NMDS fit.

     ``self.mtx`` is a symmetric, zero-diagonal matrix of doubles and
     ``self.nm`` is the NMDS object built from it with verbosity off.
     """
     rows = [
         [0, 3, 4, 8],
         [3, 0, 1, 27],
         [4, 1, 0, 3.5],
         [8, 27, 3.5, 0],
     ]
     self.mtx = array(rows, 'd')
     self.nm = NMDS(self.mtx, verbosity=0)
Beispiel #6
0
    def run(self, tempDistanceMetric=None, iDims=2, strDistanceMatrixFile=None, istrmTree=None, istrmEnvr=None):
        """
        Runs the ordination on the loaded data and stores the result in self.pcoa.

        NOTE: the messages and attribute names refer to PCoA, but the ordination
        itself is computed with NMDS.

        :param tempDistanceMetric: The name of the distance metric to use.
                                   None indicates a distance matrix was already given when loading and will be used.
                                   Supports "braycurtis","canberra","chebyshev","cityblock","correlation",
                                   "cosine","euclidean","hamming","sqeuclidean","unifrac_unweighted","unifrac_weighted",
                                   as well as the class's c_SPEARMAN metric.
        :type: String Distance metric name
        :param iDims: How many dimensions to compute/plot.
                      (This can be minimally 2; all combinations of dimensions are plotted).
                      iDims start with 1 (not index-based). Values of 1 or less are ignored
                      and the previously stored dimension count is kept.
        :type: Integer Positive integer 2 or greater.
        :param strDistanceMatrixFile: If the underlying distance matrix should be output, this is the file to output to.
        :type: String Output file for distances, or None to indicate it should not be written.
        :param istrmTree: One of two files needed for unifrac calculations, this is the phylogeny of the features.
        :type: String Path to file
        :param istrmEnvr: One of two files needed for unifrac calculations, this is the environment file for the features.
        :type: String Path to file
        :return boolean: True on success, False when the inputs are invalid or the
                         distance matrix could not be generated.
        """

        if iDims > 1:
            self._iDimensions = iDims

        # If distance metric is None, check to see if the loaded matrix is a distance matrix.
        # If so, run NMDS directly on it; otherwise report the problem and do not run.
        if tempDistanceMetric is None:
            if ValidateData.funcIsTrue(self.isRawData):
                print("PCoA:run::Error, no distance metric was specified but the previous load was not of a distance matrix.")
                return False
            elif ValidateData.funcIsFalse(self.isRawData):
                # The loaded matrix lives on the instance; a bare `dataMatrix` is undefined here.
                self.pcoa = NMDS(self.dataMatrix, verbosity=0)
                return True

        # Make sure the distance metric was a valid string type.
        # (This also rejects None when isRawData was neither True nor False above.)
        if not ValidateData.funcIsValidString(tempDistanceMetric):
            print("PCoA:run::Error, distance metric was not a valid string type.")
            return False

        # Supported distances. The branches are mutually exclusive so that the
        # Spearman result is not overwritten by the generic beta-metric call.
        distanceMatrix = None
        if tempDistanceMetric == self.c_SPEARMAN:
            distanceMatrix = Metric().funcGetDissimilarity(ldSampleTaxaAbundancies=self.dataMatrix, funcDistanceFunction=lambda u,v: spearmanr(u,v)[0])
        elif tempDistanceMetric in [Metric.c_strUnifracUnweighted, Metric.c_strUnifracWeighted]:
            # Unifrac works from the tree/environment files and returns its own labels.
            distanceMatrix, lsLabels = Metric().funcGetBetaMetric(sMetric=tempDistanceMetric, istrmTree=istrmTree, istrmEnvr=istrmEnvr)
            self.lsIDs = lsLabels
        else:
            distanceMatrix = Metric().funcGetBetaMetric(npadAbundancies=self.dataMatrix, sMetric=tempDistanceMetric)

        # NOTE(review): presumably the Metric helpers return False on failure -- confirm.
        if ValidateData.funcIsFalse(distanceMatrix):
            print("PCoA:run::Error, when generating distance matrix.")
            return False

        # Make squareform
        distanceMatrix = squareform(distanceMatrix)

        # Writes distance measures if needed; the context manager guarantees
        # the output file is flushed and closed even if a row fails to write.
        if strDistanceMatrixFile:
            with open(strDistanceMatrixFile, 'w') as hndlDistance:
                csvrDistance = csv.writer(hndlDistance)
                if self.lsIDs:
                    csvrDistance.writerow(["ID"] + self.lsIDs)

                for iRow, adRow in enumerate(distanceMatrix):
                    lsId = [self.lsIDs[iRow]] if self.lsIDs else []
                    csvrDistance.writerow(lsId + adRow.tolist())

        # Never ordinate in fewer than 2 dimensions.
        self.pcoa = NMDS(distanceMatrix, dimension=max(self._iDimensions, 2), verbosity=0)
        self.strRecentMetric = tempDistanceMetric
        return True
def pcoa_coords(dist_arr):
    """Return the NMDS point coordinates for ``dist_arr`` as a list of lists.

    NMDS is run on ``dist_arr`` with verbosity disabled, and each row of
    the resulting points is converted to a plain Python list.

    A list comprehension is used instead of ``map`` so the result is a
    concrete list on Python 3 as well as Python 2 (where ``map`` already
    returned a list); callers can index it or iterate it more than once.
    """
    points = NMDS(dist_arr, verbosity=0).getPoints()
    return [list(row) for row in points]