def mp_worker(params):
    """
    Run spectral clustering for a single parameter set.

    params - one tuple of (k, sigma, distancePath, dtype)

    The arguments arrive packed in a single tuple and are unpacked here in
    the body: tuple parameters in a function signature were removed in
    Python 3 (PEP 3113), while this form runs on both Python 2 and 3 and
    is called exactly the same way, i.e. mp_worker((k, sigma, path, dtype)).

    Returns the SpectralCluster instance after run() has been called on it.
    """

    k, sigma, distancePath, dtype = params

    sc = SpectralCluster(distancePath, dtype=dtype)
    sc.run(k, sk=None, sigma=sigma, verbose=True)

    return sc
def mp_worker(params):
    """
    Run spectral clustering for one (k, sigma, distancePath, dtype) tuple.

    params - a single 4-tuple: (k, sigma, distancePath, dtype)

    The tuple is unpacked inside the body rather than in the signature,
    because signature-level tuple unpacking is a syntax error on Python 3
    (PEP 3113). Callers still pass one tuple argument, as before.

    Returns the SpectralCluster object once its run() call has finished.
    """

    k, sigma, distancePath, dtype = params

    sc = SpectralCluster(distancePath, dtype=dtype)
    sc.run(k, sk=None, sigma=sigma, verbose=True)

    return sc
    def _run_sc(self, toRun):
        """
        Run spectral clustering on a single core, one parameter set at a time.

        toRun - iterable of (k, sigma, distancePath, dtype) tuples

        For each parameter set one row goes to self.writer1
        (k, sigma, average silhouette value rounded to 4 places) and one
        row goes to self.writer2 (k, sigma, followed by the cluster sizes).
        """

        # The path carried in each tuple is not used; the clustering always
        # reads from self.distancePath.
        for k, sigma, _unusedPath, dtype in toRun:
            clusterer = SpectralCluster(self.distancePath, dtype=dtype)
            clusterer.run(k, sk=None, sigma=sigma, verbose=True)

            sizes = self.get_cluster_sizes(clusterer)
            silhouetteRow = [k, sigma, round(clusterer.avgSilValue, 4)]

            self.writer1.writerow(silhouetteRow)
            self.writer2.writerow([k, sigma] + sizes)
    def _run_sc(self, toRun):
        """
        Serial (single core) spectral clustering over a list of parameters.

        toRun - iterable whose items unpack to (k, sigma, distancePath, dtype)

        Writes, per item, the rounded average silhouette value through
        self.writer1 and the cluster sizes through self.writer2; both rows
        are prefixed with k and sigma.
        """

        for paramSet in toRun:
            # third entry (a path) is unpacked but deliberately ignored:
            # self.distancePath is what gets clustered
            k, sigma, _ignored, dtype = paramSet
            prefix = [k, sigma]

            model = SpectralCluster(self.distancePath, dtype=dtype)
            model.run(k, sk=None, sigma=sigma, verbose=True)
            sizes = self.get_cluster_sizes(model)

            self.writer1.writerow(prefix + [round(model.avgSilValue, 4)])
            self.writer2.writerow(prefix + sizes)
    def __init__(self,
                 distancePath,
                 dtype='distance',
                 aspect='biological_process'):
        """
        Validate the inputs, load the saved matrix and item labels, and
        set the paths of the two results files.

        distancePath - path to distance matrix (a .csv file)
        dtype - distance [default] or similarity
        aspect - biological_process, molecular_function, or cellular_component

        Raises Exception when distancePath does not exist or when dtype is
        not 'distance' or 'similarity'.
        """

        ## input
        if not os.path.exists(distancePath):
            raise Exception("cannot find distances file %s\nexiting..." %
                            (distancePath))

        if dtype not in ['distance', 'similarity']:
            raise Exception("Invalid distant type (dtype) specified")

        self.dtype = dtype
        self.aspect = aspect
        self.distancePath = distancePath

        def _derivedPath(suffix):
            # Raw string: "\." in a normal string literal is an invalid
            # escape sequence and triggers a warning on modern Python.
            # The pattern is intentionally left unanchored so the derived
            # names stay exactly what they were before.
            return re.sub(r"\.csv", suffix, distancePath)

        ## call an instance of SpectralCluster to ensure labels and matrix
        ## files are saved; the instance itself is not needed afterwards
        SpectralCluster(distancePath, dtype=dtype)
        self.M = np.load(_derivedPath("-matrix.npy"))
        self.items = np.load(_derivedPath("-genes.npy"))

        ## output
        self.resultsPath1 = _derivedPath("-scparams-sv.csv")
        self.resultsPath2 = _derivedPath("-scparams-cl.csv")
Beispiel #6
0
    scps = SpectralClusterParamSearch(geneDistancePath,dtype='distance')
    scps.run(chunks=15)

## plot the parameter search
# The figure is only generated when the target file is missing, so a rerun
# of this script skips the plotting step once the image exists.
# homeDir, _aspect, silvalFile and clustersFile are expected to be defined
# before this point in the script.
psFigureFile = os.path.join(homeDir,"param-scan-%s.png"%(_aspect))
if not os.path.exists(psFigureFile):
    scr = SpectralClusterResults(silvalFile,clustersFile)
    scr.plot(figName=psFigureFile)

## run spectral clustering
# Fixed parameters for the final clustering run.
# NOTE(review): presumably chosen by inspecting the parameter-scan figure
# above -- confirm.
k = 20
sigma = 0.08

# Cluster labels are cached on disk: clustering only runs when the labels
# file does not exist yet.
labelsPath = os.path.join(homeDir,"sc-labels-%s.csv"%(_aspect))
if not os.path.exists(labelsPath):
    sc = SpectralCluster(geneDistancePath,dtype='distance')
    sc.run(k,sk=None,sigma=sigma,verbose=True)
    sc.save(labelsPath=labelsPath)

## Save gene sets
# Load the parsed BLAST summary restricted to two taxa.
# NOTE(review): '8355' and '8364' look like NCBI taxonomy IDs -- confirm
# which species they refer to.
bm = BlastMapper()
bmap = bm.load_summary('blast-parsed-summary.csv',best=False,taxaList=['8355','8364'])

# Bounds forwarded to GeneSetCollection.write as transcriptMin/transcriptMax.
transcriptMin,transcriptMax = 9,1000
# The .gmt file is only written when it does not already exist.
# gene2go is expected to be defined before this point in the script.
gsFile = os.path.join(homeDir,"%s.gmt"%(_aspect))
if not os.path.exists(gsFile):
    gsc = GeneSetCollection(labelsPath,gene2go)
    gsc.write(blastMap=bmap,transcriptMin=transcriptMin,transcriptMax=transcriptMax,outFile=gsFile)

print("process complete.")
Beispiel #7
0
    scps.run(chunks=5, kRange=range(3, 11))

## plot the parameter search
# Skip plotting when the figure file is already present in gsaDir.
# gsaDir, _aspect, silvalFile and clustersFile are expected to be defined
# before this point in the script.
psFigureFile = os.path.join(gsaDir, "param-scan-%s.png" % (_aspect))
if not os.path.exists(psFigureFile):
    scr = SpectralClusterResults(silvalFile, clustersFile)
    scr.plot(figName=psFigureFile)

## run spectral clustering
# Fixed parameters for the final clustering run.
# NOTE(review): presumably read off the parameter-scan figure -- confirm.
k = 3
sigma = 0.43

labelsPath = os.path.join(gsaDir, "sc-labels-%s.csv" % (_aspect))

# Only cluster (and save labels) when no labels file exists yet.
if not os.path.exists(labelsPath):
    sc = SpectralCluster(geneDistancePath, dtype='distance')
    sc.run(k, sk=None, sigma=sigma, verbose=True)
    sc.save(labelsPath=labelsPath)

import networkx
from parse_KGML import KGML2Graph
from KeggPathway import KeggPathway

# Create a KeggPathway instance with no constructor arguments.
p = KeggPathway()
# Usage sketch left commented out by the original author:
#p.add_node('gene1', data={'type': 'gene', })
#p.get_node('gene1')
#{'type': 'gene'}

# File name is "<pathway>.xml"; `pathway` is expected to be defined before
# this point in the script.
graphfile = "%s.xml" % pathway
# Keep only the element at index 1 of what KGML2Graph returns.
# NOTE(review): presumably that element is the graph object -- confirm
# against parse_KGML.
graph = KGML2Graph(graphfile)[1]