def __init__(self,
                 distancePath,
                 dtype='distance',
                 aspect='biological_process'):
        """
        Validate the inputs, load the saved matrix and item arrays, and
        work out the names of the two results files.

        distancePath - path to the distances file; must exist
        dtype - 'distance' [default] or 'similarity'
        aspect - biological_process, molecular_function, or cellular_component
                 (stored as given; it is not validated here)

        Raises Exception when distancePath does not exist or when dtype is
        not one of the two accepted values.
        """

        ## input
        if not os.path.exists(distancePath):
            raise Exception("cannot find distances file %s\nexiting..." %
                            (distancePath))

        if dtype not in ['distance', 'similarity']:
            raise Exception("Invalid distant type (dtype) specified")

        self.dtype = dtype
        self.aspect = aspect
        self.distancePath = distancePath

        ## call an instance of SpectralCluster to ensure labels and matrix files are saved
        ## NOTE(review): the instance is otherwise unused; that it writes the
        ## -matrix.npy / -genes.npy files loaded below is taken from the
        ## original comment -- confirm against SpectralCluster
        sc = SpectralCluster(distancePath, dtype=dtype)

        ## raw string: "\." in a plain literal is an invalid escape sequence
        ## NOTE(review): the pattern is unanchored, so every ".csv" in the path
        ## is replaced, not only the extension
        csvPattern = r"\.csv"
        matrixPath = re.sub(csvPattern, "-matrix.npy", distancePath)
        genesPath = re.sub(csvPattern, "-genes.npy", distancePath)
        self.M = np.load(matrixPath)
        self.items = np.load(genesPath)

        ## output
        self.resultsPath1 = re.sub(csvPattern, "-scparams-sv.csv", distancePath)
        self.resultsPath2 = re.sub(csvPattern, "-scparams-cl.csv", distancePath)
def mp_worker(args):
    """
    run spectral clustering for one parameter combination

    args - a single (k, sigma, distancePath, dtype) tuple
           (packed into one argument -- presumably so the function can be
           handed to a map-style call; confirm against the caller)

    returns the SpectralCluster instance after run() has been called
    """

    ## unpack in the body: tuple parameters in a signature are a
    ## SyntaxError on Python 3 (PEP 3113); this form works on both 2 and 3
    k, sigma, distancePath, dtype = args

    sc = SpectralCluster(distancePath, dtype=dtype)
    sc.run(k, sk=None, sigma=sigma, verbose=True)

    return sc
    def _run_sc(self, toRun):
        """
        run spectral clustering (single core)
        """

        for params in toRun:
            k, sigma, dpath, dtype = params
            sc = SpectralCluster(self.distancePath, dtype=dtype)
            sc.run(k, sk=None, sigma=sigma, verbose=True)
            clusterSizes = self.get_cluster_sizes(sc)
            self.writer1.writerow([k, sigma] + [round(sc.avgSilValue, 4)])
            self.writer2.writerow([k, sigma] + clusterSizes)
Esempio n. 4
0
    scps.run(chunks=5, kRange=range(3, 11))

## plot the parameter search
## (skipped when the figure file already exists, so a rerun does not redo it)
psFigureFile = os.path.join(gsaDir, "param-scan-%s.png" % (_aspect))
if not os.path.exists(psFigureFile):
    ## silvalFile and clustersFile are defined outside this excerpt
    ## NOTE(review): presumably the two results files of the parameter scan -- confirm
    scr = SpectralClusterResults(silvalFile, clustersFile)
    scr.plot(figName=psFigureFile)

## run spectral clustering
## k and sigma are hard-coded here
## NOTE(review): presumably chosen from the parameter-scan figure -- confirm
k = 3
sigma = 0.43

## labels file for this aspect; its existence gates the run below
labelsPath = os.path.join(gsaDir, "sc-labels-%s.csv" % (_aspect))

## only cluster when no labels file has been written yet
if not os.path.exists(labelsPath):
    sc = SpectralCluster(geneDistancePath, dtype='distance')
    sc.run(k, sk=None, sigma=sigma, verbose=True)
    sc.save(labelsPath=labelsPath)

import networkx
from parse_KGML import KGML2Graph
from KeggPathway import KeggPathway

## instantiate KeggPathway with no arguments
p = KeggPathway()
## usage sketch left by the original author (commented out, not executed):
#p.add_node('gene1', data={'type': 'gene', })
#p.get_node('gene1')
#{'type': 'gene'}

## file name is built from `pathway`, which is defined outside this excerpt
graphfile = "%s.xml" % pathway
## keep only the second element of what KGML2Graph returns
## NOTE(review): presumably the parsed graph object -- confirm against parse_KGML
graph = KGML2Graph(graphfile)[1]