Exemple #1
0
 def testRightVersion(self):
     """
     A repository initialised through one handle exposes, when reopened
     for reading through a second handle, a schema version equal to the
     string form of the writer's version.
     """
     writer = datarepo.SqlDataRepository(self._repoPath)
     writer.open(datarepo.MODE_WRITE)
     writer.initialise()

     reader = datarepo.SqlDataRepository(self._repoPath)
     reader.open(datarepo.MODE_READ)

     storedVersion = reader._schemaVersion
     writtenVersion = str(writer.version)
     self.assertEqual(storedVersion, writtenVersion)
Exemple #2
0
 def testWrongVersion(self):
     """
     A repository initialised with the version "wrong.version" must be
     rejected with RepoSchemaVersionMismatchException when a fresh handle
     opens it for reading.
     """
     writer = datarepo.SqlDataRepository(self._repoPath)
     bogusVersion = datarepo.SqlDataRepository.SchemaVersion("wrong.version")
     writer.version = bogusVersion
     writer.open(datarepo.MODE_WRITE)
     writer.initialise()

     reader = datarepo.SqlDataRepository(self._repoPath)
     expectedError = exceptions.RepoSchemaVersionMismatchException
     with self.assertRaises(expectedError):
         reader.open(datarepo.MODE_READ)
Exemple #3
0
 def setUp(self):
     """
     Builds a backend over the test data repository (opened in read
     mode) and records the repository's first dataset, that dataset's
     id, and an access map keyed by the dataset's local id.
     """
     testRepo = datarepo.SqlDataRepository(paths.testDataRepo)
     testRepo.open(datarepo.MODE_READ)
     self.backend = backend.Backend(testRepo)

     allDatasets = self.backend.getDataRepository().getDatasets()
     self.dataset = allDatasets[0]
     self.dataset_id = self.dataset.getId()

     localId = self.dataset.getLocalId()
     self.access_map = {localId: 4}
Exemple #4
0
    def __enter__(self):
        """
        Opens the repository at ``self._filename`` in write mode and
        initialises it when no file existed at that path beforehand.

        :return: this object, so it can be bound by ``with ... as``
        """
        # Look for the file *before* opening it: opening a database for
        # writing may itself create the file, in which case a check made
        # afterwards would always find it and initialisation of a brand
        # new repository would be silently skipped.
        isNewRepo = not os.path.isfile(self._filename)

        self._repo = repo.SqlDataRepository(self._filename)
        self._repo.open(repo.MODE_WRITE)

        if isNewRepo:
            self._repo.initialise()

        return self
 def __init__(self, rnaQuantificationLocalId, baseDir):
     """
     :param rnaQuantificationLocalId: name whose last three characters
         (the '.db' suffix) are dropped to form the id handed to the
         parent constructor
     :param baseDir: forwarded unchanged to the parent constructor
     """
     self._dataset = datasets.Dataset(_datasetName)

     testRepo = datarepo.SqlDataRepository(paths.testDataRepo)
     self._repo = testRepo
     testRepo.open(datarepo.MODE_READ)

     self._referenceSet = references.AbstractReferenceSet("test_rs")

     suffix = ".db"
     strippedId = rnaQuantificationLocalId[:-len(suffix)]
     super(RnaQuantificationTest, self).__init__(strippedId, baseDir)
 def setUp(self):
     """
     Records the module test server's URL and creates a local client,
     using JSON serialization, on top of a backend that wraps the test
     data repository opened in read mode.
     """
     self._maxDiff = None
     self._dataUrl = moduleTestServer.getUrl()

     testRepo = datarepo.SqlDataRepository(paths.testDataRepo)
     testRepo.open(datarepo.MODE_READ)

     self._backend = backend.Backend(testRepo)
     self._client = client.LocalClient(
         self._backend, serialization="application/json")
 def _createContinuousSet(self):
     """
     Opens the test data repository in read mode, creates a dataset
     named "testDs" and calls continuous.readSet for the set name
     "testContinuous"; the repository, dataset, set name and resulting
     set are all stored on this instance.
     """
     setName = "testContinuous"
     self._continuousSetName = setName

     testRepo = datarepo.SqlDataRepository(paths.testDataRepo)
     self._repo = testRepo
     testRepo.open(datarepo.MODE_READ)

     dataset = datasets.Dataset("testDs")
     self._dataset = dataset
     self._continuousSet = continuous.readSet(dataset, setName)
Exemple #8
0
 def __init__(self, featureSetLocalName, dataPath):
     """
     :param featureSetLocalName: Name of the GFF3 resource corresponding
         to a pair of files, .db and .gff3; its trailing '.db' is removed
         before it is used to look up the test data and passed to the
         parent constructor
     :param dataPath: string representing full path to the .db file
     :return:
     """
     self._dataset = datasets.Dataset(_datasetName)

     testRepo = datarepo.SqlDataRepository(paths.testDataRepo)
     self._repo = testRepo
     testRepo.open(datarepo.MODE_READ)

     self._ontology = testRepo.getOntologyByName(paths.ontologyName)
     self._referenceSet = references.AbstractReferenceSet("test_rs")

     suffix = ".db"
     baseName = featureSetLocalName[:-len(suffix)]
     self._testData = _testDataForFeatureSetName[baseName]
     super(FeatureSetTests, self).__init__(baseName, dataPath)
Exemple #9
0
 def _createVariantAnnotationSet(self, vcfDir):
     """
     Builds the variant set "testVariantSet" by populating it from
     vcfDir, then wraps it in a variant annotation set "testVAs" whose
     ontology is fetched by name from the test data repository.

     :param vcfDir: directory handed to populateFromDirectory
     """
     self._variantSetName = "testVariantSet"

     testRepo = datarepo.SqlDataRepository(paths.testDataRepo)
     self._repo = testRepo
     testRepo.open(datarepo.MODE_READ)

     self._dataset = datasets.Dataset("testDs")

     variantSet = variants.HtslibVariantSet(
         self._dataset, self._variantSetName)
     self._variantSet = variantSet
     variantSet.populateFromDirectory(vcfDir)

     annotationSet = variants.HtslibVariantAnnotationSet(
         variantSet, "testVAs")
     self._variantAnnotationSet = annotationSet

     ontology = testRepo.getOntologyByName(paths.ontologyName)
     annotationSet.setOntology(ontology)
Exemple #10
0
    def createRepo(self):
        """
        Creates the repository for all the data we've just downloaded.

        A repository is initialised at ``self.repoPath`` and filled with:
        one reference set read from ``self.fastaFilePath``, one dataset,
        one variant set built from ``self.vcfFilePaths`` and one read
        group set per (sample, BAM) pair taken from ``self.samples`` and
        ``self.bamFilePaths``.  After committing, the repository is
        reopened in read mode and its summary is printed.
        """
        # Species record shared by the reference set and every reference
        # in it.  The term must read "Homo sapiens" (NCBI taxon 9606).
        speciesJson = (
            '{"id": "9606",'
            + '"term": "Homo sapiens", "source_name": "NCBI"}')

        repo = datarepo.SqlDataRepository(self.repoPath)
        repo.open("w")
        repo.initialise()

        referenceSet = references.HtslibReferenceSet("GRCh37-subset")
        referenceSet.populateFromFile(self.fastaFilePath)
        referenceSet.setDescription("Subset of GRCh37 used for demonstration")
        referenceSet.setSpeciesFromJson(speciesJson)
        for reference in referenceSet.getReferences():
            reference.setSpeciesFromJson(speciesJson)
            reference.setSourceAccessions(
                self.accessions[reference.getName()] + ".subset")
        repo.insertReferenceSet(referenceSet)

        dataset = datasets.Dataset("1kg-p3-subset")
        dataset.setDescription("Sample data from 1000 Genomes phase 3")
        repo.insertDataset(dataset)

        variantSet = variants.HtslibVariantSet(dataset, "mvncall")
        variantSet.setReferenceSet(referenceSet)
        # self.vcfFilePaths holds (vcfFile, indexFile) pairs; split them
        # into the two parallel lists populateFromFile is called with.
        dataUrls = [vcfFile for vcfFile, _ in self.vcfFilePaths]
        indexFiles = [indexFile for _, indexFile in self.vcfFilePaths]
        variantSet.populateFromFile(dataUrls, indexFiles)
        variantSet.checkConsistency()
        repo.insertVariantSet(variantSet)

        for sample, (bamFile, indexFile) in zip(
                self.samples, self.bamFilePaths):
            readGroupSet = reads.HtslibReadGroupSet(dataset, sample)
            readGroupSet.populateFromFile(bamFile, indexFile)
            readGroupSet.setReferenceSet(referenceSet)
            repo.insertReadGroupSet(readGroupSet)

        repo.commit()
        repo.close()
        self.log("Finished creating the repository; summary:\n")
        # Reopen in read mode purely to print the summary.
        # NOTE(review): the repository is left open afterwards, as before.
        repo.open("r")
        repo.printSummary()
Exemple #11
0
    def __init__(self, inputDirectory, outputDirectory, force):
        """
        Converts human readable dataset from compliance repository,
        and translates it into a reference-server readable filesystem
        with binary files.

        :param inputDirectory: location of
            the human readable compliance dataset; when None the data is
            downloaded from GitHub into a temporary directory
        :param outputDirectory: location of
            the file hierarchy suitable for deploying on the reference server
        :param force: when true, an existing output directory is removed;
            otherwise an existing output directory makes the process exit
            with status 1
        """
        # Imported locally so this method does not rely on the interactive
        # ``exit`` helper injected by the ``site`` module.
        import sys

        self.inputDirectory = inputDirectory
        self.outputDirectory = outputDirectory
        self.repoPath = os.path.abspath(
            os.path.join(outputDirectory, "registry.db"))
        self.tempdir = None

        if os.path.exists(self.outputDirectory):
            if force:
                utils.log("Removing existing output directory at '{}'".format(
                    self.outputDirectory))
                shutil.rmtree(self.outputDirectory)
            else:
                utils.log("Output directory '{}' already exists".format(
                    self.outputDirectory))
                utils.log("Please specify an output path that does not exist")
                utils.log("Exiting...")
                sys.exit(1)

        # If no input directory is specified download from GitHub
        if inputDirectory is None:
            utils.log("Downloading test data...")
            self.tempdir = tempfile.mkdtemp()
            assert (os.path.exists(self.tempdir))
            url = "https://github.com/ga4gh/compliance/archive/master.zip"
            filePath = os.path.join(self.tempdir, 'compliance-master.zip')
            downloader = file_downloader.HttpFileDownloader(url, filePath)
            downloader.download()
            utils.log("Extracting test data...")
            with zipfile.ZipFile(filePath, "r") as z:
                z.extractall(self.tempdir)
            # The archive unpacks into 'compliance-master'; the data set
            # itself lives in its 'test-data' subdirectory.
            self.inputDirectory = os.path.join(self.tempdir,
                                               'compliance-master',
                                               'test-data')
        # The repository object is only constructed here, not opened.
        repo = datarepo.SqlDataRepository(self.repoPath)
        self.repo = repo
Exemple #12
0
 def __init__(self, registryDb):
     """
     Opens the repository at registryDb in read mode, hands it to the
     parent backend constructor and stores the result of guppy.hpy()
     as self.profiler.

     :param registryDb: path passed to datarepo.SqlDataRepository
     """
     dataRepository = datarepo.SqlDataRepository(registryDb)
     dataRepository.open(datarepo.MODE_READ)
     super(HeapProfilerBackend, self).__init__(dataRepository)
     self.profiler = guppy.hpy()
Exemple #13
0
 def setUp(self):
     """
     Opens the test data repository in read mode and stacks a backend
     and a local client on top of it.
     """
     testRepo = datarepo.SqlDataRepository(paths.testDataRepo)
     self._repo = testRepo
     testRepo.open(datarepo.MODE_READ)

     testBackend = backend.Backend(testRepo)
     self._backend = testBackend
     self._client = client.LocalClient(testBackend)
Exemple #14
0
 def testDbFileWithoutTables(self):
     """
     Opening the file at self._repoPath for reading must raise
     RepoInvalidDatabaseException.
     """
     emptyRepo = datarepo.SqlDataRepository(self._repoPath)
     self.assertRaises(
         exceptions.RepoInvalidDatabaseException,
         emptyRepo.open, datarepo.MODE_READ)
Exemple #15
0
 def __init__(self, registryDb):
     """
     Opens the repository at registryDb in read mode, hands it to the
     parent backend constructor and stores a new cProfile.Profile as
     self.profiler.

     :param registryDb: path passed to datarepo.SqlDataRepository
     """
     dataRepository = datarepo.SqlDataRepository(registryDb)
     dataRepository.open(datarepo.MODE_READ)
     super(CpuProfilerBackend, self).__init__(dataRepository)
     self.profiler = cProfile.Profile()
Exemple #16
0
 def testTextFile(self):
     """
     After the file at self._repoPath is overwritten with plain text,
     opening it as a repository for reading must raise
     RepoInvalidDatabaseException.
     """
     with open(self._repoPath, 'w') as handle:
         handle.write('This is now a text file')

     textRepo = datarepo.SqlDataRepository(self._repoPath)
     self.assertRaises(
         exceptions.RepoInvalidDatabaseException,
         textRepo.open, datarepo.MODE_READ)
Exemple #17
0
 def testDirectory(self):
     """
     Opening for reading a repository whose path comes from
     makeTempDir() must raise RepoInvalidDatabaseException.
     """
     directoryPath = makeTempDir()
     directoryRepo = datarepo.SqlDataRepository(directoryPath)
     self.assertRaises(
         exceptions.RepoInvalidDatabaseException,
         directoryRepo.open, datarepo.MODE_READ)
Exemple #18
0
 def testNonexistentFile(self):
     """
     Opening for reading a repository at the path
     "aFilePathThatDoesNotExist" must raise RepoNotFoundException.
     """
     missingRepo = datarepo.SqlDataRepository("aFilePathThatDoesNotExist")
     self.assertRaises(
         exceptions.RepoNotFoundException,
         missingRepo.open, datarepo.MODE_READ)
Exemple #19
0
 def setUp(self):
     """
     Stores the test data repository on the instance and opens it in
     read mode.
     """
     testRepo = datarepo.SqlDataRepository(paths.testDataRepo)
     self._dataRepo = testRepo
     testRepo.open(datarepo.MODE_READ)
Exemple #20
0
            """)

    args = parser.parse_args()

    registryDb = "candig-example-data/registry.db"

    if args.profile == 'heap':
        backendClass = HeapProfilerBackend
        backend = backendClass(registryDb)
        args.repeatLimit = 1
        args.pageLimit = 1
    elif args.profile == 'cpu':
        backendClass = CpuProfilerBackend
        backend = backendClass(registryDb)
    else:
        repo = datarepo.SqlDataRepository(registryDb)
        repo.open(datarepo.MODE_READ)
        backend = backend.Backend(repo)
    # Get our list of callSetids
    callSetIds = args.callSetIds
    if callSetIds != []:
        callSetIds = None
        if args.callSetIds != "*":
            callSetIds = args.callSetIds.split(",")

    minTime = benchmarkOneQuery(_heavyQuery(args.variantSetId, callSetIds),
                                args.repeatLimit, args.pageLimit)
    print(minTime)

    if args.profile == 'cpu':
        stats = pstats.Stats(backend.profiler)