Beispiel #1
0
 def testRightVersion(self):
     """
     Initialises a repository at self._repoPath, reopens it read-only
     through a second instance, and checks that the second instance's
     _schemaVersion equals the string form of the first instance's
     version.
     """
     repo = datarepo.SqlDataRepository(self._repoPath)
     repo.open(datarepo.MODE_WRITE)
     repo.initialise()
     anotherRepo = datarepo.SqlDataRepository(self._repoPath)
     anotherRepo.open(datarepo.MODE_READ)
     # assertEquals is a deprecated alias that was removed in Python 3.12;
     # assertEqual is the supported spelling on every unittest version.
     self.assertEqual(anotherRepo._schemaVersion, str(repo.version))
Beispiel #2
0
 def testWrongVersion(self):
     """
     Initialises a repository whose version has been replaced with
     "wrong.version" and checks that opening it for reading through a
     fresh instance raises RepoSchemaVersionMismatchException.
     """
     writer = datarepo.SqlDataRepository(self._repoPath)
     bogusVersion = datarepo.SqlDataRepository.SchemaVersion(
         "wrong.version")
     writer.version = bogusVersion
     writer.open(datarepo.MODE_WRITE)
     writer.initialise()
     reader = datarepo.SqlDataRepository(self._repoPath)
     expectedError = exceptions.RepoSchemaVersionMismatchException
     with self.assertRaises(expectedError):
         reader.open(datarepo.MODE_READ)
    def __init__(self, inputDirectory, outputDirectory):
        """
        Sets up the conversion of the human readable compliance dataset
        into a file hierarchy (with binary files) readable by the
        reference server.

        When no input directory is given, the compliance data is
        downloaded from GitHub and unpacked into a temporary directory,
        which then provides the input directory.

        :param inputDirectory: location of the human readable compliance
            dataset, or None to download it
        :param outputDirectory: location of the file hierarchy suitable
            for deploying on the reference server
        """
        self.inputDirectory = inputDirectory
        self.outputDirectory = outputDirectory
        self.repoPath = os.path.join(outputDirectory, "repo.db")
        self.tempdir = None

        # Without an explicit input directory, fetch the data from GitHub
        if inputDirectory is None:
            utils.log("Downloading test data...")
            self.tempdir = tempfile.mkdtemp()
            assert(os.path.exists(self.tempdir))
            archiveUrl = (
                "https://github.com/ga4gh/compliance/archive/master.zip")
            archivePath = os.path.join(
                self.tempdir, 'compliance-master.zip')
            utils.HttpFileDownloader(archiveUrl, archivePath).download()
            utils.log("Extracting test data...")
            with zipfile.ZipFile(archivePath, "r") as archive:
                archive.extractall(self.tempdir)
            # The archive unpacks into 'compliance-master'; the dataset
            # lives in its 'test-data' subdirectory.
            self.inputDirectory = os.path.join(
                self.tempdir, 'compliance-master', 'test-data')
        self.repo = datarepo.SqlDataRepository(self.repoPath)
 def setUp(self):
     """
     Opens the test data repository read-only, builds a backend on top
     of it and attaches a local client to that backend. Also records
     the file:// URL of the repository.
     """
     # NOTE(review): unittest's diff-length attribute is `maxDiff`;
     # confirm that `_maxDiff` is really what is intended here.
     self._maxDiff = None
     testRepoPath = paths.testDataRepo
     self._dataUrl = "file://{}".format(testRepoPath)
     repository = datarepo.SqlDataRepository(testRepoPath)
     repository.open(datarepo.MODE_READ)
     self._backend = backend.Backend(repository)
     self._client = client.LocalClient(self._backend)
Beispiel #5
0
 def __init__(self, featureSetLocalName, dataPath):
     """
     :param featureSetLocalName: file name of the feature set database;
         its last three characters (expected to be '.db') are dropped
         to obtain the name used for the test data lookup and passed
         to the parent constructor
     :param dataPath: string representing full path to the .db file
     """
     self._dataset = datasets.Dataset(_datasetName)
     self._repo = datarepo.SqlDataRepository(paths.testDataRepo)
     self._repo.open(datarepo.MODE_READ)
     ontologyName = paths.ontologyName
     self._ontology = self._repo.getOntologyByName(ontologyName)
     self._referenceSet = references.AbstractReferenceSet("test_rs")
     baseName = featureSetLocalName[:-3]  # remove '.db'
     self._testData = _testDataForFeatureSetName[baseName]
     super(FeatureSetTests, self).__init__(baseName, dataPath)
Beispiel #6
0
 def _createVariantAnnotationSet(self, vcfDir):
     """
     Builds the variant set "testVariantSet" from the specified
     directory of VCF files, then a variant annotation set "testVAs"
     on top of it, using the ontology looked up in the test data
     repository.
     """
     variantSetName = "testVariantSet"
     self._variantSetName = variantSetName
     self._repo = datarepo.SqlDataRepository(paths.testDataRepo)
     self._repo.open(datarepo.MODE_READ)
     self._dataset = datasets.Dataset("testDs")
     variantSet = variants.HtslibVariantSet(self._dataset, variantSetName)
     self._variantSet = variantSet
     variantSet.populateFromDirectory(vcfDir)
     annotationSet = variants.HtslibVariantAnnotationSet(
         variantSet, "testVAs")
     self._variantAnnotationSet = annotationSet
     ontology = self._repo.getOntologyByName(paths.ontologyName)
     annotationSet.setOntology(ontology)
    def __init__(self, inputDirectory, outputDirectory, force):
        """
        Converts human readable dataset from compliance repository,
        and translates it into a reference-server readable filesystem
        with binary files.

        If the output directory already exists it is either removed
        (when force is true) or the process exits with status 1. When
        no input directory is given, the compliance data is downloaded
        from GitHub and unpacked into a temporary directory.

        :param inputDirectory: location of
            the human readable compliance dataset, or None to download it
        :param outputDirectory: location of
            the file hierarchy suitable for deploying on the reference server
        :param force: if true, an existing output directory is deleted
            instead of aborting
        :raises SystemExit: with status 1 if the output directory exists
            and force is false
        """
        self.inputDirectory = inputDirectory
        self.outputDirectory = outputDirectory
        self.repoPath = os.path.abspath(
            os.path.join(outputDirectory, "registry.db"))
        self.tempdir = None

        if os.path.exists(self.outputDirectory):
            if force:
                utils.log("Removing existing output directory at '{}'".format(
                    self.outputDirectory))
                shutil.rmtree(self.outputDirectory)
            else:
                utils.log("Output directory '{}' already exists".format(
                    self.outputDirectory))
                utils.log("Please specify an output path that does not exist")
                utils.log("Exiting...")
                # The bare exit() builtin is injected by the site module
                # and is missing under `python -S` or in frozen/embedded
                # interpreters; raising SystemExit is always available
                # and has the same effect.
                raise SystemExit(1)

        # If no input directory is specified download from GitHub
        if inputDirectory is None:
            utils.log("Downloading test data...")
            self.tempdir = tempfile.mkdtemp()
            assert (os.path.exists(self.tempdir))
            url = "https://github.com/ga4gh/compliance/archive/master.zip"
            filePath = os.path.join(self.tempdir, 'compliance-master.zip')
            downloader = utils.HttpFileDownloader(url, filePath)
            downloader.download()
            utils.log("Extracting test data...")
            with zipfile.ZipFile(filePath, "r") as z:
                z.extractall(self.tempdir)
            # The archive unpacks into 'compliance-master'; the dataset
            # lives in its 'test-data' subdirectory.
            self.inputDirectory = os.path.join(self.tempdir,
                                               'compliance-master',
                                               'test-data')
        self.repo = datarepo.SqlDataRepository(self.repoPath)
    def createRepo(self):
        """
        Builds the data repository at self.repoPath from the downloaded
        files: one reference set, one dataset, one variant set and one
        read group set per sample. Commits, closes, then reopens the
        repository for reading to print its summary.
        """
        dataRepo = datarepo.SqlDataRepository(self.repoPath)
        dataRepo.open("w")
        dataRepo.initialise()

        # Reference set, populated from the FASTA file.
        taxonId = 9606
        grch37 = references.HtslibReferenceSet("GRCh37-subset")
        grch37.populateFromFile(self.fastaFilePath)
        grch37.setDescription("Subset of GRCh37 used for demonstration")
        grch37.setNcbiTaxonId(taxonId)
        for ref in grch37.getReferences():
            ref.setNcbiTaxonId(taxonId)
            accession = self.accessions[ref.getName()] + ".subset"
            ref.setSourceAccessions(accession)
        dataRepo.insertReferenceSet(grch37)

        # Dataset that owns the variant set and the read group sets.
        kgDataset = datasets.Dataset("1kg-p3-subset")
        kgDataset.setDescription("Sample data from 1000 Genomes phase 3")
        dataRepo.insertDataset(kgDataset)

        # Variant set; self.vcfFilePaths holds (vcfFile, indexFile) pairs.
        mvncall = variants.HtslibVariantSet(kgDataset, "mvncall")
        mvncall.setReferenceSet(grch37)
        vcfPaths = [pair[0] for pair in self.vcfFilePaths]
        vcfIndexPaths = [pair[1] for pair in self.vcfFilePaths]
        mvncall.populateFromFile(vcfPaths, vcfIndexPaths)
        mvncall.checkConsistency()
        dataRepo.insertVariantSet(mvncall)

        # One read group set per sample, paired with its BAM and index.
        for sample, bamPair in zip(self.samples, self.bamFilePaths):
            bamFile, bamIndex = bamPair
            sampleReads = reads.HtslibReadGroupSet(kgDataset, sample)
            sampleReads.populateFromFile(bamFile, bamIndex)
            sampleReads.setReferenceSet(grch37)
            dataRepo.insertReadGroupSet(sampleReads)

        dataRepo.commit()
        dataRepo.close()
        self.log("Finished creating the repository; summary:\n")
        dataRepo.open("r")
        dataRepo.printSummary()
Beispiel #9
0
 def __init__(self, registryDb):
     """
     Opens the repository at registryDb read-only, passes it to the
     parent constructor and creates a cProfile profiler.
     """
     dataRepository = datarepo.SqlDataRepository(registryDb)
     dataRepository.open(datarepo.MODE_READ)
     super(CpuProfilerBackend, self).__init__(dataRepository)
     self.profiler = cProfile.Profile()
Beispiel #10
0
 def __init__(self, registryDb):
     """
     Opens the repository at registryDb read-only, passes it to the
     parent constructor and creates a guppy heap profiler.
     """
     dataRepository = datarepo.SqlDataRepository(registryDb)
     dataRepository.open(datarepo.MODE_READ)
     super(HeapProfilerBackend, self).__init__(dataRepository)
     self.profiler = guppy.hpy()
Beispiel #11
0
            """)

    args = parser.parse_args()

    registryDb = "ga4gh-example-data/registry.db"

    if args.profile == 'heap':
        backendClass = HeapProfilerBackend
        backend = backendClass(registryDb)
        args.repeatLimit = 1
        args.pageLimit = 1
    elif args.profile == 'cpu':
        backendClass = CpuProfilerBackend
        backend = backendClass(registryDb)
    else:
        repo = datarepo.SqlDataRepository(registryDb)
        repo.open(datarepo.MODE_READ)
        backend = backend.Backend(repo)
    # Get our list of callSetids
    callSetIds = args.callSetIds
    if callSetIds != []:
        callSetIds = None
        if args.callSetIds != "*":
            callSetIds = args.callSetIds.split(",")

    minTime = benchmarkOneQuery(_heavyQuery(args.variantSetId, callSetIds),
                                args.repeatLimit, args.pageLimit)
    print(minTime)

    if args.profile == 'cpu':
        stats = pstats.Stats(backend.profiler)
Beispiel #12
0
 def __init__(self, args):
     """
     Stores args and its registryPath, and creates (without opening)
     the SQL data repository for that path.
     """
     self._args = args
     registryPath = args.registryPath
     self._registryPath = registryPath
     self._repo = datarepo.SqlDataRepository(registryPath)
Beispiel #13
0
def data_repo(path):
    """
    Returns the SQL data repository at the specified path, opened for
    reading.
    """
    repo = datarepo.SqlDataRepository(path)
    repo.open(datarepo.MODE_READ)
    return repo
Beispiel #14
0
 def testNonexistantFile(self):
     """
     Opening a repository for the path "aFilePathThatDoesNotExist"
     must raise RepoNotFoundException.
     """
     missingPath = "aFilePathThatDoesNotExist"
     missingRepo = datarepo.SqlDataRepository(missingPath)
     expectedError = exceptions.RepoNotFoundException
     with self.assertRaises(expectedError):
         missingRepo.open(datarepo.MODE_READ)
Beispiel #15
0
 def testDirectory(self):
     """
     Opening a repository on the path returned by makeTempDir()
     (presumably a directory rather than a database file) must raise
     RepoInvalidDatabaseException.
     """
     directoryRepo = datarepo.SqlDataRepository(makeTempDir())
     expectedError = exceptions.RepoInvalidDatabaseException
     with self.assertRaises(expectedError):
         directoryRepo.open(datarepo.MODE_READ)
Beispiel #16
0
 def testTextFile(self):
     """
     Overwrites the repository file with plain text and checks that
     opening it raises RepoInvalidDatabaseException.
     """
     with open(self._repoPath, 'w') as handle:
         handle.write('This is now a text file')
     textRepo = datarepo.SqlDataRepository(self._repoPath)
     expectedError = exceptions.RepoInvalidDatabaseException
     with self.assertRaises(expectedError):
         textRepo.open(datarepo.MODE_READ)
Beispiel #17
0
 def testDbFileWithoutTables(self):
     """
     Opening the repository at self._repoPath without initialising it
     first must raise RepoInvalidDatabaseException.
     """
     uninitialisedRepo = datarepo.SqlDataRepository(self._repoPath)
     expectedError = exceptions.RepoInvalidDatabaseException
     with self.assertRaises(expectedError):
         uninitialisedRepo.open(datarepo.MODE_READ)
 def readRepo(self):
     """
     Returns a new repository instance for self._repoPath, opened for
     reading.
     """
     reader = datarepo.SqlDataRepository(self._repoPath)
     reader.open(datarepo.MODE_READ)
     return reader
Beispiel #19
0
 def setUp(self):
     """
     Opens the test data repository read-only as self._dataRepo.
     """
     testRepoPath = paths.testDataRepo
     self._dataRepo = datarepo.SqlDataRepository(testRepoPath)
     self._dataRepo.open(datarepo.MODE_READ)
Beispiel #20
0
def configure(configFile=None, baseConfig="ProductionConfig",
              port=8000, extraConfig=None):
    """
    Configures the module-level ``app`` and allocates its backend.

    The steps, in order:

    1. A stream handler at WARNING level is added to the app logger.
    2. Configuration is loaded from the ``ga4gh.serverconfig`` object
       named by baseConfig, then from the file named by the
       GA4GH_CONFIGURATION environment variable (if set), then from
       configFile (if given), then from extraConfig.
    3. The file handle cache size, CORS and server status are set up.
    4. A data repository is created from the DATA_SOURCE URL; the
       supported schemes are ``simulated``, ``empty`` and ``file``
       (also used when the URL has no scheme).
    5. A backend wrapping that repository is stored as ``app.backend``.
    6. A random secret key is generated and, if OIDC_PROVIDER is
       configured, the OIDC client is set up and registered.

    :param configFile: path of a Python configuration file, or None
    :param baseConfig: name of the configuration class in
        ga4gh.serverconfig to start from
    :param port: port number stored as ``app.myPort``; used to build the
        default OIDC redirect URI while testing
    :param extraConfig: mapping of additional configuration values
        applied last, or None for no extra values
    :raises exceptions.ConfigurationException: if the DATA_SOURCE scheme
        is unsupported, or OIDC is configured without a redirect URI
    """
    # A None default avoids sharing one mutable dict between calls.
    if extraConfig is None:
        extraConfig = {}
    file_handler = StreamHandler()
    file_handler.setLevel(logging.WARNING)
    app.logger.addHandler(file_handler)
    configStr = 'ga4gh.serverconfig:{0}'.format(baseConfig)
    app.config.from_object(configStr)
    if os.environ.get('GA4GH_CONFIGURATION') is not None:
        app.config.from_envvar('GA4GH_CONFIGURATION')
    if configFile is not None:
        app.config.from_pyfile(configFile)
    app.config.update(extraConfig.items())
    # Setup file handle cache max size
    datamodel.fileHandleCache.setMaxCacheSize(
        app.config["FILE_HANDLE_CACHE_MAX_SIZE"])
    # Setup CORS
    cors.CORS(app, allow_headers='Content-Type')
    app.serverStatus = ServerStatus()
    # Allocate the backend
    # We use URLs to specify the backend. Currently we have file:// URLs (or
    # URLs with no scheme) for the SqlDataRepository, and special empty:// and
    # simulated:// URLs for empty or simulated data sources.
    dataSource = urlparse.urlparse(app.config["DATA_SOURCE"], "file")

    if dataSource.scheme == "simulated":
        # Ignore the query string
        randomSeed = app.config["SIMULATED_BACKEND_RANDOM_SEED"]
        numCalls = app.config["SIMULATED_BACKEND_NUM_CALLS"]
        variantDensity = app.config["SIMULATED_BACKEND_VARIANT_DENSITY"]
        numVariantSets = app.config["SIMULATED_BACKEND_NUM_VARIANT_SETS"]
        numReferenceSets = app.config[
            "SIMULATED_BACKEND_NUM_REFERENCE_SETS"]
        numReferencesPerReferenceSet = app.config[
            "SIMULATED_BACKEND_NUM_REFERENCES_PER_REFERENCE_SET"]
        numAlignmentsPerReadGroup = app.config[
            "SIMULATED_BACKEND_NUM_ALIGNMENTS_PER_READ_GROUP"]
        numReadGroupsPerReadGroupSet = app.config[
            "SIMULATED_BACKEND_NUM_READ_GROUPS_PER_READ_GROUP_SET"]
        dataRepository = datarepo.SimulatedDataRepository(
            randomSeed=randomSeed, numCalls=numCalls,
            variantDensity=variantDensity, numVariantSets=numVariantSets,
            numReferenceSets=numReferenceSets,
            numReferencesPerReferenceSet=numReferencesPerReferenceSet,
            numReadGroupsPerReadGroupSet=numReadGroupsPerReadGroupSet,
            numAlignments=numAlignmentsPerReadGroup)
    elif dataSource.scheme == "empty":
        dataRepository = datarepo.EmptyDataRepository()
    elif dataSource.scheme == "file":
        path = os.path.join(dataSource.netloc, dataSource.path)
        dataRepository = datarepo.SqlDataRepository(path)
        dataRepository.open(datarepo.MODE_READ)
    else:
        raise exceptions.ConfigurationException(
            "Unsupported data source scheme: " + dataSource.scheme)
    theBackend = backend.Backend(dataRepository)
    theBackend.setRequestValidation(app.config["REQUEST_VALIDATION"])
    theBackend.setResponseValidation(app.config["RESPONSE_VALIDATION"])
    theBackend.setDefaultPageSize(app.config["DEFAULT_PAGE_SIZE"])
    theBackend.setMaxResponseLength(app.config["MAX_RESPONSE_LENGTH"])
    app.backend = theBackend
    app.secret_key = os.urandom(SECRET_KEY_LENGTH)
    app.oidcClient = None
    app.tokenMap = None
    app.myPort = port
    if "OIDC_PROVIDER" in app.config:
        # The oic client. If we're testing, we don't want to verify
        # SSL certificates
        app.oidcClient = oic.oic.Client(
            verify_ssl=('TESTING' not in app.config))
        app.tokenMap = {}
        try:
            app.oidcClient.provider_config(app.config['OIDC_PROVIDER'])
        except requests.exceptions.ConnectionError:
            # The provider could not be reached, so build its
            # configuration from the explicitly configured endpoints.
            configResponse = message.ProviderConfigurationResponse(
                issuer=app.config['OIDC_PROVIDER'],
                authorization_endpoint=app.config['OIDC_AUTHZ_ENDPOINT'],
                token_endpoint=app.config['OIDC_TOKEN_ENDPOINT'],
                revocation_endpoint=app.config['OIDC_TOKEN_REV_ENDPOINT'])
            app.oidcClient.handle_provider_config(configResponse,
                                                  app.config['OIDC_PROVIDER'])

        # The redirect URI comes from the configuration.
        # If we are testing, then we allow the automatic creation of a
        # redirect uri if none is configured
        redirectUri = app.config.get('OIDC_REDIRECT_URI')
        if redirectUri is None and 'TESTING' in app.config:
            redirectUri = 'https://{0}:{1}/oauth2callback'.format(
                socket.gethostname(), app.myPort)
        # This check used to be `redirectUri is []`, an identity test
        # against a fresh list that is always False, so a missing URI was
        # silently registered as [None]. Check before the URI is used.
        if redirectUri is None:
            raise exceptions.ConfigurationException(
                'OIDC configuration requires a redirect uri')
        app.oidcClient.redirect_uris = [redirectUri]

        # We only support dynamic registration while testing.
        if ('registration_endpoint' in app.oidcClient.provider_info and
           'TESTING' in app.config):
            app.oidcClient.register(
                app.oidcClient.provider_info["registration_endpoint"],
                redirect_uris=[redirectUri])
        else:
            response = message.RegistrationResponse(
                client_id=app.config['OIDC_CLIENT_ID'],
                client_secret=app.config['OIDC_CLIENT_SECRET'],
                redirect_uris=[redirectUri],
                verify_ssl=False)
            app.oidcClient.store_registration_info(response)
Beispiel #21
0
 def setUp(self):
     """
     Opens the test data repository read-only, builds a backend on top
     of it and attaches a local client to that backend.
     """
     testRepo = datarepo.SqlDataRepository(paths.testDataRepo)
     self._repo = testRepo
     testRepo.open(datarepo.MODE_READ)
     self._backend = backend.Backend(testRepo)
     self._client = client.LocalClient(self._backend)