def testRightVersion(self):
    repo = datarepo.SqlDataRepository(self._repoPath)
    repo.open(datarepo.MODE_WRITE)
    repo.initialise()
    anotherRepo = datarepo.SqlDataRepository(self._repoPath)
    anotherRepo.open(datarepo.MODE_READ)
    # assertEqual, not the deprecated assertEquals alias
    self.assertEqual(anotherRepo._schemaVersion, str(repo.version))
def testWrongVersion(self):
    repo = datarepo.SqlDataRepository(self._repoPath)
    repo.version = datarepo.SqlDataRepository.SchemaVersion(
        "wrong.version")
    repo.open(datarepo.MODE_WRITE)
    repo.initialise()
    anotherRepo = datarepo.SqlDataRepository(self._repoPath)
    with self.assertRaises(
            exceptions.RepoSchemaVersionMismatchException):
        anotherRepo.open(datarepo.MODE_READ)
def __init__(self, inputDirectory, outputDirectory):
    """
    Converts the human-readable dataset from the compliance repository
    into a filesystem of binary files readable by the reference server.

    :param inputDirectory: location of the human-readable compliance
        dataset
    :param outputDirectory: location of the file hierarchy suitable for
        deploying on the reference server
    """
    self.inputDirectory = inputDirectory
    self.outputDirectory = outputDirectory
    self.repoPath = os.path.join(outputDirectory, "repo.db")
    self.tempdir = None
    # If no input directory is specified, download the data from GitHub.
    if inputDirectory is None:
        utils.log("Downloading test data...")
        self.tempdir = tempfile.mkdtemp()
        assert os.path.exists(self.tempdir)
        url = "https://github.com/ga4gh/compliance/archive/master.zip"
        filePath = os.path.join(self.tempdir, 'compliance-master.zip')
        downloader = utils.HttpFileDownloader(url, filePath)
        downloader.download()
        utils.log("Extracting test data...")
        with zipfile.ZipFile(filePath, "r") as z:
            z.extractall(self.tempdir)
        self.inputDirectory = os.path.join(
            self.tempdir, 'compliance-master', 'test-data')
    repo = datarepo.SqlDataRepository(self.repoPath)
    self.repo = repo
def setUp(self):
    # maxDiff is the unittest attribute; the original set the unused
    # name _maxDiff, which had no effect on diff truncation.
    self.maxDiff = None
    repoPath = paths.testDataRepo
    self._dataUrl = "file://{}".format(repoPath)
    dataRepository = datarepo.SqlDataRepository(repoPath)
    dataRepository.open(datarepo.MODE_READ)
    self._backend = backend.Backend(dataRepository)
    self._client = client.LocalClient(self._backend)
def __init__(self, featureSetLocalName, dataPath):
    """
    :param featureSetLocalName: name of the GFF3 resource corresponding
        to a pair of files, .db and .gff3
    :param dataPath: string representing the full path to the .db file
    """
    self._dataset = datasets.Dataset(_datasetName)
    self._repo = datarepo.SqlDataRepository(paths.testDataRepo)
    self._repo.open(datarepo.MODE_READ)
    self._ontology = self._repo.getOntologyByName(paths.ontologyName)
    self._referenceSet = references.AbstractReferenceSet("test_rs")
    featureSetLocalName = featureSetLocalName[:-3]  # remove '.db'
    self._testData = _testDataForFeatureSetName[featureSetLocalName]
    super(FeatureSetTests, self).__init__(featureSetLocalName, dataPath)
def _createVariantAnnotationSet(self, vcfDir):
    """
    Creates a VariantAnnotationSet from the specified directory of
    VCF files.
    """
    self._variantSetName = "testVariantSet"
    self._repo = datarepo.SqlDataRepository(paths.testDataRepo)
    self._repo.open(datarepo.MODE_READ)
    self._dataset = datasets.Dataset("testDs")
    self._variantSet = variants.HtslibVariantSet(
        self._dataset, self._variantSetName)
    self._variantSet.populateFromDirectory(vcfDir)
    self._variantAnnotationSet = variants.HtslibVariantAnnotationSet(
        self._variantSet, "testVAs")
    self._variantAnnotationSet.setOntology(
        self._repo.getOntologyByName(paths.ontologyName))
def __init__(self, inputDirectory, outputDirectory, force):
    """
    Converts the human-readable dataset from the compliance repository
    into a filesystem of binary files readable by the reference server.

    :param inputDirectory: location of the human-readable compliance
        dataset
    :param outputDirectory: location of the file hierarchy suitable for
        deploying on the reference server
    :param force: if True, remove an existing output directory instead
        of exiting
    """
    self.inputDirectory = inputDirectory
    self.outputDirectory = outputDirectory
    self.repoPath = os.path.abspath(
        os.path.join(outputDirectory, "registry.db"))
    self.tempdir = None
    if os.path.exists(self.outputDirectory):
        if force:
            utils.log("Removing existing output directory at '{}'".format(
                self.outputDirectory))
            shutil.rmtree(self.outputDirectory)
        else:
            utils.log("Output directory '{}' already exists".format(
                self.outputDirectory))
            utils.log("Please specify an output path that does not exist")
            utils.log("Exiting...")
            exit(1)
    # If no input directory is specified, download the data from GitHub.
    if inputDirectory is None:
        utils.log("Downloading test data...")
        self.tempdir = tempfile.mkdtemp()
        assert os.path.exists(self.tempdir)
        url = "https://github.com/ga4gh/compliance/archive/master.zip"
        filePath = os.path.join(self.tempdir, 'compliance-master.zip')
        downloader = utils.HttpFileDownloader(url, filePath)
        downloader.download()
        utils.log("Extracting test data...")
        with zipfile.ZipFile(filePath, "r") as z:
            z.extractall(self.tempdir)
        self.inputDirectory = os.path.join(
            self.tempdir, 'compliance-master', 'test-data')
    repo = datarepo.SqlDataRepository(self.repoPath)
    self.repo = repo
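For orientation, a hedged usage sketch of this initializer. Only `__init__` appears in this excerpt, so the class name `ComplianceDataMunger` is an assumption:

# Hypothetical usage; the class name is assumed, since only __init__ is
# shown above. Passing inputDirectory=None triggers the GitHub download,
# and force=True removes any existing output directory.
munger = ComplianceDataMunger(None, "compliance-output", force=True)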
def createRepo(self):
    """
    Creates the repository for all the data we've just downloaded.
    """
    repo = datarepo.SqlDataRepository(self.repoPath)
    repo.open("w")
    repo.initialise()

    referenceSet = references.HtslibReferenceSet("GRCh37-subset")
    referenceSet.populateFromFile(self.fastaFilePath)
    referenceSet.setDescription("Subset of GRCh37 used for demonstration")
    referenceSet.setNcbiTaxonId(9606)
    for reference in referenceSet.getReferences():
        reference.setNcbiTaxonId(9606)
        reference.setSourceAccessions(
            self.accessions[reference.getName()] + ".subset")
    repo.insertReferenceSet(referenceSet)

    dataset = datasets.Dataset("1kg-p3-subset")
    dataset.setDescription("Sample data from 1000 Genomes phase 3")
    repo.insertDataset(dataset)

    variantSet = variants.HtslibVariantSet(dataset, "mvncall")
    variantSet.setReferenceSet(referenceSet)
    dataUrls = [vcfFile for vcfFile, _ in self.vcfFilePaths]
    indexFiles = [indexFile for _, indexFile in self.vcfFilePaths]
    variantSet.populateFromFile(dataUrls, indexFiles)
    variantSet.checkConsistency()
    repo.insertVariantSet(variantSet)

    for sample, (bamFile, indexFile) in zip(
            self.samples, self.bamFilePaths):
        readGroupSet = reads.HtslibReadGroupSet(dataset, sample)
        readGroupSet.populateFromFile(bamFile, indexFile)
        readGroupSet.setReferenceSet(referenceSet)
        repo.insertReadGroupSet(readGroupSet)

    repo.commit()
    repo.close()
    self.log("Finished creating the repository; summary:\n")
    repo.open("r")
    repo.printSummary()
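As a quick check, a hedged sketch of reading back a repository built this way. The registry path is illustrative (the real one comes from `self.repoPath`); the calls mirror the API used in `createRepo` above:

# Hedged sketch, not part of the original script: re-open a registry
# read-only and dump its contents.
repo = datarepo.SqlDataRepository("ga4gh-example-data/registry.db")
repo.open(datarepo.MODE_READ)
repo.printSummary()
repo.close()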
def __init__(self, registryDb):
    repo = datarepo.SqlDataRepository(registryDb)
    repo.open(datarepo.MODE_READ)
    super(CpuProfilerBackend, self).__init__(repo)
    self.profiler = cProfile.Profile()
def __init__(self, registryDb):
    repo = datarepo.SqlDataRepository(registryDb)
    repo.open(datarepo.MODE_READ)
    super(HeapProfilerBackend, self).__init__(repo)
    self.profiler = guppy.hpy()
""") args = parser.parse_args() registryDb = "ga4gh-example-data/registry.db" if args.profile == 'heap': backendClass = HeapProfilerBackend backend = backendClass(registryDb) args.repeatLimit = 1 args.pageLimit = 1 elif args.profile == 'cpu': backendClass = CpuProfilerBackend backend = backendClass(registryDb) else: repo = datarepo.SqlDataRepository(registryDb) repo.open(datarepo.MODE_READ) backend = backend.Backend(repo) # Get our list of callSetids callSetIds = args.callSetIds if callSetIds != []: callSetIds = None if args.callSetIds != "*": callSetIds = args.callSetIds.split(",") minTime = benchmarkOneQuery(_heavyQuery(args.variantSetId, callSetIds), args.repeatLimit, args.pageLimit) print(minTime) if args.profile == 'cpu': stats = pstats.Stats(backend.profiler)
def __init__(self, args):
    self._args = args
    self._registryPath = args.registryPath
    self._repo = datarepo.SqlDataRepository(self._registryPath)
def data_repo(path):
    """
    Opens the SQL data repository at the given path read-only and
    returns it.
    """
    dataRepository = datarepo.SqlDataRepository(path)
    dataRepository.open(datarepo.MODE_READ)
    return dataRepository
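A minimal usage sketch of the helper above; the registry path is an example, not one mandated by the original:

# Example usage (path is illustrative); printSummary is the same call
# used when building repositories elsewhere in this section.
repo = data_repo("ga4gh-example-data/registry.db")
repo.printSummary()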
def testNonexistentFile(self):
    repo = datarepo.SqlDataRepository("aFilePathThatDoesNotExist")
    with self.assertRaises(exceptions.RepoNotFoundException):
        repo.open(datarepo.MODE_READ)
def testDirectory(self):
    repoPath = makeTempDir()
    repo = datarepo.SqlDataRepository(repoPath)
    with self.assertRaises(exceptions.RepoInvalidDatabaseException):
        repo.open(datarepo.MODE_READ)
def testTextFile(self):
    with open(self._repoPath, 'w') as textFile:
        textFile.write('This is now a text file')
    repo = datarepo.SqlDataRepository(self._repoPath)
    with self.assertRaises(exceptions.RepoInvalidDatabaseException):
        repo.open(datarepo.MODE_READ)
def testDbFileWithoutTables(self):
    repo = datarepo.SqlDataRepository(self._repoPath)
    with self.assertRaises(exceptions.RepoInvalidDatabaseException):
        repo.open(datarepo.MODE_READ)
def readRepo(self):
    repo = datarepo.SqlDataRepository(self._repoPath)
    repo.open(datarepo.MODE_READ)
    return repo
def setUp(self):
    self._dataRepo = datarepo.SqlDataRepository(paths.testDataRepo)
    self._dataRepo.open(datarepo.MODE_READ)
def configure(configFile=None, baseConfig="ProductionConfig",
              port=8000, extraConfig={}):
    """
    Configures the Flask app. Loads configuration from the given
    baseConfig object, then the GA4GH_CONFIGURATION environment
    variable, then the optional configFile, and finally extraConfig;
    allocates a backend based on the DATA_SOURCE URL; and, if an
    OIDC_PROVIDER is configured, sets up the OIDC client.
    """
    file_handler = StreamHandler()
    file_handler.setLevel(logging.WARNING)
    app.logger.addHandler(file_handler)
    configStr = 'ga4gh.serverconfig:{0}'.format(baseConfig)
    app.config.from_object(configStr)
    if os.environ.get('GA4GH_CONFIGURATION') is not None:
        app.config.from_envvar('GA4GH_CONFIGURATION')
    if configFile is not None:
        app.config.from_pyfile(configFile)
    app.config.update(extraConfig.items())
    # Set up the file handle cache max size
    datamodel.fileHandleCache.setMaxCacheSize(
        app.config["FILE_HANDLE_CACHE_MAX_SIZE"])
    # Set up CORS
    cors.CORS(app, allow_headers='Content-Type')
    app.serverStatus = ServerStatus()
    # Allocate the backend.
    # We use URLs to specify the backend. Currently we have file:// URLs
    # (or URLs with no scheme) for the SqlDataRepository, and special
    # empty:// and simulated:// URLs for empty or simulated data sources.
    dataSource = urlparse.urlparse(app.config["DATA_SOURCE"], "file")
    if dataSource.scheme == "simulated":
        # Ignore the query string
        randomSeed = app.config["SIMULATED_BACKEND_RANDOM_SEED"]
        numCalls = app.config["SIMULATED_BACKEND_NUM_CALLS"]
        variantDensity = app.config["SIMULATED_BACKEND_VARIANT_DENSITY"]
        numVariantSets = app.config["SIMULATED_BACKEND_NUM_VARIANT_SETS"]
        numReferenceSets = app.config[
            "SIMULATED_BACKEND_NUM_REFERENCE_SETS"]
        numReferencesPerReferenceSet = app.config[
            "SIMULATED_BACKEND_NUM_REFERENCES_PER_REFERENCE_SET"]
        numAlignmentsPerReadGroup = app.config[
            "SIMULATED_BACKEND_NUM_ALIGNMENTS_PER_READ_GROUP"]
        numReadGroupsPerReadGroupSet = app.config[
            "SIMULATED_BACKEND_NUM_READ_GROUPS_PER_READ_GROUP_SET"]
        dataRepository = datarepo.SimulatedDataRepository(
            randomSeed=randomSeed, numCalls=numCalls,
            variantDensity=variantDensity, numVariantSets=numVariantSets,
            numReferenceSets=numReferenceSets,
            numReferencesPerReferenceSet=numReferencesPerReferenceSet,
            numReadGroupsPerReadGroupSet=numReadGroupsPerReadGroupSet,
            numAlignments=numAlignmentsPerReadGroup)
    elif dataSource.scheme == "empty":
        dataRepository = datarepo.EmptyDataRepository()
    elif dataSource.scheme == "file":
        path = os.path.join(dataSource.netloc, dataSource.path)
        dataRepository = datarepo.SqlDataRepository(path)
        dataRepository.open(datarepo.MODE_READ)
    else:
        raise exceptions.ConfigurationException(
            "Unsupported data source scheme: " + dataSource.scheme)
    theBackend = backend.Backend(dataRepository)
    theBackend.setRequestValidation(app.config["REQUEST_VALIDATION"])
    theBackend.setResponseValidation(app.config["RESPONSE_VALIDATION"])
    theBackend.setDefaultPageSize(app.config["DEFAULT_PAGE_SIZE"])
    theBackend.setMaxResponseLength(app.config["MAX_RESPONSE_LENGTH"])
    app.backend = theBackend
    app.secret_key = os.urandom(SECRET_KEY_LENGTH)
    app.oidcClient = None
    app.tokenMap = None
    app.myPort = port
    if "OIDC_PROVIDER" in app.config:
        # The oic client. If we're testing, we don't want to verify
        # SSL certificates.
        app.oidcClient = oic.oic.Client(
            verify_ssl=('TESTING' not in app.config))
        app.tokenMap = {}
        try:
            app.oidcClient.provider_config(app.config['OIDC_PROVIDER'])
        except requests.exceptions.ConnectionError:
            configResponse = message.ProviderConfigurationResponse(
                issuer=app.config['OIDC_PROVIDER'],
                authorization_endpoint=app.config['OIDC_AUTHZ_ENDPOINT'],
                token_endpoint=app.config['OIDC_TOKEN_ENDPOINT'],
                revocation_endpoint=app.config['OIDC_TOKEN_REV_ENDPOINT'])
            app.oidcClient.handle_provider_config(
                configResponse, app.config['OIDC_PROVIDER'])
        # The redirect URI comes from the configuration.
        # If we are testing, then we allow the automatic creation of a
        # redirect uri if none is configured.
        redirectUri = app.config.get('OIDC_REDIRECT_URI')
        if redirectUri is None and 'TESTING' in app.config:
            redirectUri = 'https://{0}:{1}/oauth2callback'.format(
                socket.gethostname(), app.myPort)
        if redirectUri is None:
            raise exceptions.ConfigurationException(
                'OIDC configuration requires a redirect uri')
        app.oidcClient.redirect_uris = [redirectUri]
        # We only support dynamic registration while testing.
        if ('registration_endpoint' in app.oidcClient.provider_info
                and 'TESTING' in app.config):
            app.oidcClient.register(
                app.oidcClient.provider_info["registration_endpoint"],
                redirect_uris=[redirectUri])
        else:
            response = message.RegistrationResponse(
                client_id=app.config['OIDC_CLIENT_ID'],
                client_secret=app.config['OIDC_CLIENT_SECRET'],
                redirect_uris=[redirectUri],
                verify_ssl=False)
            app.oidcClient.store_registration_info(response)
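For illustration, a hedged example of pointing `configure` at a file-backed repository. `DATA_SOURCE` is the key the function reads above, and a scheme-less value defaults to file://; the registry path itself is an example:

# Hedged sketch, not from the original source: configure the server to
# serve a SqlDataRepository from a local registry file.
configure(
    baseConfig="ProductionConfig",
    extraConfig={"DATA_SOURCE": "ga4gh-example-data/registry.db"})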
def setUp(self):
    self._repo = datarepo.SqlDataRepository(paths.testDataRepo)
    self._repo.open(datarepo.MODE_READ)
    self._backend = backend.Backend(self._repo)
    self._client = client.LocalClient(self._backend)
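With this fixture, tests can exercise the server API in-process through the local client. A hedged sketch follows; `search_datasets` is an assumption about the GA4GH client API, which this excerpt does not show:

# Hedged sketch: issue an in-process API call through the LocalClient.
# search_datasets() is assumed and may differ across client versions.
for dataset in self._client.search_datasets():
    print(dataset.id)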