def benchmarkOneQuery(request, repeatLimit=3, pageLimit=3):
    """
    Repeat the query several times; perhaps don't go through *all* the
    pages.  Returns minimum time to run backend.searchVariants() to execute
    the query (as far as pageLimit allows), *not* including JSON
    processing to prepare queries or parse responses.

    :param request: search request; it is serialised with
        protocol.toJson() and must expose a writable ``page_token``.
    :param repeatLimit: how many times the whole query is repeated.
    :param pageLimit: maximum number of pages fetched per repetition.
    :return: the smallest accrued time (as reported by timeOneSearch())
        over all repetitions.
    :raises ValueError: from min() when no repetition was run
        (repeatLimit < 1).

    The caller's request is left as it was passed in: ``page_token`` is
    restored to its original value before returning, even on error.
    """
    times = []
    queryString = protocol.toJson(request)
    # Following pages requires writing each token onto the request before
    # re-serialising it.  Remember the caller's value so that it can be
    # put back and the request is not handed back modified.
    originalPageToken = request.page_token
    try:
        for _ in range(repeatLimit):
            resultString, elapsedTime = timeOneSearch(queryString)
            accruedTime = elapsedTime
            pageCount = 1
            token = extractNextPageToken(resultString)
            # Iterate to go beyond the first page of results.
            while token is not None and pageCount < pageLimit:
                request.page_token = token
                pageRequestString = protocol.toJson(request)
                resultString, elapsedTime = timeOneSearch(pageRequestString)
                accruedTime += elapsedTime
                pageCount += 1
                token = extractNextPageToken(resultString)
            times.append(accruedTime)
    finally:
        request.page_token = originalPageToken

    # TODO: more sophisticated statistics. Sometimes we want min(),
    # sometimes mean = sum() / len(), sometimes other measures,
    # perhaps exclude outliers...

    # If we compute average we should throw out at least the first one.
    # return sum(times[2:])/len(times[2:])
    return min(times)
def testValidateObjects(self):
    """
    Checks that the reference set under test, and every reference it
    contains, serialises to JSON accepted by assertValid for the matching
    protocol class.
    """
    referenceSet = self._gaObject
    referenceSetJson = protocol.toJson(referenceSet.toProtocolElement())
    self.assertValid(protocol.ReferenceSet, referenceSetJson)
    for gaReference in referenceSet.getReferences():
        referenceJson = protocol.toJson(gaReference.toProtocolElement())
        self.assertValid(protocol.Reference, referenceJson)
def sendSearchRequest(self, path, request, responseClass):
    """
    Posts the given protocol request to ``path`` as JSON, asserts that the
    server answered with status 200, and returns the response body parsed
    into an instance of ``responseClass``.
    """
    requestJson = protocol.toJson(request)
    response = self.sendJsonPostRequest(path, requestJson)
    self.assertEqual(200, response.status_code)
    parsedResponse = protocol.fromJson(response.data, responseClass)
    # The parsed object must still validate once serialised again.
    roundTripJson = protocol.toJson(parsedResponse)
    self.assertTrue(protocol.validate(roundTripJson, responseClass))
    return parsedResponse
def testVariantsValid(self):
    """
    Checks that every variant returned for each reference name, over the
    range 0..vcfMax, serialises to JSON accepted by assertValid.
    """
    rangeEnd = datamodel.PysamDatamodelMixin.vcfMax
    for referenceName in self._reference_names:
        variants = self._gaObject.getVariants(referenceName, 0, rangeEnd)
        for gaVariant in variants:
            variantJson = protocol.toJson(gaVariant)
            self.assertValid(protocol.Variant, variantJson)
def testToProtocolElement(self):
    """
    Populates a bioMetadata.Individual from the JSON of a valid protocol
    Individual and checks the timestamps survive, then checks that
    malformed JSON raises InvalidJsonException.
    """
    dataset = datasets.Dataset('dataset1')
    sexTerm = protocol.OntologyTerm()
    sexTerm.term = "male genotypic sex"
    sexTerm.id = "PATO:0020001"
    sexTerm.source_name = "PATO"
    sexTerm.source_version = pb.string("2015-11-18")
    # Write out a valid input
    print(protocol.toJsonDict(sexTerm))
    timestamp = "2016-05-19T21:00:19Z"
    validIndividual = protocol.Individual(
        name="test", created=timestamp, updated=timestamp, sex=sexTerm)
    infoValue = validIndividual.info['test'].values.add()
    infoValue.string_value = 'test-info'

    # pass through protocol creation
    individual = bioMetadata.Individual(dataset, "test")
    validJson = protocol.toJson(validIndividual)
    individual.populateFromJson(validJson)
    gaIndividual = individual.toProtocolElement()

    # Verify elements exist
    self.assertEqual(gaIndividual.created, validIndividual.created)
    self.assertEqual(gaIndividual.updated, validIndividual.updated)

    # Invalid input: a fresh object must refuse to load it
    invalidIndividual = '{"bad:", "json"}'
    individual = bioMetadata.Individual(dataset, "test")
    with self.assertRaises(exceptions.InvalidJsonException):
        individual.populateFromJson(invalidIndividual)
def setUp(self):
    """
    Builds the repo manager argument parser and the fixture values
    (paths, names and JSON-serialised Individual/BioSample records)
    stored on the test instance.
    """
    self.parser = cli_repomanager.RepoManager.getParser()
    self.registryPath = 'a/repo/path'
    self.datasetName = "datasetName"
    self.filePath = 'a/file/path'
    self.dirPath = 'a/dir/path/'
    self.individualName = "test"
    self.bioSampleName = "test"
    timestamp = "2016-05-19T21:00:19Z"
    individual = protocol.Individual(
        name="test", created=timestamp, updated=timestamp)
    self.individual = protocol.toJson(individual)
    bioSample = protocol.BioSample(
        name="test", created=timestamp, updated=timestamp)
    self.bioSample = protocol.toJson(bioSample)
def assertRequestRaises(self, exceptionClass, url, request):
    """
    Serialises the protocol request to JSON and delegates to
    assertRawRequestRaises to verify that posting it to ``url`` yields
    the protocol exception corresponding to ``exceptionClass``.
    """
    requestJson = protocol.toJson(request)
    self.assertRawRequestRaises(exceptionClass, url, requestJson)
def runGetRequest(self, obj):
    """
    Returns the JSON string for the protocol element of the specified
    datamodel object.
    """
    return protocol.toJson(obj.toProtocolElement())
def sendPostRequest(self, path, request):
    """
    Posts the specified GA request object to ``path`` as JSON, with the
    example URL as Origin, and returns the response.
    """
    headers = {}
    headers['Content-type'] = 'application/json'
    headers['Origin'] = self.exampleUrl
    payload = protocol.toJson(request)
    return self.app.post(path, headers=headers, data=payload)
def runGetFeature(self, id_):
    """
    Looks up the feature identified by the compound ID string ``id_``
    (dataset -> feature set -> feature) and returns it as a JSON string.
    """
    compoundId = datamodel.FeatureCompoundId.parse(id_)
    repository = self.getDataRepository()
    featureSet = repository.getDataset(
        compoundId.dataset_id).getFeatureSet(compoundId.feature_set_id)
    return protocol.toJson(featureSet.getFeature(compoundId))
def testGoodMappings(self):
    """
    For every term from the OBO reader, checks that the ontology maps the
    term name to its ID and that the GA term built from the name is valid
    and carries the expected name, ID, source name and source version.
    """
    ontology = self._gaObject
    for oboTerm in self._oboReader:
        termName = oboTerm.name
        self.assertIn(oboTerm.id, ontology.getTermIds(termName))
        gaTerm = ontology.getGaTermByName(termName)
        isValid = protocol.validate(protocol.toJson(gaTerm), OntologyTerm)
        self.assertTrue(isValid)
        self.assertEqual(gaTerm.term, termName)
        self.assertIn(gaTerm.id, ontology.getTermIds(termName))
        self.assertEqual(
            gaTerm.source_version, ontology.getSourceVersion())
        self.assertEqual(gaTerm.source_name, ontology.getName())
def testGenotypesSearchByNameError(self):
    """
    Searches features by a name that is not a valid regular expression
    and expects the server to answer with status 400.
    """
    request = protocol.SearchFeaturesRequest()
    # Only the feature set is needed; the dataset name is discarded.
    _, featureSet = self.getCGDDataSetFeatureSet()
    request.feature_set_id = featureSet.id
    request.name = "*"  # invalid regular expression
    requestJson = protocol.toJson(request)
    response = self.sendJsonPostRequest("features/search", requestJson)
    self.assertEqual(400, response.status_code)
def sendPostRequest(self, path, request, extraHeaders=None):
    """
    Posts the specified GA request object to ``path`` as JSON and returns
    the response.  Entries in ``extraHeaders``, when given, are merged
    over the default Content-type and Origin headers.
    """
    headers = dict([
        ('Content-type', 'application/json'),
        ('Origin', self.exampleUrl),
    ])
    if extraHeaders:
        headers.update(extraHeaders)
    payload = protocol.toJson(request)
    return self.client.post(path, headers=headers, data=payload)
def runGetVariant(self, id_):
    """
    Looks up the variant identified by the compound ID string ``id_``
    (dataset -> variant set -> variant) and returns it as a JSON string.
    """
    compoundId = datamodel.VariantCompoundId.parse(id_)
    repository = self.getDataRepository()
    variantSet = repository.getDataset(
        compoundId.dataset_id).getVariantSet(compoundId.variant_set_id)
    # TODO variant is a special case here, as it's returning a
    # protocol element rather than a datamodel object. We should
    # fix this for consistency.
    return protocol.toJson(variantSet.getVariant(compoundId))
def handleException(exception):
    """
    Turns an exception raised while handling a request into a Flask
    response carrying the JSON protocol error and the exception's HTTP
    status.  Exceptions that are not BaseServerException instances are
    logged and converted with exceptions.getServerError() first.
    """
    if isinstance(exception, exceptions.BaseServerException):
        serverException = exception
    else:
        with app.test_request_context():
            app.log_exception(exception)
        serverException = exceptions.getServerError(exception)
    errorJson = protocol.toJson(serverException.toProtocolElement())
    return getFlaskResponse(errorJson, serverException.httpStatus)
def handleException(exception):
    """
    Turns an exception raised while handling a request into a response.

    Exceptions that are not BaseServerException instances are logged and
    converted with exceptions.getServerError() first.  When the current
    request's Accept header mentions text/html an HTML error string is
    returned; otherwise a Flask response with the JSON protocol error and
    the exception's HTTP status is returned.
    """
    if isinstance(exception, exceptions.BaseServerException):
        serverException = exception
    else:
        with app.test_request_context():
            app.log_exception(exception)
        serverException = exceptions.getServerError(exception)
    error = serverException.toProtocolElement()

    # Anything that is not a browser asking for HTML gets the JSON error.
    if not (flask.request and 'Accept' in flask.request.headers
            and 'text/html' in flask.request.headers['Accept']):
        return getFlaskResponse(
            protocol.toJson(error), serverException.httpStatus)

    # If the exception is being viewed by a web browser, we can render a
    # nicer view.
    # NOTE(review): the JSON is interpolated into the markup unescaped and
    # this branch returns the bare string without the HTTP status --
    # confirm both are intended.
    message = "<h1>Error {}</h1><pre>{}</pre>".format(
        serverException.httpStatus, protocol.toJson(error))
    if serverException.httpStatus in (401, 403):
        message += "Please try <a href=\"/login\">logging in</a>."
    return message
def runListReferenceBases(self, requestJson):
    """
    Runs a listReferenceBases request described by the JSON string
    ``requestJson`` and returns the JSON-serialised response.

    An empty ``requestJson`` is treated as an empty request.  At most
    ``self._maxResponseLength`` bases are returned per call; when more
    remain, the response carries a next page token.

    Raises exceptions.InvalidJsonException if the request cannot be
    parsed.
    """
    if requestJson:
        try:
            request = protocol.fromJson(
                requestJson, protocol.ListReferenceBasesRequest)
        except protocol.json_format.ParseError:
            raise exceptions.InvalidJsonException(requestJson)
    else:
        # In the case when an empty post request is made to the endpoint
        # we instantiate an empty ListReferenceBasesRequest.
        request = protocol.ListReferenceBasesRequest()

    compoundId = datamodel.ReferenceCompoundId.parse(request.reference_id)
    repository = self.getDataRepository()
    referenceSet = repository.getReferenceSet(compoundId.reference_set_id)
    reference = referenceSet.getReference(request.reference_id)

    start = request.start
    # An end of 0 is assumed to mean "get all".
    end = reference.getLength() if request.end == 0 else request.end
    if request.page_token:
        # A page token overrides the requested start position.
        start = paging._parsePageToken(request.page_token, 1)[0]

    # Truncate to one chunk and remember where the next page begins.
    chunkEnd = start + self._maxResponseLength
    nextPageToken = None
    if chunkEnd < end:
        end = chunkEnd
        nextPageToken = str(chunkEnd)

    # build response
    response = protocol.ListReferenceBasesResponse()
    response.offset = start
    response.sequence = reference.getBases(start, end)
    if nextPageToken:
        response.next_page_token = nextPageToken
    return protocol.toJson(response)
def post(self, url, params=None, data=None):
    """
    Dispatches a POST for ``url`` directly to the backend and wraps the
    result in a DummyResponse.

    URLs ending in "/search" are routed through ``self._searchMethodMap``
    with ``data`` passed through unchanged; any other URL is handled as a
    ListReferenceBases call built from the JSON in ``data``.  ``params``
    is not used.
    """
    self.checkSessionParameters()
    assert url.startswith(self._urlPrefix)
    suffix = url[len(self._urlPrefix):]
    searchSuffix = "/search"
    if not suffix.endswith(searchSuffix):
        # ListReferenceBases is an oddball and needs to be treated
        # separately.
        payload = json.loads(data)
        args = protocol.ListReferenceBasesRequest()
        args.reference_id = payload.get('referenceId', "")
        args.start = int(payload.get('start', 0))
        args.end = int(payload.get('end', 0))
        args.page_token = payload.get('pageToken', "")
        basesJson = self._backend.runListReferenceBases(
            protocol.toJson(args))
        return DummyResponse(basesJson)

    # Strip the leading "/" and the trailing "/search" to get the type.
    datatype = suffix[1:-len(searchSuffix)]
    assert datatype in self._searchMethodMap
    searchMethod = self._searchMethodMap[datatype]
    return DummyResponse(searchMethod(data))
def testToProtocolElement(self):
    """
    Populates a bioMetadata.BioSample from the JSON of a valid protocol
    BioSample and checks the timestamps survive, then checks that
    malformed JSON makes BioSample.populateFromJson raise
    InvalidJsonException.
    """
    dataset = datasets.Dataset('dataset1')
    # Write out a valid input
    validBioSample = protocol.BioSample(
        name="test",
        created="2016-05-19T21:00:19Z",
        updated="2016-05-19T21:00:19Z")
    validBioSample.info['test'].values.add().string_value = 'test-info'
    # pass through protocol creation
    bioSample = bioMetadata.BioSample(
        dataset, "test")
    bioSample.populateFromJson(protocol.toJson(validBioSample))
    gaBioSample = bioSample.toProtocolElement()
    # Verify elements exist
    self.assertEqual(gaBioSample.created, validBioSample.created)
    self.assertEqual(gaBioSample.updated, validBioSample.updated)

    # Invalid input
    invalidBioSample = '{"bad:", "json"}'
    # Build a BioSample here (not an Individual) so that the rejection
    # being checked is the one of the class under test.
    bioSample = bioMetadata.BioSample(dataset, "test")
    # Should fail
    self.assertRaises(
        exceptions.InvalidJsonException,
        bioSample.populateFromJson,
        invalidBioSample)
def testProtocolElementValid(self):
    """
    Checks that the protocol element of the object under test serialises
    to JSON accepted by assertValid for the expected protocol class.
    """
    protocolClass = self.getProtocolClass()
    elementJson = protocol.toJson(self._gaObject.toProtocolElement())
    self.assertValid(protocolClass, elementJson)
class RepoManagerEndToEndTest(unittest.TestCase):
    """
    Drives the repo manager command line end to end against a throwaway
    repository file: every add-* command, verify/list, then every
    remove-* command, plus the interactive confirmation of removals.
    """

    datasetName = 'datasetOne'
    metadata = {'description': 'aDescription'}
    individualName = "test"
    bioSampleName = "test"
    # JSON records passed on the command line to add-individual and
    # add-biosample.
    individual = protocol.toJson(
        protocol.Individual(
            name="test",
            created="2016-05-19T21:00:19Z",
            updated="2016-05-19T21:00:19Z"))
    bioSample = protocol.toJson(
        protocol.BioSample(
            name="test",
            created="2016-05-19T21:00:19Z",
            updated="2016-05-19T21:00:19Z"))

    def setUp(self):
        """
        Reserves a unique repository path.  Only the name is wanted: the
        file is removed again so that "init" creates it.
        """
        fd, self.repoFile = tempfile.mkstemp(
            prefix='ga4gh_repo_manager_end2end_test')
        # mkstemp hands back an open OS-level descriptor; close it so
        # that it is not leaked for every test that runs.
        os.close(fd)
        os.unlink(self.repoFile)

    def tearDown(self):
        """
        Removes the repository file if the test left one behind.
        """
        if os.path.exists(self.repoFile):
            os.unlink(self.repoFile)

    def _runCmd(self, cmd, *args):
        """
        Runs the repo manager sub-command ``cmd`` on the test repository
        with the given extra arguments.
        """
        command = [cmd, self.repoFile] + list(args)
        cli_repomanager.repo_main(command)

    def testEndToEnd(self):
        """
        Adds one of everything to a fresh repository, verifies and lists
        it, then removes everything again.
        """
        self._runCmd("init")
        self._runCmd("add-ontology", paths.ontologyPath)
        self._runCmd(
            "add-referenceset", paths.faPath,
            '-n', paths.referenceSetName)
        self._runCmd("add-dataset", self.datasetName)
        self._runCmd(
            "add-biosample", self.datasetName,
            self.bioSampleName, self.bioSample)
        self._runCmd(
            "add-individual", self.datasetName,
            self.individualName, self.individual)
        self._runCmd(
            "add-readgroupset", self.datasetName, paths.bamPath,
            '-R', paths.referenceSetName, '-n', paths.readGroupSetName)
        self._runCmd(
            "add-featureset", self.datasetName, paths.featuresPath,
            '-R', paths.referenceSetName, '-O', paths.ontologyName)
        # ensure we can handle trailing slashes
        vcfPath = paths.vcfDirPath + '/'
        self._runCmd(
            "add-variantset", self.datasetName, vcfPath,
            '-R', paths.referenceSetName)
        variantAnnotationSetName = "vas"
        self._runCmd(
            "add-variantset", self.datasetName, paths.annotatedVcfPath,
            '-R', paths.referenceSetName, "-aO", paths.ontologyName,
            "-n", variantAnnotationSetName)
        phenotypeAssociationSetName = "paSet"
        self._runCmd(
            "add-phenotypeassociationset", self.datasetName,
            paths.phenotypeAssociationSetPath,
            "-n", phenotypeAssociationSetName)

        self._runCmd("verify")
        self._runCmd("list")

        # "-f" forces each removal without asking for confirmation.
        self._runCmd(
            "remove-phenotypeassociationset",
            self.datasetName, phenotypeAssociationSetName, "-f")
        self._runCmd(
            "remove-variantset", self.datasetName,
            paths.variantSetName, "-f")
        self._runCmd(
            "remove-variantset", self.datasetName,
            variantAnnotationSetName, "-f")
        self._runCmd(
            "remove-readgroupset", self.datasetName,
            paths.readGroupSetName, "-f")
        self._runCmd(
            "remove-featureset", self.datasetName,
            paths.featureSetName, "-f")
        self._runCmd("remove-dataset", self.datasetName, "-f")
        self._runCmd("remove-referenceset", paths.referenceSetName, "-f")
        self._runCmd("remove-ontology", paths.ontologyName, "-f")

    def testForce(self):
        """
        Without "-f" a removal asks for confirmation: answering 'N' keeps
        the dataset, answering 'y' removes it, and removing it once more
        exits with SystemExit.
        """
        datasetName = 'dataset1'
        self._runCmd("init")
        self._runCmd("add-dataset", datasetName)
        with mock.patch('ga4gh.server.cli.repomanager.getRawInput',
                        lambda x: 'N'):
            self._runCmd("remove-dataset", datasetName)
        with mock.patch('ga4gh.server.cli.repomanager.getRawInput',
                        lambda x: 'y'):
            self._runCmd("remove-dataset", datasetName)
            with self.assertRaises(SystemExit):
                self._runCmd("remove-dataset", datasetName)
def _verifyVariantCalls():
    """
    Checks every call of the enclosing scope's gaVariant: it must be a
    valid protocol Call, its call set name must be a key of pyvcfCallMap,
    and it must match the pyvcf call stored under that name.
    """
    for call in gaVariant.calls:
        self.assertValid(protocol.Call, protocol.toJson(call))
        callSetName = call.call_set_name
        self.assertIn(callSetName, pyvcfCallMap)
        self._verifyVariantCallEqual(call, pyvcfCallMap[callSetName])