def test_beacon_request(self):
    """Exercise Dataset.beacon on the current and a pinned ClinVar dataset."""
    # The current ClinVar/Variants dataset must answer a beacon request
    # with every expected top-level field.
    current = Dataset.retrieve('ClinVar/Variants')
    response = current.beacon(
        genome_build='GRCh37',
        chromosome='6',
        coordinate=50432798,
        allele='G')
    for key in ('query', 'exist', 'total'):
        self.assertTrue(key in response)

    # Version 3.7.0-2015-12-06 of ClinVar/Variants must report exactly one
    # match for this specific allele.
    pinned = Dataset.retrieve('ClinVar/3.7.0-2015-12-06/Variants')
    pinned_response = pinned.beacon(
        genome_build='GRCh37',
        chromosome='13',
        coordinate=113803460,
        allele='T')
    self.assertTrue(pinned_response['exist'])
    self.assertEqual(pinned_response['total'], 1)
def __getattr__(self, name):
    """Shortcut to access attributes of the underlying Dataset resource.

    Python calls this only after normal attribute lookup has failed. The
    name is first looked up as a key on this object (``self[name]``). If
    that raises ``KeyError`` and the name is one of the Dataset shortcuts
    listed below, the lookup is delegated to a ``Dataset`` built from
    ``self.dataset_id`` and ``self._client``.

    Raises:
        AttributeError: if the name is neither a stored key nor a
            delegable Dataset attribute. The error carries the arguments
            of the ``KeyError`` raised for ``name``.
    """
    try:
        return self[name]
    except KeyError as err:
        # Valid override attributes that let Object act like a Dataset.
        # Built only on the failure path so plain key lookups stay cheap.
        valid_dataset_attrs = (
            # query data
            'query', 'lookup', 'beacon',
            # transform data
            'import_file', 'export', 'migrate',
            # dataset meta
            'fields', 'template', 'imports', 'commits',
            # helpers
            'activity', 'saved_queries',
        )

        if name in valid_dataset_attrs:
            # If the Object has a dataset_id, it is of object_type
            # "dataset". If there is no dataset_id, either this Object is
            # a file or folder or the resource has not yet been retrieved
            # from the API.
            #
            # getattr() with a default is used on purpose: a plain
            # ``self.dataset_id`` would re-enter __getattr__ and raise
            # AttributeError('dataset_id'), hiding the attribute name the
            # caller actually asked for.
            dataset_id = getattr(self, 'dataset_id', None)
            if dataset_id:
                # Kept function-local as in the original (presumably to
                # avoid a circular import -- confirm before moving it).
                from solvebio.resource import Dataset
                return getattr(
                    Dataset(dataset_id, client=self._client), name)

        raise AttributeError(*err.args)
def test_dataset_fields(self):
    """Check the keys of a dataset field and the tabulated field listing."""
    fields = Dataset.retrieve(self.TEST_DATASET_NAME).fields()
    # Only the first returned field is inspected for its key set.
    dataset_field = fields.data[0]
    self.assertTrue('id' in dataset_field,
                    'Should be able to get id in list of dataset fields')

    # The field must expose exactly these keys: no more, no fewer.
    check_fields = set([
        'class_name', 'created_at',
        'data_type', 'dataset_id',
        'description', 'facets_url',
        'ordering', 'is_hidden', 'is_valid', 'is_list',
        'entity_type', 'name', 'updated_at', 'is_read_only',
        'id', 'url'
    ])
    self.assertSetEqual(set(dataset_field.keys()), check_fields)

    # NOTE(review): the comparison below is whitespace-sensitive. The
    # literal is reproduced exactly as it appears in this view (on one
    # physical line); confirm its line breaks and column padding against
    # the real tabulated output before editing it.
    expected = """ | Field | Data Type | Description | |------------------------------+-------------+---------------| | accession_numbers | string | | | approved_name | string | | | approved_symbol | string | | | ccds_ids | string | | | chromosome | string | | | date_approved | date | | | date_modified | date | | | date_name_changed | date | | | date_symbol_changed | date | | | ensembl_gene_id | string | | | ensembl_id_ensembl | string | | | entrez_gene_id | string | | | entrez_gene_id_ncbi | string | | | enzyme_ids | string | | | gene_family_description | string | | | gene_family_tag | string | | | hgnc_id | long | | | locus | string | | | locus_group | string | | | locus_specific_databases | string | | | locus_type | string | | | mouse_genome_database_id | long | | | mouse_genome_database_id_mgi | long | | | name_synonyms | string | | | omim_id_ncbi | string | | | omim_ids | long | | | previous_names | string | | | previous_symbols | string | | | pubmed_ids | string | | | rat_genome_database_id_rgd | long | | | record_type | string | | | refseq_id_ncbi | string | | | refseq_ids | string | | | specialist_database_id | string | | | specialist_database_links | string | | | status | string | | | synonyms | string | | | ucsc_id_ucsc | string | | | uniprot_id_uniprot | string | | | vega_ids | string | | """

    # expected[1:-1] drops the first and last character of the literal,
    # i.e. the whitespace sitting directly inside the triple quotes.
    self.assertEqual("{0}".format(fields), expected[1:-1],
                     'tabulated dataset fields')
def test_csv_exporter(self):
    """Export the first ten query results to CSV and check the file exists.

    The export is written into a freshly created temporary directory
    rather than a fixed ``/tmp/test.csv`` path, so that:

    * a stale file left by another run cannot make the assertion pass
      without the exporter having written anything;
    * concurrent test runs do not overwrite each other's output;
    * the output is removed even when the export or the assertion fails.
    """
    import shutil
    import tempfile

    dataset = Dataset.retrieve(self.TEST_DATASET_NAME)
    query = dataset.query()[:10]

    workdir = tempfile.mkdtemp()
    target = path.join(workdir, 'test.csv')
    try:
        query.export('csv', filename=target)
        self.assertTrue(path.isfile(target))
    finally:
        # Removes the exported file together with its private directory;
        # errors are ignored so cleanup never masks a test failure.
        shutil.rmtree(workdir, ignore_errors=True)
def dataset(self):
    """Return the Dataset resource that backs this object.

    Raises:
        SolveError: if ``self.is_dataset`` is falsy; the message includes
            ``self.object_type``.
    """
    from solvebio import Dataset

    if self.is_dataset:
        return Dataset.retrieve(self.dataset_id, client=self._client)

    message = (
        "Only dataset objects have a Dataset resource. This is a {}"
        .format(self.object_type)
    )
    raise SolveError(message)
def get_dataset(cls, alias):
    """Resolve ``alias`` to a ``Dataset``.

    Lookup order:

    1. the ``app_settings.DATASET_ALIASES`` mapping;
    2. a stored ``DatasetAlias`` record with a matching ``alias``;
    3. the value itself, when it is an integer or matches
       ``Dataset.FULL_NAME_REGEX``.

    Raises:
        AssertionError: if ``alias`` is falsy.
        SolveError: if none of the lookups above succeeds.
    """
    assert alias, 'Dataset alias argument cannot be None'

    # 1. Aliases configured in settings take precedence.
    if alias in app_settings.DATASET_ALIASES:
        configured = app_settings.DATASET_ALIASES[alias]
        return Dataset(configured)

    # 2. Aliases stored as DatasetAlias records.
    try:
        record = DatasetAlias.objects.get(alias=alias)
        return Dataset(record.dataset_id)
    except DatasetAlias.DoesNotExist:
        pass

    # 3. The value may itself identify a dataset: either an integer or a
    # string matching the Dataset full-name regex. The isinstance check
    # comes first so the regex is only applied to non-integer values.
    is_integer = isinstance(alias, six.integer_types)
    if is_integer or re.match(Dataset.FULL_NAME_REGEX, alias):
        return Dataset(alias)

    raise SolveError(
        'Cannot find the SolveBio dataset by alias "%s"' % alias)
def test_dataset_retrieval(self):
    """Check that a retrieved dataset exposes every expected field."""
    retrieved = Dataset.retrieve(self.TEST_DATASET_NAME)
    self.assertTrue('id' in retrieved,
                    'Should be able to get id in dataset')

    expected_keys = (
        'class_name',
        'created_at',
        'data_url',
        'depository',
        'depository_id',
        'depository_version',
        'depository_version_id',
        'description',
        'fields_url',
        'full_name',
        'genome_builds',
        'is_genomic',
        'id',
        'name',
        'title',
        'updated_at',
        'url',
        'documents_count',
    )
    for key in expected_keys:
        self.assertTrue(key in retrieved)
def test_changelog_dataset(self):
    """Check Dataset.changelog with and without an explicit version.

    Version 3.7.0-2015-12-06 is first compared with its most current
    version, then with version 3.6.0-2015-09-04.
    """
    pinned = Dataset.retrieve('ClinVar/3.7.0-2015-12-06/Clinvar')

    # Changelog with no argument: only the top-level keys are checked.
    default_changelog = pinned.changelog()
    for key in ('attributes', 'to_dataset', 'from_dataset', 'fields'):
        self.assertTrue(key in default_changelog)

    # Changelog between 3.7.0-2015-12-06 and 3.6.0-2015-09-04 must match
    # this dictionary exactly.
    versioned_changelog = pinned.changelog('3.6.0-2015-09-04')
    expected_changelog = {
        "attributes": {"documents_count": [145359, 157003]},
        "to_dataset": "ClinVar/3.7.0-2015-12-06/ClinVar",
        "from_dataset": "ClinVar/3.6.0-2015-09-04/ClinVar",
        "fields": {"removed": [], "added": ["sbid"], "changed": {}},
    }
    self.assertEqual(expected_changelog, versioned_changelog)
def setUp(self):
    """Run the base fixture setup, then load the shared test dataset."""
    super(LookupTests, self).setUp()
    dataset_name = self.TEST_DATASET_NAME
    self.dataset = Dataset.retrieve(dataset_name)
def test_dataset_facets(self):
    """Fetch facets for the 'status' field of the test dataset."""
    dataset = Dataset.retrieve(self.TEST_DATASET_NAME)
    status_field = dataset.fields('status')
    facets = status_field.facets()
    facet_values = facets['facets']
    # NOTE(review): len() is never negative, so this comparison cannot
    # fail. As written the test only verifies that the 'facets' key exists
    # and that its value supports len(). Confirm whether a stricter bound
    # (e.g. > 0) was intended.
    self.assertTrue(len(facet_values) >= 0)
def setUp(self):
    """Run the base fixture setup, then load the shared test dataset.

    The base class ``setUp`` is called first so that anything it prepares
    is in place before the dataset is retrieved. This also matches the
    ordering used by the other dataset-backed fixture (``LookupTests``).
    """
    super(BatchQueryTest, self).setUp()
    self.dataset = Dataset.retrieve(self.TEST_DATASET_NAME)