Ejemplo n.º 1
0
    def test_beacon_request(self):
        """
        Check beacon requests against the ClinVar/Variants dataset.

        First checks that the current ClinVar/Variants dataset returns
        the expected fields for a beacon request, then checks that
        version 3.7.0-2015-12-06 returns true for a specific case.
        """
        dataset = Dataset.retrieve('ClinVar/Variants')

        beacon = dataset.beacon(genome_build='GRCh37',
                                chromosome='6',
                                coordinate=50432798,
                                allele='G')

        # assertIn names the missing key when the check fails, unlike
        # assertTrue(f in beacon) which only reports "False is not true".
        for f in ('query', 'exist', 'total'):
            self.assertIn(f, beacon)

        # Check that Clinvar/Variants version 3.7.0-2015-12-06
        # returns true for specific case
        dataset = Dataset.retrieve('ClinVar/3.7.0-2015-12-06/Variants')

        beacontwo = dataset.beacon(genome_build='GRCh37',
                                   chromosome='13',
                                   coordinate=113803460,
                                   allele='T')

        self.assertTrue(beacontwo['exist'])
        self.assertEqual(beacontwo['total'], 1)
Ejemplo n.º 2
0
    def __getattr__(self, name):
        """Shortcut to access attributes of the underlying Dataset resource.

        ``name`` is first looked up as a key of this object. If the key is
        missing, ``name`` is one of the whitelisted Dataset attributes and
        this object has a truthy ``dataset_id``, the attribute is read from
        ``Dataset(dataset_id, client=self._client)`` instead.

        Raises:
            AttributeError: if ``name`` is neither a key of this object nor
                an attribute that can be delegated to a Dataset. The error
                carries the arguments of the original ``KeyError``.
        """
        # Valid override attributes that let Object act like a Dataset
        valid_dataset_attrs = (
            # query data
            'query',
            'lookup',
            'beacon',
            # transform data
            'import_file',
            'export',
            'migrate',
            # dataset meta
            'fields',
            'template',
            'imports',
            'commits',
            # helpers
            'activity',
            'saved_queries',
        )

        try:
            return self[name]
        except KeyError as err:
            # If the Object has a dataset_id, it is of object_type "dataset"
            # If there is no dataset_id, either this Object is a file or folder
            # or the resource has not yet been retrieved from the API.
            if name in valid_dataset_attrs:
                # Read dataset_id with a default: a plain ``self.dataset_id``
                # re-enters __getattr__ and, when the key is missing, raises
                # an AttributeError naming 'dataset_id' rather than ``name``.
                # This lookup must stay behind the whitelist check above so
                # that resolving 'dataset_id' itself cannot recurse.
                dataset_id = getattr(self, 'dataset_id', None)
                if dataset_id:
                    # Imported only when delegation actually happens, so
                    # ordinary key lookups do not pay for the import.
                    from solvebio.resource import Dataset
                    return getattr(Dataset(dataset_id, client=self._client),
                                   name)

            raise AttributeError(*err.args)
Ejemplo n.º 3
0
    def test_dataset_fields(self):
        fields = Dataset.retrieve(self.TEST_DATASET_NAME).fields()
        dataset_field = fields.data[0]
        self.assertTrue('id' in dataset_field,
                        'Should be able to get id in list of dataset fields')

        check_fields = set([
            'class_name', 'created_at', 'data_type', 'dataset_id',
            'description', 'facets_url', 'ordering', 'is_hidden', 'is_valid',
            'is_list', 'entity_type', 'name', 'updated_at', 'is_read_only',
            'id', 'url'
        ])
        self.assertSetEqual(set(dataset_field.keys()), check_fields)
        expected = """

| Field                        | Data Type   | Description   |
|------------------------------+-------------+---------------|
| accession_numbers            | string      |               |
| approved_name                | string      |               |
| approved_symbol              | string      |               |
| ccds_ids                     | string      |               |
| chromosome                   | string      |               |
| date_approved                | date        |               |
| date_modified                | date        |               |
| date_name_changed            | date        |               |
| date_symbol_changed          | date        |               |
| ensembl_gene_id              | string      |               |
| ensembl_id_ensembl           | string      |               |
| entrez_gene_id               | string      |               |
| entrez_gene_id_ncbi          | string      |               |
| enzyme_ids                   | string      |               |
| gene_family_description      | string      |               |
| gene_family_tag              | string      |               |
| hgnc_id                      | long        |               |
| locus                        | string      |               |
| locus_group                  | string      |               |
| locus_specific_databases     | string      |               |
| locus_type                   | string      |               |
| mouse_genome_database_id     | long        |               |
| mouse_genome_database_id_mgi | long        |               |
| name_synonyms                | string      |               |
| omim_id_ncbi                 | string      |               |
| omim_ids                     | long        |               |
| previous_names               | string      |               |
| previous_symbols             | string      |               |
| pubmed_ids                   | string      |               |
| rat_genome_database_id_rgd   | long        |               |
| record_type                  | string      |               |
| refseq_id_ncbi               | string      |               |
| refseq_ids                   | string      |               |
| specialist_database_id       | string      |               |
| specialist_database_links    | string      |               |
| status                       | string      |               |
| synonyms                     | string      |               |
| ucsc_id_ucsc                 | string      |               |
| uniprot_id_uniprot           | string      |               |
| vega_ids                     | string      |               |
"""
        self.assertEqual("{0}".format(fields), expected[1:-1],
                         'tabulated dataset fields')
Ejemplo n.º 4
0
    def test_csv_exporter(self):
        """
        Export a ``[:10]`` slice of a query on the test dataset to CSV
        and check that the output file was created.
        """
        import shutil
        import tempfile

        dataset = Dataset.retrieve(self.TEST_DATASET_NAME)
        query = dataset.query()[:10]

        # Export into a fresh temporary directory instead of a fixed
        # /tmp/test.csv: a stale file from an earlier run can no longer
        # make the isfile() check pass, and the path also works on
        # platforms without /tmp.
        # NOTE(review): assumes `path` is os.path (it is already used for
        # isfile in this test) -- confirm against the file's imports.
        tmp_dir = tempfile.mkdtemp()
        filename = path.join(tmp_dir, 'test.csv')
        try:
            query.export('csv', filename=filename)
            self.assertTrue(path.isfile(filename))
        finally:
            # Clean up even when the export or the assertion fails
            shutil.rmtree(tmp_dir, ignore_errors=True)
Ejemplo n.º 5
0
    def dataset(self):
        """Return the Dataset resource for this object.

        The Dataset is retrieved by ``self.dataset_id`` using this
        object's client.

        Raises:
            SolveError: if ``self.is_dataset`` is falsy.
        """
        from solvebio import Dataset

        if self.is_dataset:
            return Dataset.retrieve(self.dataset_id, client=self._client)

        message = "Only dataset objects have a Dataset resource. This is a {}"
        raise SolveError(message.format(self.object_type))
Ejemplo n.º 6
0
    def get_dataset(cls, alias):
        """Resolve ``alias`` to a ``Dataset``.

        Lookups are tried in this order:

        1. ``app_settings.DATASET_ALIASES``;
        2. a ``DatasetAlias`` record whose ``alias`` matches;
        3. ``alias`` itself, when it is an integer or matches
           ``Dataset.FULL_NAME_REGEX``.

        Raises:
            AssertionError: if ``alias`` is falsy.
            SolveError: if none of the lookups succeeds.
        """
        assert alias, 'Dataset alias argument cannot be None'

        # 1. aliases declared in the application settings
        configured = app_settings.DATASET_ALIASES
        if alias in configured:
            return Dataset(configured[alias])

        # 2. aliases stored as DatasetAlias records
        try:
            record = DatasetAlias.objects.get(alias=alias)
            return Dataset(record.dataset_id)
        except DatasetAlias.DoesNotExist:
            # No stored alias; fall through to the direct lookup below
            pass

        # 3. the value may already identify a dataset directly
        identifies_dataset = (isinstance(alias, six.integer_types) or
                              re.match(Dataset.FULL_NAME_REGEX, alias))
        if not identifies_dataset:
            raise SolveError('Cannot find the SolveBio dataset by alias "%s"' %
                             alias)

        return Dataset(alias)
Ejemplo n.º 7
0
    def test_dataset_retrieval(self):
        """Retrieve the test dataset and check that the expected keys exist."""
        dataset = Dataset.retrieve(self.TEST_DATASET_NAME)
        self.assertIn('id', dataset, 'Should be able to get id in dataset')

        check_fields = [
            'class_name', 'created_at', 'data_url', 'depository',
            'depository_id', 'depository_version', 'depository_version_id',
            'description', 'fields_url', 'full_name', 'genome_builds',
            'is_genomic', 'id', 'name', 'title', 'updated_at', 'url',
            'documents_count'
        ]

        # assertIn names the missing key when the check fails, unlike
        # assertTrue(f in dataset) which only reports "False is not true".
        for f in check_fields:
            self.assertIn(f, dataset)
Ejemplo n.º 8
0
    def test_changelog_dataset(self):
        """
        First check if version 3.7.0-2015-12-06 returns a suitable
        changelog compared to its most current version, then check
        the changelog between 3.7.0-2015-12-06 and 3.6.0-2015-09-04
        against a fixed expected dictionary.
        """
        # NOTE(review): the name retrieved here ends in "Clinvar" while the
        # expected "to_dataset" below ends in "ClinVar" -- confirm that
        # dataset names are matched case-insensitively.
        dataset = Dataset.retrieve('ClinVar/3.7.0-2015-12-06/Clinvar')

        clog = dataset.changelog()

        # assertIn names the missing key when the check fails
        for f in ('attributes', 'to_dataset', 'from_dataset', 'fields'):
            self.assertIn(f, clog)

        # Check if changelog between 3.7.0-2015-12-06 and 3.6.0-2015-09-04
        # returns correct dictionary
        clogtwo = dataset.changelog('3.6.0-2015-09-04')

        final_changelog = {
            "attributes": {
                "documents_count": [145359, 157003]
            },
            "to_dataset": "ClinVar/3.7.0-2015-12-06/ClinVar",
            "from_dataset": "ClinVar/3.6.0-2015-09-04/ClinVar",
            "fields": {
                "removed": [],
                "added": ["sbid"],
                "changed": {}
            }
        }

        self.assertEqual(final_changelog, clogtwo)
Ejemplo n.º 9
0
 def setUp(self):
     """Run the parent setUp, then retrieve the test dataset."""
     super(LookupTests, self).setUp()
     dataset_name = self.TEST_DATASET_NAME
     self.dataset = Dataset.retrieve(dataset_name)
Ejemplo n.º 10
0
 def test_dataset_facets(self):
     """Request the facets of the ``status`` field of the test dataset."""
     dataset = Dataset.retrieve(self.TEST_DATASET_NAME)
     status_field = dataset.fields('status')
     facet_payload = status_field.facets()
     # NOTE(review): len() is never negative, so this assertion only proves
     # that facet_payload['facets'] exists and has a length -- confirm
     # whether a stricter check was intended.
     facet_count = len(facet_payload['facets'])
     self.assertTrue(facet_count >= 0)
Ejemplo n.º 11
0
 def setUp(self):
     """Retrieve the test dataset, then run the parent setUp."""
     dataset_name = self.TEST_DATASET_NAME
     self.dataset = Dataset.retrieve(dataset_name)
     super(BatchQueryTest, self).setUp()