Ejemplo n.º 1
0
def run(args):
    """Serve one command-line request: list BioMart metadata or run a query.

    The listing flags are checked in order and each one returns early:
    ``marts`` before a mart is set, ``datasets`` once the mart ``args.m``
    is set, then ``filters``, ``attrs`` and ``configs`` once the dataset
    ``args.d`` is set. When no listing flag is truthy, a query is run and
    its result is saved via ``save_as_csv`` to ``args.o``.

    Args:
        args: Object exposing the attributes ``marts``, ``m``, ``datasets``,
            ``d``, ``filters``, ``attrs``, ``configs``, ``f`` (JSON string of
            filters), ``a`` (comma-separated attribute names), ``s`` (sort
            flag, compared against ``'t'``) and ``o`` (output target).
            Presumably an ``argparse.Namespace`` -- confirm against caller.
    """
    biomart = SciBiomart()

    # Marts can be listed before anything has been selected.
    if args.marts:
        biomart.list_marts(True)
        return

    # Datasets are listed only after the mart has been chosen.
    biomart.set_mart(args.m)
    if args.datasets:
        biomart.list_datasets(True)
        return

    # The remaining listings all happen after the dataset has been chosen.
    # Names are resolved lazily so only the flags up to the first truthy one
    # are ever read.
    biomart.set_dataset(args.d)
    for flag_name, lister_name in (
        ('filters', 'list_filters'),
        ('attrs', 'list_attributes'),
        ('configs', 'list_configs'),
    ):
        if getattr(args, flag_name):
            getattr(biomart, lister_name)(True)
            return

    # No listing was requested, so build and run an actual query.
    # Filters arrive as a JSON string, attributes as a comma-separated string.
    filters = json.loads(args.f) if args.f else None
    attrs = args.a.split(",") if args.a else None
    if not attrs and args.s:
        # Sorting needs at least the gene name and its coordinates.
        attrs = ['external_gene_name', 'chromosome_name', 'start_position', 'end_position', 'strand']

    biomart.u.dp(['Running query on:',
                  '\nMart: ', biomart.mart,
                  '\nDataset: ', biomart.dataset_version,
                  '\nFilters: ', filters,
                  '\nAttributes: ', attrs])
    results_df = biomart.run_query(filters, attrs)

    if args.s == 't':
        # Column types applied before sorting.
        column_types = {'start_position': int,
                        'end_position': int,
                        'strand': int,
                        'chromosome_name': str}
        biomart.u.warn_p(['Removing any genes with no gene name... Required for sorting.'])

        has_gene_name = ~results_df['external_gene_name'].isnull()
        results_df = results_df[has_gene_name]

        # The user must have selected the start/end columns for this to work.
        typed_df = results_df.astype(column_types)
        results_df = biomart.sort_df_on_starts(typed_df)

    saved_file = biomart.save_as_csv(results_df, args.o)
    biomart.u.dp(['Saved the output to:', saved_file])
Ejemplo n.º 2
0
    def test_grch37(self):
        """List the datasets of the GRCh37 Ensembl mart service.

        Connects to the grch37.ensembl.org martservice URL, selects the
        ``ENSEMBL_MART_ENSEMBL`` mart and collects which of the expected
        dataset names appear in the ``name`` column of the dataset listing.

        NOTE(review): the visible body never asserts on the collected
        names -- confirm whether a check is missing.
        """
        sb = SciBiomart('http://grch37.ensembl.org/biomart/martservice/')
        marts = sb.list_marts()
        self.sb = sb

        sb.set_mart('ENSEMBL_MART_ENSEMBL')
        datasets = sb.list_datasets()

        # Dataset names we look for in the listing.
        check_datasets_exist = ['hsapiens_gene_ensembl', 'mmusculus_gene_ensembl']
        found_datasets = [
            name for name in datasets['name'].values
            if name in check_datasets_exist
        ]
Ejemplo n.º 3
0
    def test_list_marts(self):
        """Check that the default service lists exactly the expected marts.

        Every ``@name`` value found in the entries returned by
        ``list_marts`` must be one of the expected names, and the number of
        names found must equal the number of expected names.
        """
        sb = SciBiomart()
        marts = sb.list_marts()
        expected_marts = [
            'ENSEMBL_MART_ENSEMBL',
            'ENSEMBL_MART_MOUSE',
            'ENSEMBL_MART_SEQUENCE',
            'ENSEMBL_MART_ONTOLOGY',
            'ENSEMBL_MART_GENOMIC',
            'ENSEMBL_MART_SNP',
            'ENSEMBL_MART_FUNCGEN',
        ]

        # Each returned name must be one we expect; fail on the first stranger.
        found_marts = []
        for entry in marts:
            for key in entry:
                if key != '@name':
                    continue
                mart_name = entry[key]
                assert mart_name in expected_marts
                found_marts.append(mart_name)

        # Then make sure none of the expected marts is missing.
        count_marts = len(found_marts)
        print(found_marts)
        print(count_marts, len(expected_marts))
        assert count_marts == len(expected_marts)
        self.sb = sb