예제 #1
0
def run(args):
    """Execute the action requested by the parsed command-line arguments.

    The listing flags are checked in a fixed order (marts, datasets,
    filters, attributes, configs); the first one that is set prints its
    listing and ends the call. The mart (``args.m``) is set before datasets
    can be listed and the dataset (``args.d``) before filters, attributes or
    configs can be listed.

    When no listing flag is set, a query is run with the filters from
    ``args.f`` (a JSON string) and the attributes from ``args.a`` (a
    comma-separated string), optionally sorted when ``args.s == 't'``, and
    the result is saved via ``save_as_csv`` using ``args.o``.
    """
    biomart = SciBiomart()

    # Listing marts needs no further setup.
    if args.marts:
        biomart.list_marts(True)
        return

    # A mart must be selected before its datasets can be listed.
    biomart.set_mart(args.m)
    if args.datasets:
        biomart.list_datasets(True)
        return

    # A dataset must be selected before the remaining listings.
    biomart.set_dataset(args.d)
    if args.filters:
        biomart.list_filters(True)
        return
    if args.attrs:
        biomart.list_attributes(True)
        return
    if args.configs:
        biomart.list_configs(True)
        return

    # No listing was requested, so this is a real query.
    # Filters arrive as a JSON string and attributes as a comma-separated one.
    query_filters = json.loads(args.f) if args.f else None
    query_attrs = args.a.split(",") if args.a else None
    if not query_attrs and args.s:
        # Sorting was requested without attributes: fall back to the columns
        # that the sorting step below relies on.
        query_attrs = ['external_gene_name', 'chromosome_name', 'start_position', 'end_position', 'strand']

    summary = ['Running query on:',
               '\nMart: ', biomart.mart,
               '\nDataset: ', biomart.dataset_version,
               '\nFilters: ', query_filters,
               '\nAttributes: ', query_attrs]
    biomart.u.dp(summary)

    table = biomart.run_query(query_filters, query_attrs)

    if args.s == 't':
        biomart.u.warn_p(['Removing any genes with no gene name... Required for sorting.'])
        # Keep only the rows that have a gene name.
        named_rows = table[table['external_gene_name'].notnull()]
        column_types = {'start_position': int,
                        'end_position': int,
                        'strand': int,
                        'chromosome_name': str}
        # The start/end columns must be present in the result for sorting.
        table = biomart.sort_df_on_starts(named_rows.astype(column_types))

    output_path = biomart.save_as_csv(table, args.o)
    biomart.u.dp(['Saved the output to:', output_path])
예제 #2
0
    def test_hg19(self):
        """Select the human gene dataset on a SciBiomart built with no arguments.

        Checks that listing datasets before a mart is set reports
        MART_SET_ERR, then sets the mart and the dataset and stores the
        client on ``self.sb``.
        """
        client = SciBiomart()

        # No mart has been chosen yet, so the listing must report the error.
        response = client.list_datasets()
        assert response['err'] == MART_SET_ERR

        client.set_mart('ENSEMBL_MART_ENSEMBL')
        dataset_table = client.list_datasets()

        expected_names = ['hsapiens_gene_ensembl', 'mmusculus_gene_ensembl']
        # Collected only; nothing is asserted on the matches.
        present_names = [
            name for name in dataset_table['name'].values
            if name in expected_names
        ]

        client.set_dataset('hsapiens_gene_ensembl')
        self.sb = client
예제 #3
0
    def test_list_datasets(self):
        """Check dataset listing with and without a mart selected.

        Listing before a mart is set must report MART_SET_ERR; after the
        mart is set the listing must contain more than 203 entries. The
        client is stored on ``self.sb``.
        """
        client = SciBiomart()

        # No mart has been chosen yet, so the listing must report the error.
        response = client.list_datasets()
        assert response['err'] == MART_SET_ERR

        client.set_mart('ENSEMBL_MART_ENSEMBL')
        dataset_table = client.list_datasets()

        expected_names = [
            'fcatus_gene_ensembl',
            'pcoquereli_gene_ensembl',
            'lsdomestica_gene_ensembl',
        ]
        present_names = [
            name for name in dataset_table['name'].values
            if name in expected_names
        ]
        print(len(present_names), len(expected_names), len(dataset_table))

        # The set of available datasets has changed over time, so an exact
        # match between present and expected names is deliberately not
        # asserted; only the overall size of the listing is checked.
        assert len(dataset_table) > 203
        self.sb = client
예제 #4
0
    def test_grch37(self):
        """List marts and datasets from the GRCh37 BioMart service URL.

        The client is stored on ``self.sb`` before the mart is set.
        """
        sb = SciBiomart('http://grch37.ensembl.org/biomart/martservice/')
        marts = sb.list_marts()
        self.sb = sb
        sb.set_mart('ENSEMBL_MART_ENSEMBL')
        datasets = sb.list_datasets()

        check_datasets_exist = ['hsapiens_gene_ensembl', 'mmusculus_gene_ensembl']
        # Names from the listing that are among the expected datasets.
        found_datasets = [
            name for name in datasets['name'].values
            if name in check_datasets_exist
        ]