def run(args):
    """Execute the SciBiomart command-line workflow selected by ``args``.

    The function first honours any "list" request (marts, datasets, filters,
    attributes, configs) and returns straight after issuing it. If none was
    requested, it runs a query and saves the result as CSV.

    Attributes read from ``args`` (presumably an ``argparse.Namespace`` --
    confirm against the parser definition):

    * ``marts``, ``datasets``, ``filters``, ``attrs``, ``configs`` -- truthy
      flags selecting one of the listing modes.
    * ``m``, ``d`` -- mart and dataset handed to ``set_mart`` / ``set_dataset``.
    * ``f`` -- JSON string decoded into the query filters (optional).
    * ``a`` -- comma separated attribute names (optional).
    * ``s`` -- sort switch; the exact value ``'t'`` triggers sorting.
    * ``o`` -- output location forwarded to ``save_as_csv``.

    Returns:
        None. Results are reported through the SciBiomart helper methods.
    """
    sb = SciBiomart()

    # Listing modes: each one issues its listing and stops. The ``True``
    # argument presumably asks the helper to print -- confirm in SciBiomart.
    if args.marts:
        sb.list_marts(True)
        return

    # A mart has to be selected before datasets can be listed.
    sb.set_mart(args.m)
    if args.datasets:
        sb.list_datasets(True)
        return

    # A dataset has to be selected before the remaining listings or a query.
    sb.set_dataset(args.d)
    if args.filters:
        sb.list_filters(True)
        return
    if args.attrs:
        # This branch lists the attributes (not the filters).
        sb.list_attributes(True)
        return
    if args.configs:
        sb.list_configs(True)
        return

    # No listing was requested, so this is a real query.
    # The filters arrive as a JSON string and are decoded into Python objects.
    filters = json.loads(args.f) if args.f else None
    attrs = args.a.split(",") if args.a else None

    if not attrs and args.s:
        # No attributes were given but a sort value was supplied: fall back to
        # the columns that the sorting step below relies on.
        attrs = [
            'external_gene_name',
            'chromosome_name',
            'start_position',
            'end_position',
            'strand',
        ]

    sb.u.dp([
        'Running query on:',
        '\nMart: ', sb.mart,
        '\nDataset: ', sb.dataset_version,
        '\nFilters: ', filters,
        '\nAttributes: ', attrs,
    ])
    results_df = sb.run_query(filters, attrs)

    if args.s == 't':
        # Sorting needs typed position columns and a gene name on every row.
        # The user must have selected the start and end attributes themselves
        # if they supplied their own attribute list.
        column_types = {
            'start_position': int,
            'end_position': int,
            'strand': int,
            'chromosome_name': str,
        }
        sb.u.warn_p(['Removing any genes with no gene name... Required for sorting.'])
        has_gene_name = ~results_df['external_gene_name'].isnull()
        results_df = results_df[has_gene_name]
        results_df = results_df.astype(column_types)
        results_df = sb.sort_df_on_starts(results_df)

    saved_file = sb.save_as_csv(results_df, args.o)
    sb.u.dp(['Saved the output to:', saved_file])
def test_grch37(self):
    """Exercise mart and dataset listing against the GRCh37 BioMart service.

    Builds a ``SciBiomart`` client for the GRCh37 endpoint, lists its marts,
    selects ``ENSEMBL_MART_ENSEMBL`` and collects which of the datasets of
    interest appear in the dataset listing.

    NOTE(review): the collected matches are never asserted on, so this only
    fails if one of the calls raises -- confirm whether a check is intended.
    """
    sb = SciBiomart('http://grch37.ensembl.org/biomart/martservice/')
    # The mart listing itself is not inspected; only the call is made.
    sb.list_marts()
    # Keep the client on the test instance.
    self.sb = sb
    sb.set_mart('ENSEMBL_MART_ENSEMBL')
    datasets = sb.list_datasets()

    check_datasets_exist = [
        'hsapiens_gene_ensembl',
        'mmusculus_gene_ensembl',
    ]
    # Names from the listing that are among the datasets of interest,
    # in listing order.
    found_datasets = [
        name
        for name in datasets['name'].values
        if name in check_datasets_exist
    ]
def test_list_marts(self):
    """Verify that the default service reports exactly the expected marts.

    Every ``'@name'`` value found in the entries returned by ``list_marts``
    must be one of the expected mart names, and the number of names found
    must equal the number of expected marts.
    """
    sb = SciBiomart()
    marts = sb.list_marts()
    expected_marts = [
        'ENSEMBL_MART_ENSEMBL',
        'ENSEMBL_MART_MOUSE',
        'ENSEMBL_MART_SEQUENCE',
        'ENSEMBL_MART_ONTOLOGY',
        'ENSEMBL_MART_GENOMIC',
        'ENSEMBL_MART_SNP',
        'ENSEMBL_MART_FUNCGEN',
    ]

    # Walk every key of every returned entry and pick out the mart names.
    found_marts = []
    for entry in marts:
        for key in entry:
            if key != '@name':
                continue
            mart_name = entry[key]
            # Each reported mart has to be a known one.
            assert mart_name in expected_marts
            found_marts.append(mart_name)
    count_marts = len(found_marts)

    # Now confirm none of the expected marts were missing from the listing.
    print(found_marts)
    print(count_marts, len(expected_marts))
    assert count_marts == len(expected_marts)
    # Keep the client on the test instance.
    self.sb = sb