def test_set_dataset(self):
    """Check the set-up guards of ``list_configs`` and the dataset version.

    ``list_configs`` is expected to report ``MART_SET_ERR`` while no mart is
    set and ``DATASET_SET_ERR`` once a mart is set but no dataset is. After
    ``set_dataset`` the ``dataset_version`` attribute is compared against a
    fixed version string.
    """
    biomart = SciBiomart()

    # Nothing configured yet: the mart guard must fire.
    assert biomart.list_configs()['err'] == MART_SET_ERR

    # Mart configured but no dataset: the dataset guard must fire.
    biomart.set_mart('ENSEMBL_MART_ENSEMBL')
    assert biomart.list_configs()['err'] == DATASET_SET_ERR

    biomart.set_dataset('hsapiens_gene_ensembl')
    assert biomart.dataset_version == 'hsapiens_gene_ensembl-GRCh38.p13'

    # Keep the configured instance on the test case.
    self.sb = biomart
def run(args):
    """Run one SciBiomart command-line request described by ``args``.

    The listing flags are checked in a fixed order (marts, datasets, filters,
    attributes, configs). The first flag that is set triggers the matching
    ``list_*`` call and the function returns without running a query.
    Otherwise a query is run with the requested filters and attributes,
    optionally sorted, and handed to ``save_as_csv``.

    Args:
        args: Parsed options object. Attributes read here are the listing
            flags ``marts``, ``datasets``, ``filters``, ``attrs`` and
            ``configs``; ``m`` (mart) and ``d`` (dataset); ``f`` (filters as
            a JSON string); ``a`` (comma separated attributes); ``s``
            (``'t'`` sorts the results); ``o`` (passed to ``save_as_csv``).

    Returns:
        None.
    """
    biomart = SciBiomart()

    # Marts are listed before any mart has been selected.
    if args.marts:
        biomart.list_marts(True)
        return
    biomart.set_mart(args.m)

    # Datasets are listed once the mart is set, before a dataset is selected.
    if args.datasets:
        biomart.list_datasets(True)
        return
    biomart.set_dataset(args.d)

    # The remaining listings run after both the mart and dataset are set.
    if args.filters:
        biomart.list_filters(True)
        return
    if args.attrs:
        biomart.list_attributes(True)
        return
    if args.configs:
        biomart.list_configs(True)
        return

    # No listing was requested, so this is an actual query.
    # The filters arrive as a JSON string and are converted to Python objects.
    query_filters = json.loads(args.f) if args.f else None
    query_attrs = args.a.split(",") if args.a else None
    if not query_attrs and args.s:
        # No attributes given: the starts and ends are needed at least.
        query_attrs = [
            'external_gene_name',
            'chromosome_name',
            'start_position',
            'end_position',
            'strand',
        ]

    biomart.u.dp([
        'Running query on:',
        '\nMart: ', biomart.mart,
        '\nDataset: ', biomart.dataset_version,
        '\nFilters: ', query_filters,
        '\nAttributes: ', query_attrs,
    ])
    table = biomart.run_query(query_filters, query_attrs)

    if args.s == 't':
        # Sorting was requested: drop unnamed genes and fix the column types.
        biomart.u.warn_p(['Removing any genes with no gene name... Required for sorting.'])
        column_types = {
            'start_position': int,
            'end_position': int,
            'strand': int,
            'chromosome_name': str,
        }
        missing_name = table['external_gene_name'].isnull()
        table = table[~missing_name].astype(column_types)
        # Note the user would have had to select the starts and ends.
        table = biomart.sort_df_on_starts(table)

    output_path = biomart.save_as_csv(table, args.o)
    biomart.u.dp(['Saved the output to:', output_path])
def test_list_configs(self):
    """Check the ``list_configs`` guards and the listing for one dataset.

    ``list_configs`` is expected to report ``MART_SET_ERR`` while no mart is
    set and ``DATASET_SET_ERR`` once a mart is set but no dataset is. With
    the ``fcatus_gene_ensembl`` dataset selected, the listing must contain
    the three expected config names and have exactly 23 entries.
    """
    biomart = SciBiomart()

    # Nothing configured yet: the mart guard must fire.
    assert biomart.list_configs()['err'] == MART_SET_ERR

    # Mart configured but no dataset: the dataset guard must fire.
    biomart.set_mart('ENSEMBL_MART_ENSEMBL')
    assert biomart.list_configs()['err'] == DATASET_SET_ERR

    biomart.set_dataset('fcatus_gene_ensembl')
    listing = biomart.list_configs(True)

    expected_names = ['Exportable', 'Importable', 'MainTable']
    # Collect every listed entry that is one of the expected names.
    matched = [entry for entry in listing if entry in expected_names]

    print(len(listing))
    assert len(matched) == len(expected_names)
    assert len(listing) == 23

    # Keep the configured instance on the test case.
    self.sb = biomart