def make_lists_of_accessions_of_set_size(self):
    """make_lists_of_accessions_of_set_size: should return a list of strings

    Four accessions passed with size_limit=3 are expected to come back as
    a list holding a single space-delimited string.
    """
    accessions = ['HM780503', 'HM780506', 'HM780660', 'HM780780']
    expected = ['HM780503 HM780506 HM780660 HM780780']
    # NOTE(review): this test shares its name with the helper it calls;
    # presumably it lives in a TestCase class, so the bare name below
    # resolves to the module-level helper -- confirm.
    actual = make_lists_of_accessions_of_set_size(accessions, size_limit=3)
    self.assertEqual(actual, expected)
def bulk_efetch(query_ids):
    """Wraps the EUtils call, yielding the fetched data one bin at a time.

    query_ids: iterable of ids; it is materialised into a list and split
    into bins by make_lists_of_accessions_of_set_size (default bin size).

    Each bin is looked up through an EUtils object created with
    db='nucleotide' and rettype='gb', and whatever .read() returns for
    that bin is yielded before the next bin is requested.
    """
    # Bin the ids before fetching: the caller may be asking for millions
    # of records, and EUtils would normally hold all of them in memory.
    accession_bins = make_lists_of_accessions_of_set_size(list(query_ids))
    eutils = EUtils(db='nucleotide', rettype='gb')
    for accession_bin in accession_bins:
        result = eutils[accession_bin]
        yield result.read()
def __getitem__(self, query):
    """Gets a query from NCBI and returns a handle to the result.

    query is handled according to its type:
      - slice: split into batches by
        make_lists_of_expanded_slices_of_set_size
      - list or tuple: assumed to hold accessions; split into batches by
        make_lists_of_accessions_of_set_size
      - anything else: most likely a general set of search terms, e.g.
        '9606[taxid] OR 28901[taxid]', and passed on as a single batch

    Returns a handle to the result (either in memory or file on disk).

    WARNING: result is not guaranteed to contain any data.
    """
    if isinstance(query, slice):
        batches = make_lists_of_expanded_slices_of_set_size(query)
    elif isinstance(query, (list, tuple)):
        batches = make_lists_of_accessions_of_set_size(query)
    else:
        # Not a slice or sequence, so forward it untouched.
        batches = [query]
    return self.grab_data(batches)
def make_lists_of_accessions_of_set_size(self):
    """make_lists_of_accessions_of_set_size: should return a list of strings

    With size_limit=3, the four input accessions are expected back as one
    space-joined string inside a list.
    """
    query_accessions = ['HM780503', 'HM780506', 'HM780660', 'HM780780']
    expected_terms = ['HM780503 HM780506 HM780660 HM780780']
    # NOTE(review): the call below is meant to reach the module-level
    # helper of the same name, which holds if this is a method on a
    # TestCase class -- confirm.
    observed_terms = make_lists_of_accessions_of_set_size(
        query_accessions,
        size_limit=3,
    )
    self.assertEqual(observed_terms, expected_terms)