def tearDownClass(cls):
    """Delete the documents created for the tests, then run the parent hook."""
    # empty each collection populated by the fixtures (Breed, Counter and
    # Dataset, in this order)
    for document_class in (Breed, Counter, Dataset):
        document_class.objects().delete()

    super().tearDownClass()
def setUpClass(cls):
    """Create the dataset, breed and counter documents needed by the tests."""
    # the parent hook initializes the mongomock instance
    super().setUpClass()

    # certain tests need a dataset
    archive_contents = [
        "plinktest.map",
        "plinktest.ped",
        "plinktest.fam",
        "plinktest.bim",
        "plinktest.bed",
        "snplist.txt",
        "finalreport.txt",
    ]
    dataset = Dataset(
        file="test.zip",
        country="Italy",
        species="Sheep",
        contents=archive_contents,
    )
    dataset.save()

    # a breed (with its alias) must be defined in order to get a smarter id
    texel_alias = BreedAlias(fid="TEX_IT", dataset=dataset, country="Italy")
    texel = Breed(
        species="Sheep",
        name="Texel",
        code="TEX",
        n_individuals=0,
        aliases=[texel_alias],
    )
    texel.save()

    # a counter object is needed for both sheep and goat
    for counter_id in ("sampleSheep", "sampleGoat"):
        Counter(pk=counter_id, sequence_value=0).save()
def test_import_breeds_force_country(self, my_working_dir):
    """Import breeds from a worksheet and check the alias stored in database.

    ``my_working_dir`` is a mock object: its ``return_value`` is pointed at a
    temporary directory where the worksheet is written (presumably it patches
    the dataset working directory -- confirm against the decorator).
    """
    cli_args = [
        "--species", "sheep",
        "--dataset", "test.zip",
        "--datafile", "breed.xlsx",
        "--code_column", "Code",
        "--breed_column", "Name",
        "--country_column", "Country",
    ]

    # the temporary directory is removed when leaving the context manager
    with tempfile.TemporaryDirectory() as tmp_name:
        working_dir = pathlib.Path(tmp_name)
        my_working_dir.return_value = working_dir

        # write the worksheet inside the temporary folder
        self.workbook.save(f"{working_dir}/breed.xlsx")

        result = self.runner.invoke(self.main_function, cli_args)
        self.assertEqual(0, result.exit_code)

        # exactly one breed is expected, carrying a single alias
        breeds = Breed.objects()
        self.assertEqual(breeds.count(), 1)

        imported = breeds.get()
        expected_alias = BreedAlias(
            fid="TEX",
            dataset=self.dataset,
            country="Italy",
        )
        self.assertEqual(imported.aliases, [expected_alias])
def test_sample_relies_dataset(self):
    """Getting two samples with the same original id is not a problem.

    The same sample line is submitted for ``self.dataset`` and for a copy of
    it saved as a distinct document: two samples are expected in database,
    sharing the same ``original_id``.

    Everything created here is removed in a ``finally`` clause, so a failing
    assertion cannot leave the additional dataset and samples behind.
    """
    # get a sample line
    line = self.lines[0]

    # get the breed matching the sample line in the reference dataset
    breed = Breed.objects(aliases__match={
        'fid': line[0],
        'dataset': self.dataset
    }).get()

    # create a copy of the dataset: resetting the id makes save() store it
    # as a new document
    new_dataset = deepcopy(self.dataset)
    new_dataset.file = "test2.zip"
    new_dataset.id = None
    new_dataset.save()

    # track the samples created so far, in order to clean up even when one
    # of the following steps fails
    created_samples = []

    try:
        # create two sample objects with the same original_id
        first = self.plinkio.get_or_create_sample(
            line, self.dataset, breed)
        created_samples.append(first)

        second = self.plinkio.get_or_create_sample(
            line, new_dataset, breed)
        created_samples.append(second)

        self.assertEqual(SampleSheep.objects.count(), 2)
        self.assertEqual(first.original_id, second.original_id)

    finally:
        # samples need to be deleted before the new dataset can be removed
        # (mongoengine.DENY behaviour for deleting samples): latest first
        for sample in reversed(created_samples):
            sample.delete()

        # reset database to original state
        new_dataset.delete()
def test_add_breed(self):
    """Add a breed with two aliases through the command line interface."""
    cli_args = [
        "--species", "sheep",
        "--name", "Texel",
        "--code", "TEX",
        "--dataset", "test.zip",
        "--alias", "TEXEL_IT",
        "--alias", "0",
    ]

    result = self.runner.invoke(self.main_function, cli_args)
    self.assertEqual(0, result.exit_code)

    # exactly one breed is expected in database
    breeds = Breed.objects()
    self.assertEqual(breeds.count(), 1)

    # one alias for each --alias option, in the same order
    expected_aliases = [
        BreedAlias(fid="TEXEL_IT", dataset=self.dataset),
        BreedAlias(fid="0", dataset=self.dataset),
    ]
    stored = breeds.get()
    self.assertEqual(stored.aliases, expected_aliases)
def test_get_or_create_sample(self):
    """Calling get_or_create_sample twice yields the same single sample."""
    # take the first sample line and the breed matching its first field
    line = self.lines[0]
    alias_filter = {"fid": line[0], "dataset": self.dataset}
    breed = Breed.objects(aliases__match=alias_filter).get()

    # no individuals for such breed, no samples in database
    self.assertEqual(breed.n_individuals, 0)
    self.assertEqual(SampleSheep.objects.count(), 0)

    # the first call creates the sample, the second one has to return the
    # same individual: after each call one sample is expected in database
    # and one individual for the breed
    collected = []

    for _ in range(2):
        sample = self.plinkio.get_or_create_sample(
            line, self.dataset, breed)
        self.assertIsInstance(sample, SampleSheep)

        # assert an element in database
        self.assertEqual(SampleSheep.objects.count(), 1)

        # check individuals after refreshing the breed from database
        breed.reload()
        self.assertEqual(breed.n_individuals, 1)

        collected.append(sample)

    reference, repeated = collected
    self.assertEqual(reference, repeated)
def main(
        src_dataset, dst_dataset, datafile, code_column, country_column,
        id_column, sex_column, chip_name):
    """Read samples from a datafile and add them to the database.

    Args:
        src_dataset: archive of the dataset which provides ``datafile``.
        dst_dataset: archive of the dataset used to define samples.
        datafile: the file, within ``src_dataset``, to read samples from.
        code_column: name of the column with the breed code (searched
            among breed aliases of ``dst_dataset``).
        country_column: name of the column with the country.
        id_column: name of the column with the original sample id.
        sex_column: name of the column with the sex; when false-like, no
            sex is recorded.
        chip_name: forwarded as is to ``get_or_create_sample``.
    """
    script_name = Path(__file__).name
    logger.info(f"{script_name} started")

    # custom method to check a dataset and ensure that needed stuff exists
    src_dataset, [datapath] = fetch_and_check_dataset(
        archive=src_dataset,
        contents=[datafile],
    )

    # this is the dataset samples will be defined in
    dst_dataset, _ = fetch_and_check_dataset(
        archive=dst_dataset,
        contents=[],
    )

    # the sample class depends on the dataset species
    SampleSpecie = get_sample_species(dst_dataset.species)

    # read datafile
    data = pandas_open(datapath)
    logger.info(f"Got columns: {data.columns.to_list()}")

    for _, row in data.iterrows():
        logger.debug(f"Got: {row.to_list()}")

        code = row.get(code_column)
        country = row.get(country_column)
        original_id = row.get(id_column)

        # keep the sex only when a column is provided and its value is known
        sex = None

        if sex_column:
            parsed_sex = SEX.from_string(str(row.get(sex_column)))

            if parsed_sex != SEX.UNKNOWN:
                sex = parsed_sex

        logger.debug(
            f"Got code: {code}, country: {country}, "
            f"original_id: {original_id}, sex: {sex}"
        )

        # process a country by doing a fuzzy search
        # HINT: this function caches results relying on its arguments using
        # lru_cache, see find_country implementation for more information
        country = find_country(country)

        # get breed from database
        breed_query = Breed.objects(
            aliases__match={'fid': code, 'dataset': dst_dataset})
        breed = breed_query.get()
        logger.debug(f"found breed '{breed}'")

        # get or create a new Sample object
        sample, created = get_or_create_sample(
            SampleSpecie,
            original_id,
            dst_dataset,
            breed,
            country.name,
            chip_name,
            sex,
        )

        if created:
            logger.info(f"Sample '{sample}' added to database")

    logger.info(f"{script_name} ended")