Example #1
0
def add_metadata_by_sample(
        data: pd.DataFrame,
        dst_dataset: Dataset,
        columns: dict):
    """Update location and metadata of samples matched by original id.

    Every row of ``data`` is resolved to an original id through
    ``columns["id_column"]``; each sample of ``dst_dataset`` having that
    original id receives the location derived from the row and, when some
    metadata was collected, the metadata too. Each touched sample is saved.

    Args:
        data: table with one row per sample to update.
        dst_dataset: dataset whose samples are updated.
        columns: column configuration; ``"id_column"`` is read here, the
            whole mapping is forwarded to ``get_locations`` and
            ``get_metadata``.
    """
    # the sample class to query depends on the species of the dataset
    sample_model = get_sample_species(dst_dataset.species)

    for _, record in data.iterrows():
        original_id = record.get(columns["id_column"])

        # values extracted from this row for the current original id
        location = get_locations(record, columns, original_id)
        metadata = get_metadata(record, columns, original_id)

        matching_samples = sample_model.objects.filter(
            dataset=dst_dataset, original_id=original_id)

        for sample in matching_samples:
            logger.info(f"Updating '{sample}'")

            # location is always assigned, metadata only when not empty
            sample.location = location
            if metadata:
                sample.metadata = metadata

            sample.save()
Example #2
0
def add_phenotype_by_breed(data: pd.DataFrame, dst_dataset: Dataset,
                           columns: dict):
    """Create or update phenotypes for all samples of each breed in data.

    Every row of ``data`` is resolved to a breed through
    ``columns["breed_column"]``; the values extracted from the row are then
    applied, via ``create_or_update_phenotype``, to each sample of
    ``dst_dataset`` belonging to that breed.

    Args:
        data: table with one row per breed.
        dst_dataset: dataset whose samples are updated.
        columns: column configuration; ``"breed_column"`` is read here, the
            whole mapping is forwarded to ``get_named_columns`` and
            ``get_additional_column``.
    """
    logger.debug(f"Received columns: {columns}")

    # the sample class to query depends on the species of the dataset
    sample_model = get_sample_species(dst_dataset.species)

    for index, row in data.iterrows():
        logger.debug(f"{index}, {row}")
        breed = row.get(columns["breed_column"])

        # values for the columns modelled in the database first, then the
        # remaining (additional) ones
        modelled_values = get_named_columns(row, columns, breed)
        extra_values = get_additional_column(row, columns, breed)

        breed_samples = sample_model.objects.filter(
            dataset=dst_dataset, breed=breed)

        for sample in breed_samples:
            create_or_update_phenotype(sample, modelled_values, extra_values)
Example #3
0
 def test_get_sample_goat(self):
     """The "Goat" species resolves to the SampleGoat class."""
     resolved = get_sample_species(species="Goat")
     self.assertEqual(resolved, SampleGoat)
Example #4
0
 def test_get_sample_sheep(self):
     """The "Sheep" species resolves to the SampleSheep class."""
     resolved = get_sample_species(species="Sheep")
     self.assertEqual(resolved, SampleSheep)
Example #5
0
def main(
        src_dataset, dst_dataset, datafile, code_column, country_column,
        id_column, sex_column, chip_name):
    """Define the samples of a dataset from the rows of a data file.

    ``datafile`` is resolved within ``src_dataset`` and opened with
    ``pandas_open``. For each row the breed code, country and original id
    are read (plus the sex, when ``sex_column`` is set); the breed is
    searched by its alias in ``dst_dataset`` and a sample is retrieved or
    created with ``get_or_create_sample``.

    Args:
        src_dataset: archive of the dataset providing ``datafile``.
        dst_dataset: archive of the dataset the samples belong to.
        datafile: content of ``src_dataset`` to read rows from.
        code_column: column holding the breed code (alias ``fid``).
        country_column: column holding the country.
        id_column: column holding the sample original id.
        sex_column: column holding the sex; when falsy no sex is read.
        chip_name: chip name forwarded to ``get_or_create_sample``.
    """
    logger.info(f"{Path(__file__).name} started")

    # resolve the source dataset together with the path of the single
    # requested content (the helper checks that what is needed exists)
    src_dataset, (datapath,) = fetch_and_check_dataset(
        archive=src_dataset, contents=[datafile])

    # samples are defined against this dataset: no content required from it
    dst_dataset, _ = fetch_and_check_dataset(archive=dst_dataset, contents=[])

    # the sample class to use depends on the species of the dataset
    sample_model = get_sample_species(dst_dataset.species)

    data = pandas_open(datapath)
    logger.info(f"Got columns: {data.columns.to_list()}")

    for _, row in data.iterrows():
        logger.debug(f"Got: {row.to_list()}")

        code = row.get(code_column)
        country = row.get(country_column)
        original_id = row.get(id_column)

        # sex is optional, and an unknown sex is treated as not provided
        sex = None
        if sex_column:
            parsed_sex = SEX.from_string(str(row.get(sex_column)))
            sex = None if parsed_sex == SEX.UNKNOWN else parsed_sex

        logger.debug(
            f"Got code: {code}, country: {country}, "
            f"original_id: {original_id}, sex: {sex}"
        )

        # resolve the raw country value with a fuzzy search
        # HINT: find_country is reported to cache its results by argument
        # (lru_cache); see its implementation for details
        country_match = find_country(country)

        # the breed is identified by its code within the destination dataset
        breed = Breed.objects(
            aliases__match={'fid': code, 'dataset': dst_dataset}).get()
        logger.debug(f"found breed '{breed}'")

        sample, is_new = get_or_create_sample(
            sample_model, original_id, dst_dataset, breed,
            country_match.name, chip_name, sex)

        if is_new:
            logger.info(f"Sample '{sample}' added to database")

    logger.info(f"{Path(__file__).name} ended")