Exemple #1
0
def test_get_mongo_variant(setup_database, vcf_case, get_institute):
    """Build a mongo variant from the first parsed VCF record and check it.

    The ``setup_database`` fixture is requested but never referenced in the
    body. ``one_variant`` is not defined in this function; presumably it is
    a module-level path to a VCF file -- TODO confirm.
    """
    parser = VCFParser(infile=one_variant)

    # Map every individual id reported by the parser onto itself.
    sample_ids = parser.individuals
    id_to_name = dict((sample_id, sample_id) for sample_id in sample_ids)

    # Consume the whole parser; only the first record is used below.
    parsed_variants = list(parser)
    first_variant = parsed_variants[0]

    built = get_mongo_variant(
        variant=first_variant,
        variant_type='clinical',
        individuals=id_to_name,
        case=vcf_case,
        institute=get_institute,
        variant_count=100,
    )

    # Coordinates and alleles
    assert built.chromosome == '14'
    assert built.reference == 'C'
    assert built.alternative == 'A'

    # Sizes of the attached gene and compound collections
    assert len(built.genes) == 2
    assert len(built.compounds) == 3
Exemple #2
0
    def add_variants(self, vcf_file, variant_type, case,
                     variant_number_treshold=5000, rank_score_threshold=0):
        """Add the variants of a vcf file to the mongo database.

        ``self.delete_variants(case_id, variant_type)`` is called before any
        new variant is saved. Variants are then read from the vcf file in
        order and saved one at a time until the file is exhausted, a variant
        with a rank score not above ``rank_score_threshold`` is seen, or
        ``variant_number_treshold`` variants have been saved.

            Args:
                vcf_file(str): Path to a vcf file
                variant_type(str): 'research' or 'clinical'
                case(Case): The case for which the variants should be uploaded
                variant_number_treshold(int): Maximum number of variants to
                    insert. A falsy value (0 or None) disables the limit.
                rank_score_threshold(int): Parsing stops at the first variant
                    whose rank score is not greater than this value
        """
        case_id = case.case_id

        logger.info("Setting up a variant parser")
        variant_parser = VCFParser(infile=vcf_file)
        nr_of_variants = 0

        self.delete_variants(case_id, variant_type)
        institute = self.institute(institute_id=case.owner)
        start_inserting_variants = datetime.now()

        # Check which individuals that exists in the vcf file.
        # Save the individuals in a dictionary with individual ids as keys
        # and display names as values
        individuals = {}
        # The individuals of the vcf do not change while looping over the
        # case, so look them up once.
        vcf_individuals = variant_parser.individuals
        logger.info("Checking which individuals in ped file exists in vcf")
        for individual in case.individuals:
            individual_id = individual.individual_id
            display_name = individual.display_name
            logger.debug("Checking individual {0}".format(individual_id))
            if individual_id in vcf_individuals:
                logger.debug("Individual {0} found".format(individual_id))
                individuals[individual_id] = display_name
            else:
                logger.warning("Individual {0} is present in ped file but"\
                                " not in vcf".format(individual_id))

        logger.info('Start parsing variants')

        for variant in variant_parser:
            logger.debug("Parsing variant {0}".format(variant['variant_id']))

            # Stop at the first variant that is not above the rank score
            # threshold. NOTE(review): breaking here (instead of skipping)
            # presumably relies on the vcf being sorted by descending rank
            # score -- confirm.
            if not float(variant['rank_scores'][case.display_name]) > rank_score_threshold:
                logger.info("Lower rank score threshold reached after {0}"\
                            " variants".format(nr_of_variants))
                break

            # nr_of_variants counts the variants saved so far, so the limit
            # is reached as soon as the count equals the threshold. The
            # previous strict '>' comparison let one extra variant through.
            if variant_number_treshold and nr_of_variants >= variant_number_treshold:
                logger.info("Variant number threshold reached. ({0})".format(
                            variant_number_treshold))
                break

            nr_of_variants += 1

            mongo_variant = get_mongo_variant(
                variant=variant,
                variant_type=variant_type,
                individuals=individuals,
                case=case,
                institute=institute,
                variant_count=nr_of_variants,
            )
            logger.debug("Saving variant {0}".format(mongo_variant.display_name))
            mongo_variant.save()

            # Progress report for large files
            if nr_of_variants % 1000 == 0:
                logger.info('{0} variants parsed'.format(nr_of_variants))

        logger.info("Parsing variants done")
        logger.info("{0} variants inserted".format(nr_of_variants))
        logger.info("Time to insert variants: {0}".format(
          datetime.now() - start_inserting_variants))