Beispiel #1
0
    def variant(self, case_id, variant_id):
        """Look up a single variant of a case by its id.

            The file referenced by the case's ``variant_source`` is read
            from the start and the first record whose id equals
            ``variant_id`` is formatted and returned. As a side effect the
            parsed header and its VEP / snpEff columns are stored on the
            instance as ``head``, ``vep_header`` and ``snpeff_header``.

            Args:
                case_id (str): Identifier handed to ``self.case``
                variant_id (str): Id of the wanted variant

            Returns:
                variant (Variant): Result of ``self._format_variants`` for
                                   the matching record, None if no record
                                   matches
        """
        case_obj = self.case(case_id=case_id)
        source_path = case_obj.variant_source

        self.head = get_header(source_path)
        self.vep_header = self.head.vep_columns
        self.snpeff_header = self.head.snpeff_columns

        # Records are numbered from 1 in file order
        for position, record in enumerate(VCF(source_path), 1):
            record_id = get_variant_id(variant_line=str(record))
            # NOTE: lstrip drops any leading run of the characters
            # c, h, r, C, H, R - not only a literal 'chr' prefix
            if record_id.lstrip('chrCHR') != variant_id:
                continue
            return self._format_variants(
                variant=record,
                index=position,
                case_obj=case_obj,
                add_all_info=True,
            )

        return None
Beispiel #2
0
    def variant(self, case_id, variant_id):
        """Look up a single variant of a case by its id.

            The file referenced by the case's ``variant_source`` is read
            line by line; lines starting with '#' are skipped and the first
            remaining line whose id equals ``variant_id`` is formatted and
            returned.

            Args:
                case_id (str): Identifier handed to ``self.case``
                variant_id (str): Id of the wanted variant

            Returns:
                variant (Variant): Result of ``self._format_variant`` for
                                   the matching line, None if no line
                                   matches
        """
        case_obj = self.case(case_id=case_id)
        source_path = case_obj.variant_source
        header = self._get_header(source_path)

        # Counts variant lines only (header lines excluded), starting at 1
        position = 0
        for raw_line in get_vcf_handle(infile=source_path):
            if raw_line.startswith('#'):
                continue
            position += 1

            raw_id = get_variant_id(variant_line=raw_line)
            # NOTE: lstrip drops any leading run of the characters
            # c, h, r, C, H, R - not only a literal 'chr' prefix
            if raw_id.lstrip('chrCHR') == variant_id:
                return self._format_variant(
                    variant_line=raw_line,
                    index=position,
                    case_obj=case_obj,
                    head=header,
                )

        return None
def test_get_variant_id_dict():
    """A variant dictionary yields an id of the form CHROM_POS_REF_ALT."""
    fields = {'CHROM': '1', 'POS': '10', 'REF': 'A', 'ALT': 'T'}
    expected_id = '1_10_A_T'

    produced_id = get_variant_id(fields)

    assert produced_id == expected_id
Beispiel #4
0
def father(ctx):
    """Count, per child, the variant calls that are shared with its father.

    Every individual in ``ctx.parent.individuals`` whose ``father`` is not
    '0' forms a child/father duo. For each line of
    ``ctx.parent.variant_file`` and each duo, a call is counted when the
    child has the variant and its genotype passes ``check_high_quality``
    with ``ctx.parent.gq_treshold``; it is additionally counted as common
    when ``check_common_variant`` accepts the child/father genotypes.

    The per-child summary is emitted as JSON when ``ctx.parent.to_json`` is
    set, otherwise as tab separated lines preceded by a '#' header line.
    Output goes to ``ctx.parent.outfile`` when given, else to stdout.

    Args:
        ctx: Context whose ``parent`` carries ``individuals``,
             ``variant_file``, ``header_line``, ``gq_treshold``,
             ``to_json`` and ``outfile``.
    """
    logger.info("Running variant_integrity father version {0}".format(
        variant_integrity.__version__))

    print_columns = ['ind_id', 'fraction_of_common_variants', 'common_variants', 'number_calls']
    duos = []
    analysis_individuals = set()

    for ind_id in ctx.parent.individuals:
        individual_object = ctx.parent.individuals[ind_id]
        # '0' marks an individual without a known father
        if individual_object.father == '0':
            continue

        duo = {
            'child': ind_id,
            'father': individual_object.father
        }
        analysis_individuals.add(ind_id)
        analysis_individuals.add(individual_object.father)
        duos.append(duo)
        logger.info("Duo found: {0}".format(', '.join(duo.values())))

    logger.info("Individuals included in analysis: {0}".format(
        ','.join(analysis_individuals)))

    # One counter record per child, keyed like the output columns
    children = {
        duo['child']: dict(zip(print_columns, [duo['child'], 0, 0, 0]))
        for duo in duos
    }

    for line in ctx.parent.variant_file:
        variant_dict = get_variant_dict(
            variant_line=line,
            header_line=ctx.parent.header_line
        )

        logger.debug("Checking genotype calls for variant {0}".format(
            get_variant_id(variant_dict=variant_dict)
        ))

        genotypes = get_genotypes(variant_dict, analysis_individuals)

        for duo in duos:
            child_id = duo['child']
            child_genotype = genotypes[child_id]
            father_genotype = genotypes[duo['father']]

            # Only variants the child actually carries are of interest
            if not child_genotype.has_variant:
                continue
            # Only the quality of the child's call decides if it is counted
            if not check_high_quality([child_genotype], ctx.parent.gq_treshold):
                continue

            children[child_id]['number_calls'] += 1
            if check_common_variant([child_genotype, father_genotype]):
                children[child_id]['common_variants'] += 1

    results = []
    for child_info in children.values():
        common = child_info['common_variants']
        calls = child_info['number_calls']
        # A child without any counted call would otherwise raise
        # ZeroDivisionError; report a fraction of 0.0 instead
        fraction = common / calls if calls else 0.0
        child_info['fraction_of_common_variants'] = round(fraction, 3)
        results.append(child_info)

    to_json = ctx.parent.to_json
    outfile = ctx.parent.outfile

    if to_json:
        if outfile:
            json.dump(results, outfile)
        else:
            print(json.dumps(results))
        return

    def write_line(text):
        """Send one line to the outfile if there is one, else to stdout."""
        if outfile:
            outfile.write("{0}\n".format(text))
        else:
            print(text)

    write_line("#{0}".format('\t'.join(print_columns)))
    for result in results:
        write_line("{0}\t{1}\t{2}\t{3}".format(
            result['ind_id'], result['fraction_of_common_variants'],
            result['common_variants'], result['number_calls']
        ))
Beispiel #5
0
def mendel(ctx):
    """Count, per child, the mendelian errors found in every trio.

    Trios are taken from each family in ``ctx.parent.families``. Within a
    trio the member whose ``mother`` is also in the trio is the child; of
    the remaining members the one with ``sex == 1`` is the father and the
    other the mother. For each line of ``ctx.parent.variant_file`` and each
    trio, a call is counted when the child has the variant and all three
    genotypes pass ``check_high_quality`` with ``ctx.parent.gq_treshold``;
    it is additionally counted as an error when ``check_mendelian_error``
    reports one.

    The per-child summary is emitted as JSON when ``ctx.parent.to_json`` is
    set, otherwise as tab separated lines preceded by a '#' header line.
    Output goes to ``ctx.parent.outfile`` when given, else to stdout.

    Args:
        ctx: Context whose ``parent`` carries ``families``, ``individuals``,
             ``variant_file``, ``header_line``, ``gq_treshold``,
             ``to_json`` and ``outfile``.
    """
    logger.info("Running variant_integrity mendel {0}".format(
        variant_integrity.__version__))

    print_columns = ['ind_id', 'fraction_of_errors', 'mendelian_errors', 'number_calls']
    trios = []
    analysis_individuals = set()

    for family in ctx.parent.families:
        family_object = ctx.parent.families[family]
        for trio in family_object.trios:
            trio_individuals = {
                'mother': None,
                'father': None,
                'child': None,
            }
            for ind_id in trio:
                analysis_individuals.add(ind_id)
                individual_object = ctx.parent.individuals[ind_id]
                if individual_object.mother in trio:
                    role = 'child'
                elif individual_object.sex == 1:
                    role = 'father'
                else:
                    role = 'mother'
                trio_individuals[role] = ind_id
            trios.append(trio_individuals)
            logger.info("Trio found: {0}".format(
                ', '.join(trio_individuals.values())
            ))

    logger.info("Individuals included in analysis: {0}".format(
        ','.join(analysis_individuals)))

    # One counter record per child, keyed like the output columns
    children = {
        trio['child']: dict(zip(print_columns, [trio['child'], 0, 0, 0]))
        for trio in trios
    }

    for line in ctx.parent.variant_file:
        variant_dict = get_variant_dict(
            variant_line=line,
            header_line=ctx.parent.header_line
        )

        logger.debug("Checking genotype calls for variant {0}".format(
            get_variant_id(variant_dict=variant_dict)
        ))

        genotypes = get_genotypes(variant_dict, analysis_individuals)

        for trio in trios:
            child_id = trio['child']
            child_genotype = genotypes[child_id]
            mother_genotype = genotypes[trio['mother']]
            father_genotype = genotypes[trio['father']]

            # Only variants the child actually carries are of interest
            if not child_genotype.has_variant:
                continue
            # All three calls must be of high quality to count the variant
            trio_genotypes = [child_genotype, mother_genotype, father_genotype]
            if not check_high_quality(trio_genotypes, ctx.parent.gq_treshold):
                continue

            children[child_id]['number_calls'] += 1
            if check_mendelian_error(child_genotype, mother_genotype, father_genotype):
                children[child_id]['mendelian_errors'] += 1

    results = []
    for child_info in children.values():
        errors = child_info['mendelian_errors']
        calls = child_info['number_calls']
        # A child without any counted call would otherwise raise
        # ZeroDivisionError; report a fraction of 0.0 instead
        fraction = errors / calls if calls else 0.0
        child_info['fraction_of_errors'] = round(fraction, 3)
        results.append(child_info)

    to_json = ctx.parent.to_json
    outfile = ctx.parent.outfile

    if to_json:
        if outfile:
            json.dump(results, outfile)
        else:
            print(json.dumps(results))
        return

    def write_line(text):
        """Send one line to the outfile if there is one, else to stdout."""
        if outfile:
            outfile.write("{0}\n".format(text))
        else:
            print(text)

    write_line("#{0}".format('\t'.join(print_columns)))
    for result in results:
        write_line("{0}\t{1}\t{2}\t{3}".format(
            result['ind_id'], result['fraction_of_errors'],
            result['mendelian_errors'], result['number_calls']
        ))
def test_send_wrong():
    """Calling get_variant_id without any argument must raise."""
    expect_failure = pytest.raises(Exception)
    with expect_failure:
        get_variant_id()
def test_get_variant_id_line():
    """A tab separated variant line yields an id of the form CHROM_POS_REF_ALT."""
    raw_line = '1\t10\t.\tA\tT'
    expected_id = '1_10_A_T'

    produced_id = get_variant_id(variant_line=raw_line)

    assert produced_id == expected_id
Beispiel #8
0
def test_get_variant_id_dict():
    """A variant dictionary yields an id of the form CHROM_POS_REF_ALT."""
    fields = {
        'CHROM': '1',
        'POS': '10',
        'REF': 'A',
        'ALT': 'T',
    }
    expected_id = '1_10_A_T'

    produced_id = get_variant_id(fields)

    assert produced_id == expected_id
Beispiel #9
0
def test_send_wrong():
    """Calling get_variant_id without any argument must raise."""
    expect_failure = pytest.raises(Exception)
    with expect_failure:
        get_variant_id()
Beispiel #10
0
def test_get_variant_id_line():
    """A tab separated variant line yields an id of the form CHROM_POS_REF_ALT."""
    raw_line = '1\t10\t.\tA\tT'
    expected_id = '1_10_A_T'

    produced_id = get_variant_id(variant_line=raw_line)

    assert produced_id == expected_id