예제 #1
0
파일: load.py 프로젝트: moonso/loqusdb
def load_variants(adapter, family_id, individuals, vcf, nr_variants=None, skip_case_id=False, gq_treshold=None):
    """Load variants for a family into the database.

    Every record in ``vcf`` is converted with ``get_formated_variant``;
    records for which that helper returns a falsy value are skipped and the
    remaining ones are passed to ``adapter.add_variant`` one at a time.

    Args:
        adapter (loqusdb.plugins.Adapter): initialized plugin
        family_id (str): unique family identifier
        individuals (List[str]): list to match individuals
        vcf (iterable(dict)): an iterable of variant dictionaries
        nr_variants (int): handed to the progress bar as its length
        skip_case_id (bool): when true, ``family_id`` is replaced by None
                             before the variants are formatted
        gq_treshold (int): threshold forwarded to ``get_formated_variant``
                           (presumably a genotype-quality cutoff);
                           20 is used when None is given
    """
    # Fall back to the default only when no threshold was supplied at all:
    # ``gq_treshold or 20`` would silently turn an explicit 0 into 20.
    if gq_treshold is None:
        gq_treshold = 20

    if skip_case_id:
        family_id = None

    # Loop over the variants in the vcf
    with click.progressbar(vcf, label="Inserting variants", length=nr_variants) as bar:
        for variant in bar:
            # Creates a variant that is ready to insert into the database
            formated_variant = get_formated_variant(
                variant=variant, individuals=individuals, family_id=family_id, gq_treshold=gq_treshold
            )

            # The formatter returns a falsy value for records to skip
            if formated_variant:
                adapter.add_variant(variant=formated_variant)
예제 #2
0
def load_variants(adapter, family_id, individuals, vcf, nr_variants=None, 
                  skip_case_id=False, gq_treshold=None):
    """Load variants for a family into the database.

    Each record of ``vcf`` is run through ``get_formated_variant``. A falsy
    result means the record is skipped; anything else is stored with
    ``adapter.add_variant``.

    Args:
        adapter (loqusdb.plugins.Adapter): initialized plugin
        family_id (str): unique family identifier
        individuals (List[str]): list to match individuals
        vcf (iterable(dict)): an iterable of variant dictionaries
        nr_variants (int): handed to the progress bar as its length
        skip_case_id (bool): when true, ``family_id`` is replaced by None
                             before the variants are formatted
        gq_treshold (int): threshold forwarded to ``get_formated_variant``
                           (presumably a genotype-quality cutoff);
                           20 is used when None is given
    """
    # ``gq_treshold or 20`` would override an explicit threshold of 0,
    # so only substitute the default for a missing value.
    if gq_treshold is None:
        gq_treshold = 20

    if skip_case_id:
        family_id = None

    # Loop over the variants in the vcf
    with click.progressbar(vcf, label="Inserting variants", length=nr_variants) as bar:
        for variant in bar:
            # Creates a variant that is ready to insert into the database
            formated_variant = get_formated_variant(
                variant=variant,
                individuals=individuals,
                family_id=family_id,
                gq_treshold=gq_treshold,
            )

            # The formatter returns a falsy value for records to skip
            if formated_variant:
                adapter.add_variant(variant=formated_variant)
def test_format_variant_no_gq(variant_no_gq, individuals, case_id):
    """Formatting the ``variant_no_gq`` fixture must give an empty dict."""
    call_kwargs = {
        'variant': variant_no_gq,
        'individuals': individuals,
        'family_id': case_id,
    }
    result = get_formated_variant(**call_kwargs)
    assert result == {}
예제 #4
0
def test_format_variant_no_call(variant_no_call, individuals, case_id):
    """Formatting the ``variant_no_call`` fixture must give an empty dict."""
    result = get_formated_variant(
        family_id=case_id,
        individuals=individuals,
        variant=variant_no_call,
    )
    assert result == {}
예제 #5
0
def test_format_variant_no_call(variant_no_call, individuals, case_id,
                                ind_positions):
    """Formatting the ``variant_no_call`` fixture must give an empty dict."""
    call_kwargs = dict(
        variant=variant_no_call,
        ind_positions=ind_positions,
        individuals=individuals,
        family_id=case_id,
    )
    result = get_formated_variant(**call_kwargs)
    assert result == {}
예제 #6
0
def test_format_variant_no_family_id(het_variant, individuals, ind_positions):
    """Format ``het_variant`` without a family id.

    The result must be truthy, carry no ``family_id`` value, and report
    zero homozygote and zero hemizygote observations.
    """
    formated_variant = get_formated_variant(variant=het_variant,
                                            individuals=individuals,
                                            ind_positions=ind_positions,
                                            family_id=None)
    assert formated_variant
    # Compare against the None singleton by identity, not equality
    assert formated_variant.get('family_id') is None
    assert formated_variant['homozygote'] == 0
    assert formated_variant['hemizygote'] == 0
예제 #7
0
def test_format_hemizygote_variant(hem_variant, individuals, case_id,
                                   ind_positions):
    """The ``hem_variant`` fixture must count as hemizygote, not homozygote."""
    call_kwargs = dict(
        variant=hem_variant,
        individuals=individuals,
        ind_positions=ind_positions,
        family_id=case_id,
    )
    result = get_formated_variant(**call_kwargs)

    assert result['homozygote'] == 0
    assert result['hemizygote'] == 1
def test_format_homozygote_variant(hom_variant, individuals, case_id):
    """The ``hom_variant`` fixture must count as homozygote, not hemizygote."""
    call_kwargs = {
        'variant': hom_variant,
        'individuals': individuals,
        'family_id': case_id,
    }
    result = get_formated_variant(**call_kwargs)

    assert result['homozygote'] == 1
    assert result['hemizygote'] == 0
예제 #9
0
def test_format_hemizygote_variant(hem_variant, individuals, case_id):
    """The ``hem_variant`` fixture must count as hemizygote, not homozygote."""
    result = get_formated_variant(
        family_id=case_id,
        individuals=individuals,
        variant=hem_variant,
    )

    assert result['homozygote'] == 0
    assert result['hemizygote'] == 1
예제 #10
0
def test_format_variant_chr_prefix(variant_chr, individuals, ind_positions,
                                   case_id):
    """The formatted ``chrom`` must equal ``CHROM`` minus its first three characters."""
    call_kwargs = dict(
        variant=variant_chr,
        individuals=individuals,
        ind_positions=ind_positions,
        family_id=case_id,
        gq_treshold=20,
    )
    result = get_formated_variant(**call_kwargs)

    # Presumably the dropped characters are a "chr" prefix on the fixture
    assert result['chrom'] == variant_chr.CHROM[3:]
예제 #11
0
def test_format_variant_no_gq(variant_no_gq, individuals, ind_positions,
                              case_id):
    """With a threshold of 20 the ``variant_no_gq`` fixture must format to an empty dict."""
    call_kwargs = dict(
        variant=variant_no_gq,
        individuals=individuals,
        ind_positions=ind_positions,
        family_id=case_id,
        gq_treshold=20,
    )
    result = get_formated_variant(**call_kwargs)
    assert result == {}
예제 #12
0
def test_format_variant_no_family_id(het_variant, individuals):
    """Format ``het_variant`` without a family id.

    The result must be truthy, carry no ``family_id`` value, and report
    zero homozygote and zero hemizygote observations.
    """
    formated_variant = get_formated_variant(
        variant=het_variant,
        individuals=individuals,
        family_id=None,
    )
    assert formated_variant
    # Compare against the None singleton by identity, not equality
    assert formated_variant.get('family_id') is None
    assert formated_variant['homozygote'] == 0
    assert formated_variant['hemizygote'] == 0
예제 #13
0
def test_format_variant_no_call():
    """A variant line whose only genotype is './.' must format to an empty dict."""
    result = get_formated_variant(
        variant_line=get_variant(genotypes=['./.']),
        header_line=get_header_line(),
        affected_individuals={'proband'},
    )

    assert result == {}
예제 #14
0
def test_format_variant_no_header():
    """Formatting a variant line against an empty header must raise."""
    variant_line = get_variant(genotypes=['1/1'])
    header_line = []

    affected_individuals = set(['proband'])

    # Only the raised exception matters here, so the return value is not
    # bound to a name.
    with pytest.raises(Exception):
        get_formated_variant(
            variant_line=variant_line,
            header_line=header_line,
            affected_individuals=affected_individuals,
        )
예제 #15
0
def test_format_homozygote_variant():
    """A '1/1' genotype must give id '1_10_A_T' and one homozygote observation."""
    result = get_formated_variant(
        variant_line=get_variant(genotypes=['1/1']),
        header_line=get_header_line(),
        affected_individuals={'proband'},
    )

    assert result['_id'] == '1_10_A_T'
    assert result['homozygote'] == 1
예제 #16
0
def delete_variants(adapter, vcf, ind_positions, family_id, individuals):
    """Delete variants for a case in the database

    Every record in ``vcf`` is formatted with ``get_formated_variant``;
    records that format to a falsy value are skipped, all others are passed
    to ``adapter.delete_variant``. Progress is logged each time the ``chrom``
    value of the formatted variant changes.

    Args:
        adapter (loqusdb.plugins.Adapter)
        vcf (iterable(dict))
        ind_positions(dict)
        family_id (str)
        individuals: forwarded unchanged to ``get_formated_variant``

    Returns:
        nr_of_deleted (int): Number of deleted variants
    """
    nr_of_deleted = 0
    # Start of the timing window for the chromosome currently being processed
    chrom_time = datetime.now()
    current_chrom = None

    for variant in vcf:
        formated_variant = get_formated_variant(variant=variant,
                                                ind_positions=ind_positions,
                                                individuals=individuals,
                                                family_id=family_id)

        # Nothing to delete for records the formatter rejected
        if not formated_variant:
            continue

        new_chrom = formated_variant.get('chrom')

        adapter.delete_variant(formated_variant)
        nr_of_deleted += 1

        if new_chrom == current_chrom:
            continue

        # Chromosome changed: report the one just finished (if any) and
        # restart the timer for the new one.
        if current_chrom:
            logger.info("Chromosome {0} done".format(current_chrom))
            logger.info("Time to delete chromosome {0}: {1}".format(
                current_chrom,
                datetime.now() - chrom_time))
        logger.info("Start deleting chromosome {0}".format(new_chrom))

        current_chrom = new_chrom
        chrom_time = datetime.now()

    return nr_of_deleted
예제 #17
0
def test_format_variant(het_variant, individuals, ind_positions, case_id):
    """Check every field of the formatted ``het_variant`` against the fixture."""
    call_kwargs = dict(
        variant=het_variant,
        individuals=individuals,
        ind_positions=ind_positions,
        family_id=case_id,
    )
    result = get_formated_variant(**call_kwargs)

    # The id is chrom, pos, ref and first alt joined by underscores
    id_parts = (
        het_variant.CHROM,
        str(het_variant.POS),
        het_variant.REF,
        het_variant.ALT[0],
    )
    expected_id = '_'.join(id_parts)

    assert result
    assert result['_id'] == expected_id
    assert result['chrom'] == het_variant.CHROM
    assert result['pos'] == het_variant.POS
    assert result['ref'] == het_variant.REF
    assert result['alt'] == het_variant.ALT[0]
    assert result['family_id'] == case_id
    assert result['homozygote'] == 0
예제 #18
0
def delete_variants(adapter, variant_stream, family_id, affected_individuals):
    """Delete a case and the variants of its VCF stream from the database.

    The case ``{'case_id': family_id}`` is removed first. Then every
    non-header line of ``variant_stream`` is formatted with
    ``get_formated_variant`` and, when the result is non-empty, passed to
    ``adapter.delete_variant``.

    Args:
        adapter: plugin providing ``delete_case`` and ``delete_variant``
        family_id (str): identifier stored as ``case_id`` of the case
        affected_individuals: forwarded unchanged to ``get_formated_variant``
        variant_stream (iterable(str)): VCF lines, header lines included

    Returns:
        nr_of_deleted (int): number of variants passed to the adapter
    """
    case = {'case_id': family_id}
    adapter.delete_case(case)

    header = []
    nr_of_deleted = 0
    for line in variant_stream:
        line = line.rstrip()
        if line.startswith('#'):
            # "##" lines are meta information; the single "#" line holds
            # the column names.
            if not line.startswith('##'):
                header = line[1:].split()
            continue

        formated_variant = get_formated_variant(
            variant_line=line, header_line=header,
            affected_individuals=affected_individuals)

        # The formatter can return an empty result (e.g. for a no-call).
        # The loader skips those, so they must not be deleted or counted.
        if formated_variant:
            adapter.delete_variant(formated_variant)
            nr_of_deleted += 1

    return nr_of_deleted
def test_format_variant(het_variant, individuals, case_id):
    """Check every field of the formatted ``het_variant`` against the fixture."""
    call_kwargs = {
        'variant': het_variant,
        'individuals': individuals,
        'family_id': case_id,
    }
    result = get_formated_variant(**call_kwargs)

    # The id is the four raw fixture fields joined by underscores
    id_parts = (
        het_variant['CHROM'],
        het_variant['POS'],
        het_variant['REF'],
        het_variant['ALT'],
    )
    expected_id = '_'.join(id_parts)

    assert result
    assert result['_id'] == expected_id
    assert result['chrom'] == het_variant['CHROM']
    # The fixture position is compared after conversion to int
    assert result['pos'] == int(het_variant['POS'])
    assert result['ref'] == het_variant['REF']
    assert result['alt'] == het_variant['ALT']
    assert result['family_id'] == case_id
    assert result['homozygote'] == 0
예제 #20
0
파일: delete.py 프로젝트: moonso/loqusdb
def delete_variants(adapter, vcf, family_id, individuals):
    """Delete variants for a case in the database

    Every record in ``vcf`` is formatted with ``get_formated_variant``;
    records that format to a falsy value are skipped, all others are passed
    to ``adapter.delete_variant``. Progress is logged each time the ``chrom``
    value of the formatted variant changes.

        Args:
            adapter (loqusdb.plugins.Adapter)
            vcf (iterable(dict))
            family_id (str)
            individuals: forwarded unchanged to ``get_formated_variant``

        Returns:
            nr_of_deleted (int): Number of deleted variants
    """
    nr_of_deleted = 0
    # Start of the timing window for the chromosome currently being processed
    chrom_time = datetime.now()
    current_chrom = None

    for variant in vcf:
        formated_variant = get_formated_variant(variant=variant, individuals=individuals, family_id=family_id)

        # Nothing to delete for records the formatter rejected
        if not formated_variant:
            continue

        new_chrom = formated_variant.get("chrom")

        adapter.delete_variant(formated_variant)
        nr_of_deleted += 1

        if new_chrom == current_chrom:
            continue

        # Chromosome changed: report the one just finished (if any) and
        # restart the timer for the new one.
        if current_chrom:
            logger.info("Chromosome {0} done".format(current_chrom))
            logger.info("Time to delete chromosome {0}: {1}".format(current_chrom, datetime.now() - chrom_time))
        logger.info("Start deleting chromosome {0}".format(new_chrom))

        current_chrom = new_chrom
        chrom_time = datetime.now()

    return nr_of_deleted
예제 #21
0
파일: load.py 프로젝트: robinandeer/loqusdb
def load_variants(adapter, family_id, affected_individuals, variant_stream,
                  bulk_insert=False, vcf_path=None):
    """Load variants for a family into the database.

    The case is registered first. Every non-header line of
    ``variant_stream`` is then formatted with ``get_formated_variant``;
    non-empty results are inserted one by one, or collected and inserted in
    batches when ``bulk_insert`` is set. Progress and timings are logged.

    Args:
        adapter (loqusdb.plugins.MongoAdapter): initialized plugin
        family_id (str): unique family identifier
        affected_individuals (List[str]): list to match individuals
        variant_stream (sequence): stream of VCF lines
        bulk_insert (bool): whether to insert in bulk or one-by-one
        vcf_path (path): for storing in database
    """
    case = {'case_id': family_id, 'vcf_path': vcf_path}
    adapter.add_case(case)

    # This is the header line with mandatory vcf fields
    header = []
    nr_of_variants = 0
    nr_of_inserted = 0

    start_inserting = datetime.now()
    start_ten_thousand = datetime.now()

    # Pending batch, only filled when bulk_insert is set
    variants = []
    for line in variant_stream:
        line = line.rstrip()
        if line.startswith('#'):
            # "##" lines are meta information; the single "#" line holds
            # the column names.
            if not line.startswith('##'):
                header = line[1:].split()
            continue

        nr_of_variants += 1

        formated_variant = get_formated_variant(
            variant_line=line, header_line=header,
            affected_individuals=affected_individuals)

        if formated_variant:
            nr_of_inserted += 1
            if bulk_insert:
                variants.append(formated_variant)
            else:
                adapter.add_variant(variant=formated_variant)

        if nr_of_variants % 10000 == 0:
            logger.info("{0} of variants processed".format(nr_of_variants))
            logger.info("Time to insert last 10000: {0}".format(
                datetime.now()-start_ten_thousand))
            start_ten_thousand = datetime.now()

        # Flush the batch every 100000 processed lines. Skip the call when
        # nothing was collected.
        # NOTE(review): a Mongo-backed add_bulk presumably rejects an
        # empty batch - confirm against the adapter.
        if nr_of_variants % 100000 == 0 and bulk_insert and variants:
            adapter.add_bulk(variants)
            variants = []

    # Insert whatever is left of the last, partially filled batch
    if bulk_insert and variants:
        adapter.add_bulk(variants)

    logger.info("Nr of variants in vcf: {0}".format(nr_of_variants))
    logger.info("Nr of variants inserted: {0}".format(nr_of_inserted))
    logger.info("Time to insert variants: {0}".format(datetime.now() -
                                                      start_inserting))