Example #1
0
def export_causatives(adapter, collaborator):
    """Export causative variants for a collaborator

    Every document id returned by ``adapter.get_causatives`` is looked up with
    ``adapter.variant``. Ids that occur more than once are only exported once,
    ids that the adapter cannot resolve (``None``) are skipped with a warning
    and variants on a chromosome missing from ``CHROMOSOME_INTEGERS`` are
    skipped with an info message.

    Args:
        adapter(MongoAdapter)
        collaborator(str): Passed as ``institute_id`` to ``adapter.get_causatives``

    Yields:
        variant_obj(scout.Models.Variant): Variants marked as causative ordered by position.
    """
    # (chromosome rank, position, variant) triples, collected for sorting
    sortable_variants = []
    # Document ids that were already handled, so no variant is exported twice
    seen_ids = set()

    for document_id in adapter.get_causatives(institute_id=collaborator):
        if document_id in seen_ids:
            continue
        seen_ids.add(document_id)

        variant_obj = adapter.variant(document_id)
        if variant_obj is None:
            # A listed causative without a variant document must not abort the export
            LOG.warning("Could not find variant %s", document_id)
            continue

        chrom = variant_obj['chromosome']
        # Convert chromosome to integer for sorting. Compare against None
        # explicitly so that a chromosome mapped to 0 would still be exported.
        chrom_int = CHROMOSOME_INTEGERS.get(chrom)
        if chrom_int is None:
            LOG.info("Unknown chromosome %s", chrom)
            continue

        sortable_variants.append((chrom_int, variant_obj['position'], variant_obj))

    # Sort variants on chromosome rank and then position. The variant itself is
    # left out of the key since dictionaries can not be compared.
    sortable_variants.sort(key=lambda entry: (entry[0], entry[1]))

    for _chrom_int, _position, variant_obj in sortable_variants:
        yield variant_obj
Example #2
0
def export_variants(adapter, collaborator, document_id=None, case_id=None):
    """Export causative variants for a collaborator

    If ``document_id`` is given, only the result of ``adapter.variant`` for
    that id is yielded (whatever the adapter returns, without any filtering).
    Otherwise all ids from ``adapter.get_causatives`` are looked up. In that
    mode ids that occur more than once are only exported once, ids that the
    adapter cannot resolve (``None``) are skipped with a warning and variants
    on a chromosome missing from ``CHROMOSOME_INTEGERS`` are skipped with an
    info message.

    Args:
        adapter(MongoAdapter)
        collaborator(str): Passed as ``institute_id`` to ``adapter.get_causatives``
        document_id(str): Search for a specific variant
        case_id(str): Search causative variants for a case

    Yields:
        variant_obj(scout.Models.Variant): Variants marked as causative ordered by position.
    """
    if document_id:
        yield adapter.variant(document_id)
        return

    variant_ids = adapter.get_causatives(
        institute_id=collaborator,
        case_id=case_id
        )

    # (chromosome rank, position, variant) triples, collected for sorting
    sortable_variants = []
    # Document ids that were already handled, so no variant is exported twice
    seen_ids = set()

    # Use a separate loop name so the ``document_id`` argument is not shadowed
    for doc_id in variant_ids:
        if doc_id in seen_ids:
            continue
        seen_ids.add(doc_id)

        variant_obj = adapter.variant(doc_id)
        if variant_obj is None:
            # A listed causative without a variant document must not abort the export
            LOG.warning("Could not find variant %s", doc_id)
            continue

        chrom = variant_obj['chromosome']
        # Convert chromosome to integer for sorting. Compare against None
        # explicitly so that a chromosome mapped to 0 would still be exported.
        chrom_int = CHROMOSOME_INTEGERS.get(chrom)
        if chrom_int is None:
            LOG.info("Unknown chromosome %s", chrom)
            continue

        sortable_variants.append((chrom_int, variant_obj['position'], variant_obj))

    # Sort variants on chromosome rank and then position. The variant itself is
    # left out of the key since dictionaries can not be compared.
    sortable_variants.sort(key=lambda entry: (entry[0], entry[1]))

    for _chrom_int, _position, variant_obj in sortable_variants:
        yield variant_obj
Example #3
0
def export_variants(adapter, collaborator, document_id=None, case_id=None):
    """Export causative variants for a collaborator

    With ``document_id`` set the generator yields the result of
    ``adapter.variant`` for that single id, unfiltered, and stops. Without it,
    every id returned by ``adapter.get_causatives`` is resolved. Repeated ids
    are exported once, ids for which the adapter returns ``None`` are skipped
    with a warning and variants whose chromosome is not a key of
    ``CHROMOSOME_INTEGERS`` are skipped with an info message.

    Args:
        adapter(MongoAdapter)
        collaborator(str): Passed as ``institute_id`` to ``adapter.get_causatives``
        document_id(str): Search for a specific variant
        case_id(str): Search causative variants for a case

    Yields:
        variant_obj(scout.Models.Variant): Variants marked as causative ordered by position.
    """
    if document_id:
        yield adapter.variant(document_id)
        return

    variant_ids = adapter.get_causatives(institute_id=collaborator,
                                         case_id=case_id)

    # Store (chromosome rank, position, variant) in a list for sorting
    ranked_variants = []
    # Keep track of handled ids so a variant is never exported twice
    handled_ids = set()

    for doc_id in variant_ids:
        if doc_id in handled_ids:
            continue
        handled_ids.add(doc_id)

        variant_obj = adapter.variant(doc_id)
        if variant_obj is None:
            # Skip ids without a variant document instead of failing the export
            LOG.warning("Could not find variant %s", doc_id)
            continue

        chrom = variant_obj["chromosome"]
        # Convert chromosome to integer for sorting. The check is against None
        # so that a chromosome mapped to 0 would not be dropped by accident.
        chrom_int = CHROMOSOME_INTEGERS.get(chrom)
        if chrom_int is None:
            LOG.info("Unknown chromosome %s", chrom)
            continue

        # Add chromosome and position to prepare for sorting
        ranked_variants.append((chrom_int, variant_obj["position"], variant_obj))

    # Sort variants based on chromosome rank and position only; the variant
    # objects themselves are not part of the sort key.
    ranked_variants.sort(key=lambda item: (item[0], item[1]))

    for ranked in ranked_variants:
        yield ranked[2]
Example #4
0
def export_panels(adapter, panels, versions=None, build='37'):
    """Export all genes in gene panels

    Exports the union of genes in one or several gene panels to a bed like format with coordinates.
    Panels that can not be found, genes that are missing from the result of
    ``adapter.hgncid_to_gene`` and genes on a chromosome that is not a key of
    ``CHROMOSOME_INTEGERS`` are skipped with a warning.

    Args:
        adapter(scout.adapter.MongoAdapter)
        panels(iterable(str)): Panel ids. Must support ``len()`` when versions are given
        versions(list): Optional panel versions, one per panel in the same order as ``panels``
        build(str): Genome build, written to the header and passed to ``adapter.hgncid_to_gene``

    Yields:
        str: First the header lines (starting with '#'), then one tab separated
            line per gene ordered by chromosome and start position.

    Raises:
        SyntaxError: If versions are given but not one for each panel. Since this is a
            generator the error is raised when the first line is requested.
    """
    if versions and (len(versions) != len(panels)):
        raise SyntaxError("If version specify for each panel")

    header_string = (
        "##gene_panel={0},version={1},updated_at={2},display_name={3}")
    contig_string = "##contig={0}"
    bed_string = "{0}\t{1}\t{2}\t{3}\t{4}"

    headers = ["##genome_build={}".format(build)]

    # Save all gene ids found in the collection of panels
    panel_geneids = set()
    # Save all chromosomes found in the collection of panels
    chromosomes_found = set()
    # Store (chromosome rank, start, gene) triples for sorting
    hgnc_geneobjs = []

    # Loop over the panels
    for i, panel_id in enumerate(panels):
        version = versions[i] if versions else None

        panel_obj = adapter.gene_panel(panel_id, version=version)
        if not panel_obj:
            LOG.warning("Panel %s version %s could not be found", panel_id,
                        version)
            continue

        headers.append(
            header_string.format(
                panel_obj['panel_name'],
                panel_obj['version'],
                panel_obj['date'].date(),
                panel_obj['display_name'],
            ))
        # Collect the hgnc ids from all genes found
        panel_geneids.update(gene_obj['hgnc_id'] for gene_obj in panel_obj['genes'])

    gene_objs = adapter.hgncid_to_gene(build=build)

    for hgnc_id in panel_geneids:
        hgnc_geneobj = gene_objs.get(hgnc_id)
        if hgnc_geneobj is None:
            LOG.warning("missing HGNC gene: %s", hgnc_id)
            continue
        chrom = hgnc_geneobj['chromosome']
        # Compare against None explicitly so that a chromosome mapped to 0
        # would not be treated as out of scope.
        chrom_int = CHROMOSOME_INTEGERS.get(chrom)
        if chrom_int is None:
            LOG.warning("Chromosome %s out of scope", chrom)
            continue

        hgnc_geneobjs.append((chrom_int, hgnc_geneobj['start'], hgnc_geneobj))
        chromosomes_found.add(chrom)

    # Sort the genes on chromosome rank and start position
    hgnc_geneobjs.sort(key=lambda tup: (tup[0], tup[1]))

    # Iterate CHROMOSOMES so the contig headers follow its order
    headers.extend(
        contig_string.format(chrom) for chrom in CHROMOSOMES
        if chrom in chromosomes_found)

    headers.append("#chromosome\tgene_start\tgene_stop\thgnc_id\thgnc_symbol")

    for header in headers:
        yield header

    for _chrom_int, _start, gene_obj in hgnc_geneobjs:
        yield bed_string.format(gene_obj['chromosome'], gene_obj['start'],
                                gene_obj['end'], gene_obj['hgnc_id'],
                                gene_obj['hgnc_symbol'])
Example #5
0
def export_panels(adapter, panels, versions=None, build='37'):
    """Export all genes in gene panels

    Exports the union of genes in one or several gene panels to a bed like format with coordinates.
    A warning is logged, and the entry is left out, for panels that can not be
    found, for gene ids missing from the result of ``adapter.hgncid_to_gene``
    and for genes whose chromosome is not a key of ``CHROMOSOME_INTEGERS``.

    Args:
        adapter(scout.adapter.MongoAdapter)
        panels(iterable(str)): Panel ids. Must support ``len()`` when versions are given
        versions(list): Optional panel versions, one per panel in the same order as ``panels``
        build(str): Genome build, written to the header and passed to ``adapter.hgncid_to_gene``

    Yields:
        str: First the header lines (starting with '#'), then one tab separated
            line per gene ordered by chromosome and start position.

    Raises:
        SyntaxError: If versions are given but not one for each panel. Since this is a
            generator the error is raised when the first line is requested.
    """
    if versions and (len(versions) != len(panels)):
        raise SyntaxError("If version specify for each panel")

    build_string = "##genome_build={}"
    header_string = "##gene_panel={0},version={1},updated_at={2},display_name={3}"
    contig_string = "##contig={0}"
    bed_string = "{0}\t{1}\t{2}\t{3}\t{4}"

    headers = [build_string.format(build)]

    # Save all gene ids found in the collection of panels
    panel_geneids = set()
    # Save all chromosomes found in the collection of panels
    chromosomes_found = set()
    # Store (chromosome rank, start, gene) triples for sorting
    hgnc_geneobjs = []

    # Loop over the panels
    for i, panel_id in enumerate(panels):
        version = None
        if versions:
            version = versions[i]

        panel_obj = adapter.gene_panel(panel_id, version=version)
        if not panel_obj:
            LOG.warning("Panel %s version %s could not be found", panel_id, version)
            continue

        headers.append(header_string.format(
            panel_obj['panel_name'],
            panel_obj['version'],
            panel_obj['date'].date(),
            panel_obj['display_name'],
        ))
        # Collect the hgnc ids from all genes found
        for gene_obj in panel_obj['genes']:
            panel_geneids.add(gene_obj['hgnc_id'])

    gene_objs = adapter.hgncid_to_gene(build=build)

    for hgnc_id in panel_geneids:
        hgnc_geneobj = gene_objs.get(hgnc_id)
        if hgnc_geneobj is None:
            LOG.warning("missing HGNC gene: %s", hgnc_id)
            continue

        chrom = hgnc_geneobj['chromosome']
        start = hgnc_geneobj['start']
        # Only a chromosome that is absent from the mapping is out of scope;
        # a plain truth test would also reject one that is mapped to 0.
        chrom_int = CHROMOSOME_INTEGERS.get(chrom)
        if chrom_int is None:
            LOG.warning("Chromosome %s out of scope", chrom)
            continue

        hgnc_geneobjs.append((chrom_int, start, hgnc_geneobj))
        chromosomes_found.add(chrom)

    # Sort the genes on chromosome rank and start position
    hgnc_geneobjs.sort(key=lambda tup: (tup[0], tup[1]))

    # Walk CHROMOSOMES rather than the set so contig headers follow its order
    for chrom in CHROMOSOMES:
        if chrom in chromosomes_found:
            headers.append(contig_string.format(chrom))

    headers.append("#chromosome\tgene_start\tgene_stop\thgnc_id\thgnc_symbol")

    for header in headers:
        yield header

    for hgnc_gene in hgnc_geneobjs:
        gene_obj = hgnc_gene[-1]
        yield bed_string.format(gene_obj['chromosome'], gene_obj['start'],
                                gene_obj['end'], gene_obj['hgnc_id'],
                                gene_obj['hgnc_symbol'])