예제 #1
0
파일: dbconn.py 프로젝트: COMBAT-TB/tb2neo
def map_gene_and_cds_to_protein(protein):
    """
    Link a Protein to the Gene(s) encoding it and to the CDS it derives from.

    Every tag in ``protein.parent`` is looked up as a Gene, falling back to a
    PseudoGene. Each match is connected to the protein in both directions
    (``gene.encodes`` / ``protein.encoded_by``) and both objects are pushed.
    The stripped ``protein.uniquename`` is then passed to ``eu_mapping`` with
    ``'ENSEMBLGENOME_TRS_ID'``; when the first returned ID selects a CDS, the
    two are connected through ``protein.derives_from`` / ``cds.derived``.

    :param protein: Protein object to link; a falsy value is ignored.
    :return: None
    """
    if not protein:
        # The gene loop was already guarded against a missing protein, but the
        # CDS mapping below dereferenced ``protein.uniquename`` regardless.
        return

    for tag in protein.parent or ():
        gene = Gene.select(graph, tag).first()
        if not gene:
            gene = PseudoGene.select(graph, tag).first()
        if gene:
            gene.encodes.add(protein)
            graph.push(gene)
            protein.encoded_by.add(gene)
            graph.push(protein)

    protein_entry = str(protein.uniquename).strip()
    ens_id = eu_mapping(protein_entry, 'ENSEMBLGENOME_TRS_ID')
    if not ens_id:
        # Covers both "no mapping" (None) and an empty result, which would
        # otherwise fail on ``ens_id[0]``.
        return

    cds = CDS.select(graph, ens_id[0]).first()
    if cds:
        # Protein-[derives_from]->CDS
        protein.derives_from.add(cds)
        graph.push(protein)
        cds.derived.add(protein)
        graph.push(cds)
예제 #2
0
파일: dbconn.py 프로젝트: COMBAT-TB/tb2neo
def map_srna_to_mrna(text_file):
    """
    Map sRNA to the mRNA they regulate.

    Reads a tab-separated file whose first line is skipped as a header. For
    each remaining line, column 0 is used as the sRNA name and column 7 as the
    mRNA name(s). The sRNA is matched case-insensitively against ``NCRna.name``
    and every mRNA name against ``Gene.uniquename``; each matching gene is
    added to ``ncrna.regulates_gene`` and the ncRNA is pushed.

    :param text_file: path of the tab-separated sRNA file
    :return: None
    """
    sys.stdout.write("\nAdding sRNA data...")
    with open(text_file) as handle:
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(handle, None)
        for line in handle:
            columns = line.split('\t')
            if len(columns) < 8:
                # Blank or truncated line: there is no mRNA column to read.
                continue
            srna_name = columns[0].strip()
            mrna_name = columns[7].strip()
            if not srna_name:
                # An empty name would turn the pattern into '.*.*', which
                # matches every NCRna.
                continue
            ncrna = NCRna.select(graph).where(
                "_.name=~'(?i).*{}.*'".format(srna_name.lower())).first()
            if not ncrna:
                continue
            # Several targets are separated by '-' or by whitespace.
            if "-" in mrna_name:
                targets = mrna_name.split("-")
            else:
                targets = mrna_name.split()
            for name in targets:
                name = name.strip()
                if not name:
                    # Same reasoning as for the sRNA name above.
                    continue
                gene = Gene.select(graph).where(
                    "_.uniquename=~'(?i).*{}.*'".format(
                        name.lower())).first()
                if gene:
                    ncrna.regulates_gene.add(gene)
                    graph.push(ncrna)
예제 #3
0
파일: dbconn.py 프로젝트: COMBAT-TB/tb2neo
 def map_pathway_to_proteins(pathway_genes, path):
     """
     Link every protein encoded by the given pathway genes to a pathway.

     Gene IDs containing "RV" are rewritten to the "Rv..." form before the
     Gene lookup. Each protein in ``gene.encodes`` is connected to ``path`` in
     both directions (``protein.pathway`` / ``path.protein``) and both objects
     are pushed.

     :param pathway_genes: iterable of gene ID strings
     :param path: pathway object to attach the proteins to
     :return: None
     """
     for g_id in pathway_genes:
         if "RV" in g_id:
             # str.strip()/lstrip() take a *set of characters*, not a prefix.
             # Only the leading run is removed here: stripping both ends also
             # ate a trailing 'B' or 'D' suffix of the locus tag
             # (e.g. "RVBD_0078B" became "Rv0078").
             g_id = "Rv" + g_id.lstrip("RVBD_")
         # Protein parent is stored as an array
         gene = Gene.select(graph, g_id).first()
         if gene:
             for protein in gene.encodes:
                 protein.pathway.add(path)
                 graph.push(protein)
                 path.protein.add(protein)
                 graph.push(path)
예제 #4
0
파일: dbconn.py 프로젝트: COMBAT-TB/tb2neo
def map_gene_to_orthologs(locus_tags):
    """
    Connect genes to their orthologs.

    Each tag that selects a Gene and starts with 'Rv' is passed to
    ``fetch_ortholog``. When the returned ortholog also selects a Gene, the
    pair is connected through ``orthologous_to`` / ``orthologous_to_`` and
    both genes are pushed.

    :param locus_tags: iterable of iterables of locus tag strings
    :return: None
    """
    sys.stdout.write("\nMapping Orthologs...\n")
    for tag_group in locus_tags:
        for locus_tag in tag_group:
            source_gene = Gene.select(graph, locus_tag).first()
            # Only known genes carrying an 'Rv' locus tag are looked up.
            if not source_gene or not locus_tag.startswith('Rv'):
                continue

            ortholog_tag = fetch_ortholog(locus_tag=str(locus_tag))
            if not ortholog_tag:
                continue

            target_gene = Gene.select(graph, str(ortholog_tag)).first()
            if not target_gene:
                continue

            source_gene.orthologous_to.add(target_gene)
            target_gene.orthologous_to_.add(source_gene)
            graph.push(source_gene)
            graph.push(target_gene)
    sys.stdout.write("\nMapped Orthologs")
예제 #5
0
파일: db.py 프로젝트: thobalose/vcf2neo
    def create_variant_site_nodes(self, record, known_sites,
                                  annotation, v_set=None, c_set=None):
        """
        Create, or reuse, the Variant node for one annotation of a VCF record.

        Sites are cached in ``known_sites`` under the key ``str(pos) + gene``
        because one position can affect several genes (e.g. DOWNSTREAM of one
        gene and UPSTREAM of another). The site is then linked to the call
        set, to the Gene selected by ``v_site.gene`` and to the variant set,
        whichever of those are available.

        :param record: VCF record providing ``POS``, ``CHROM`` and ``REF``
        :param known_sites: dict mapping ``str(pos) + gene`` to Variant;
            updated in place
        :param annotation: indexable annotation fields; indices 0, 1, 2, 4, 7,
            9 and 10 are read (presumably a SnpEff ANN entry - confirm
            against the caller)
        :param v_set: optional variant set; its name prefixes a new site's pk
        :param c_set: optional call set to link the site to
        :return: the updated ``known_sites`` dict
        """
        pos = record.POS
        chrom = record.CHROM
        ref_allele = record.REF
        alt_allele = annotation[0]
        gene = annotation[4]
        # Prefer index 10 and fall back to index 9 when it is empty.
        consequence = annotation[10] if annotation[10] != '' else annotation[9]
        gene_pos = str(pos) + gene

        # The cache is keyed by gene_pos. Testing the bare position never
        # matched, so every call created a duplicate Variant node.
        if gene_pos in known_sites:
            # Already seen, possibly in another VCF file.
            v_site = known_sites[gene_pos]
        else:
            # v_set is optional, so do not dereference it unconditionally.
            pk_prefix = v_set.name if v_set else ''
            v_site = Variant(chrom=str(chrom), pos=pos,
                             ref_allele=str(ref_allele),
                             alt_allele=str(alt_allele),
                             gene=gene,
                             consequence=consequence,
                             pk=pk_prefix + gene_pos,
                             impact=annotation[2])
            v_site.biotype = annotation[7]
            v_site.effect = annotation[1]
            self.graph.create(v_site)
            known_sites[gene_pos] = v_site

        if c_set:
            v_site.belongs_to_cset.add(c_set)
            c_set.has_variants.add(v_site)

        gene = Gene.select(self.graph, str(v_site.gene)).first()
        if gene:
            v_site.occurs_in.add(gene)
            self.graph.push(v_site)

        if v_set:
            v_set.has_variant.add(v_site)
            self.graph.push(v_set)
        return known_sites
예제 #6
0
파일: dbconn.py 프로젝트: COMBAT-TB/tb2neo
def create_operon_nodes(text_file=None):
    """
    Create Operon nodes and link them to their member genes.

    Every line of the tab-separated file that contains 'OPERON' yields one
    Operon: column 11 (a comma-separated list of locus tags) becomes its
    uniquename and column 7 its description. Each listed locus tag that
    selects a Gene is added to the operon and given ``co_regulated``
    relationships.

    :param text_file: path of the tab-separated operon file; it must be
        supplied despite the ``None`` default, because it is passed straight
        to ``open``
    :return: None
    """
    sys.stdout.write("\nAdding operon data...")
    with open(text_file) as handle:
        for line in handle:
            if 'OPERON' not in line:
                continue
            tab_split = line.split('\t')
            if len(tab_split) < 12:
                # Truncated line: the operon column (index 11) is missing.
                continue
            locus_operon = tab_split[11]
            description = tab_split[7]
            operon = Operon()
            # Must we use the product as the uniquename
            operon.uniquename = locus_operon
            operon.description = description
            graph.create(operon)
            genes = locus_operon.split(',')
            for locus_tag in genes:
                gene = Gene.select(graph, locus_tag.strip()).first()
                if not gene:
                    continue
                gene.member_of.add(operon)
                if len(genes) == 1:
                    gene.co_regulated.add(gene)
                else:
                    # Let's not build reverse co-regulated rel
                    # NOTE(review): genes[1:] is used for every member, so
                    # members after the first are linked to themselves and
                    # never to genes[0] - confirm this is intended.
                    for g_id in genes[1:]:
                        g = Gene.select(graph, g_id.strip()).first()
                        if g:
                            gene.co_regulated.add(g)
                graph.push(gene)
                operon.gene.add(gene)
                graph.push(operon)
예제 #7
0
파일: dbconn.py 프로젝트: COMBAT-TB/tb2neo
def create_gene_nodes(feature, organism):
    """
    Build a Gene node from a feature and attach it to its organism.

    The name and uniquename come from ``get_feature_name``, each falling back
    to the other. Description, biotype and parent are read from
    ``feature.qualifiers``; only the part of the parent after its first ':'
    is kept. The created gene is also stored in ``gene_dict`` under its
    unique name.

    :param feature: feature exposing a ``qualifiers`` mapping
    :param organism: organism object the gene belongs to
    :return: None
    """
    feature_names = get_feature_name(feature)
    name = feature_names.get("Name", feature_names.get("UniqueName"))
    unique_name = feature_names.get("UniqueName", name)

    qualifiers = feature.qualifiers
    description = qualifiers.get("description", "")
    biotype = qualifiers['biotype'][0]
    parent = qualifiers.get("Parent", " ")[0]

    node = Gene()
    node.name = name
    node.uniquename = unique_name
    # Drop everything up to and including the first ':' when there is one.
    node.parent = parent.split(':', 1)[-1]
    node.biotype = biotype
    node.description = description
    graph.create(node)

    node.belongs_to.add(organism)
    graph.push(node)
    gene_dict[unique_name] = node
예제 #8
0
파일: dbconn.py 프로젝트: COMBAT-TB/tb2neo
def create_known_mutation_nodes(**kwargs):
    """
    Create a known (drug-resistance) mutation and link it into the graph.

    A VariantSet, a CallSet and a Variant are built from the keyword
    arguments and connected to each other. The variant is then linked to the
    drugs it confers resistance to and to the Gene (or, failing that, the
    RRna) whose name or uniquename matches ``gene``.

    Keyword arguments read: ``vset_name``, ``vset_owner``, ``cset_name``,
    ``chrom``, ``pos``, ``ref_allele``, ``alt_allele``, ``gene``, ``pk``,
    ``consequence``, ``loc_in_seq``, ``promoter``, ``biotype``,
    ``drug_name``, ``sources`` and ``drugbank_id`` (an iterable of IDs).

    :return: None
    """
    fluoroquinolones = [
        "ciprofloxacin", "ofloxacin", "levofloxacin", "moxifloxacin"
    ]
    aminoglycosides = ["amikacin", "kanamycin", "streptomycin", "capreomycin"]

    v_set = VariantSet(name=kwargs.get("vset_name", ""),
                       owner=kwargs.get("vset_owner", ""))
    call_set = CallSet(name=kwargs.get("cset_name", ""))

    v_set.has_callsets.add(call_set)
    call_set.belongs_to_vset.add(v_set)

    graph.create(v_set)
    graph.create(call_set)

    variant = Variant(chrom=kwargs.get("chrom", ""),
                      pos=kwargs.get("pos", ""),
                      ref_allele=kwargs.get("ref_allele", ""),
                      alt_allele=kwargs.get("alt_allele", ""),
                      gene=kwargs.get("gene", ""),
                      pk=kwargs.get("pk", ""),
                      consequence=kwargs.get("consequence", ""))
    variant.loc_in_seq = kwargs.get("loc_in_seq")
    variant.promoter = kwargs.get("promoter")
    variant.biotype = kwargs.get("biotype")
    variant.drug = kwargs.get("drug_name")
    variant.sources = kwargs.get("sources")
    variant.belongs_to_cset.add(call_set)
    call_set.has_variants.add(variant)

    def map_drug_class_to_variant(_class):
        """
        Map all drugs in class to variant
        :param _class: iterable of drug names belonging to one drug class
        :return: None
        """
        for item in _class:
            drugs = Drug.select(graph).where(
                "_.name=~'(?i).*{}.*'".format(item))
            for _drug in drugs:
                variant.resistant_to.add(_drug)

    drug_name = kwargs.get("drug_name")
    if drug_name == "aminoglycosides":
        map_drug_class_to_variant(aminoglycosides)
    elif drug_name == "fluoroquinolones":
        map_drug_class_to_variant(fluoroquinolones)
    else:
        # ``drugbank_id`` may be absent; iterating None raised TypeError.
        for drug_id in kwargs.get("drugbank_id") or ():
            drug = Drug.select(graph, str(drug_id).upper()).first()
            if drug:
                variant.resistant_to.add(drug)
            elif drug_id and drug_name:
                # Unknown drug: create it so the resistance link is kept.
                drug = Drug(accession=str(drug_id).strip(),
                            name=drug_name.capitalize())
                graph.create(drug)
                variant.resistant_to.add(drug)

    raw_gene = kwargs.get("gene")
    gene_name = str(raw_gene).lower() if raw_gene else ""
    if gene_name:
        # Without a gene name the pattern would degrade to "none" or to
        # '.*.*', attaching the variant to an arbitrary gene.
        pattern = (f"_.name=~'(?i).*{gene_name}.*' "
                   f"OR _.uniquename=~'(?i).*{gene_name}.*'")
        gene = Gene.select(graph).where(pattern).first()
        if gene:
            variant.occurs_in.add(gene)
        else:
            rna = RRna.select(graph).where(pattern).first()
            if rna:
                variant.occurs_in_.add(rna)

    graph.create(variant)
    graph.push(call_set)
예제 #9
0
def test_rv0001():
    """Check that gene Rv0001 is named dnaA and has a category and residues."""
    rv0001 = Gene.select(graph, "Rv0001").first()
    assert rv0001.name == "dnaA"
    # Both attributes must differ from the empty string.
    for attribute in ("category", "residues"):
        assert getattr(rv0001, attribute) != ""