Beispiel #1
0
 def offtarget(organism,
               offtarget_databases,
               offtarget_names,
               tmp_dir=None):
     """Run the off-target search for ``organism`` and load the BLAST hits.

     The organism's proteins are written to ``proteins.fasta`` inside
     ``tmp_dir`` (only when that file does not exist yet), searched with
     ``Offtarget.offtargets`` and each result is then handed to
     ``load_blast_features``.

     Args:
         organism: Organism identifier; used to build the default working
             directory and passed to ``BioMongoDB.protein_fasta`` and
             ``load_blast_features``.
         offtarget_databases: Passed unchanged to ``Offtarget.offtargets``.
         offtarget_names: Feature names; the i-th name is loaded together
             with ``results[i]``. Presumably one name per database, in the
             same order -- confirm against ``Offtarget.offtargets``.
         tmp_dir: Working directory. Defaults to
             ``/data/organismos/<organism>/annotation/``. A trailing path
             separator is optional.
     """
     if not tmp_dir:
         tmp_dir = "/data/organismos/" + organism + "/annotation/"
     # Guarantee a trailing separator so that a caller-supplied directory
     # such as "/tmp/x" does not end up as "/tmp/xproteins.fasta" here or in
     # any helper that appends file names to the directory string.
     tmp_dir = os.path.join(tmp_dir, "")
     mkdir(tmp_dir)
     proteins = os.path.join(tmp_dir, "proteins.fasta")
     # Reuse a previously exported proteome instead of dumping it again.
     if not os.path.exists(proteins):
         BioMongoDB.protein_fasta(proteins, organism)
     results = Offtarget.offtargets(proteins, tmp_dir, offtarget_databases)
     for i, name in enumerate(offtarget_names):
         # Fixed 0.4 cut-offs for identity, query coverage and hit coverage.
         load_blast_features(organism,
                             results[i],
                             name,
                             min_identity=0.4,
                             min_query_coverage=0.4,
                             min_hit_coverage=0.4)
Beispiel #2
0
    ("MpyloriIndia", "Helicobacter pylori India7 (e-proteobacteria)",
     "/data/organismos/MpyloriIndia/GCF_000185185.1_ASM18518v1_genomic.gbff",
     907238),
]

# For every entry in `orgs`: create the organism's directory layout, run
# from_ref_seq on its annotation file, export the proteins to a FASTA file,
# call update_proteins on it and launch the off-target search.
for name, org, ann_path, tax in orgs:
    organism = name
    org_root = "/data/organismos/" + name
    for sub_dir in ("/annotation/offtarget",
                    "/annotation/pwtools",
                    "/annotation/pathways",
                    "/estructura/raw",
                    "/estructura/sndg/modelos",
                    "/estructura/sndg/pockets"):
        mkdir(org_root + sub_dir)

    from_ref_seq(name, ann_path, tax=tax, cpus=3)

    proteins_faa = org_root + "/annotation/proteins.faa"
    mdb.protein_fasta(proteins_faa, name)
    # NOTE(review): 1003200 is the same literal for every organism, while
    # the per-organism `tax` is only given to from_ref_seq. Confirm what
    # this positional argument of update_proteins means.
    update_proteins("/tmp/" + name + "/",
                    proteins_faa,
                    name,
                    1003200,
                    db_init=mysqldb)

    # Search the exported proteins against the three off-target databases;
    # output goes to the organism's annotation/offtarget/ directory.
    Offtargeting.offtargets(
        proteins_faa,
        org_root + "/annotation/offtarget/",
        offtarget_dbs=[
            "/data/databases/deg/degaa-p.dat",
            "/data/databases/human/gencode.v17.pc_translations.fa",
            "/data/databases/human/gut_microbiota.fasta"
        ])
    import_prop_blast(mdb.db,
Beispiel #3
0
            "options": ["No", "Yes"],
            "description": "Has a hit in Database of Essential Genes"
        })
}
from SNDG.Sequence import read_blast_table
from tqdm import tqdm

# Every sequence collection except the ones named "cruzi" and "pdb".
cols = list(SeqCollection.objects(name__nin=["cruzi", "pdb"]))
cpus = 4
db = mdb.db
for seqCol in tqdm(cols):
    mkdir("/data/organismos/" + seqCol.name + "/contigs")
    # NOTE(review): the file written by mdb.protein_fasta is stored under
    # contigs/genoma.fasta; confirm that this location/name is intended.
    proteome = "/data/organismos/" + seqCol.name + "/contigs/genoma.fasta"
    # Export the proteins only once; later runs reuse the existing file.
    if not os.path.exists(proteome):
        mdb.protein_fasta(proteome, seqCol.name)

    out = "/data/organismos/" + seqCol.name + "/annotation/offtarget/"
    mkdir(out)

    # Only collections that do not have the parameter yet are processed.
    if not seqCol.has_druggability_param("human_offtarget"):

        seqCol.druggabilityParams.append(off_props["human_offtarget"])
        # Use a dedicated name for the BLAST database path: rebinding `db`
        # here would overwrite the module-level `db = mdb.db` handle for the
        # rest of the script.
        human_db = "/data/databases/human/gencode.v17.pc_translations.fa"

        # blastp, tabular output (-outfmt 6), a single target and a single
        # HSP per query, written to <out>human_offtarget.tbl.
        execute(
            "blastp -evalue 1e-5 -max_hsps 1 -outfmt 6 -max_target_seqs 1 -db {db} -query {query} -out {out} -num_threads {cpus}",
            db=human_db,
            query=proteome,
            out=out + "human_offtarget.tbl",
            cpus=cpus)