Example #1
0
def profile_by_query(filename, genome_id, selected_loci, database):
    """Look up the allele profile of one genome in the sequence table.

    Every record of the FASTA file ``filename`` is turned into an allele id
    with ``operations.make_seqid``; the ``sequence`` table is then queried
    for rows whose ``allele_id`` is one of those ids and whose ``locus_id``
    is in ``selected_loci``.

    Args:
        filename: Path of the FASTA file to read.
        genome_id: Name given to the returned allele column.
        selected_loci: Iterable of locus ids the lookup is restricted to.
        database: Passed through to ``sql_query`` as ``database=``.

    Returns:
        The single remaining column of the query result, indexed by
        ``locus_id`` and renamed to ``genome_id`` (presumably a pandas
        Series -- depends on what ``sql_query`` returns).
    """
    # TODO: collect new alleles from here
    quoted_alleles = [
        "'{}'".format(operations.make_seqid(record.seq))
        for record in SeqIO.parse(filename, "fasta")
    ]
    quoted_loci = ["'{}'".format(locus) for locus in selected_loci]
    query = "select locus_id, allele_id from sequence where allele_id in ({}) and locus_id in ({});".format(
        ",".join(quoted_alleles), ",".join(quoted_loci))

    hits = sql_query(query, database=database)
    # Keep one row per allele first, then one row per locus, so that the
    # locus index built below is unique.
    hits = hits.drop_duplicates("allele_id")
    hits = hits.drop_duplicates("locus_id")
    by_locus = hits.set_index("locus_id")
    return by_locus.rename(columns={"allele_id": genome_id}).iloc[:, 0]
Example #2
0
 def get(self, id):
     """Start background profiling for an uploaded batch.

     Calls ``abort(404)`` when ``id`` is not a 32-character alphanumeric
     string or when no ``upload`` row has that ``batch_id``. Otherwise
     starts ``internals.profiling_api`` on a daemon thread and returns
     without waiting for it.

     Args:
         id: Batch identifier supplied by the caller (presumably taken
             from the request URL).

     Returns:
         A ``(body, status)`` tuple: a message dict and the status 200.
     """
     # The id is spliced into the SQL text below, so refuse anything that
     # could terminate the quoted literal (quotes, backslashes, spaces...).
     # NOTE(review): assumes real batch ids are purely alphanumeric (e.g. a
     # hex digest) -- confirm; prefer bound parameters if db.sql_query
     # supports them.
     if len(id) != 32 or not id.isalnum():
         abort(404)
     sql = "select * from upload where batch_id='{}';".format(id)
     results = db.sql_query(sql, database=DB).to_dict(orient="records")
     if not results:
         abort(404)
     # Daemon thread: the response is sent while profiling is still running.
     Thread(target=internals.profiling_api,
            args=(id, "Salmonella_5k", 95),
            daemon=True).start()
     return {"message": "Profiling dataset {}".format(id)}, 200
Example #3
0
    def get(self, id):
        """Return upload records matching a batch id or a sequence id.

        A 32-character ``id`` is matched against ``upload.batch_id`` and a
        64-character ``id`` against ``upload.seq_id``. Calls ``abort(404)``
        for any other length, for ids containing non-alphanumeric
        characters, and when the query yields no rows.

        Args:
            id: Batch or sequence identifier supplied by the caller
                (presumably taken from the request URL).

        Returns:
            The query result converted with ``to_dict(orient="records")``.
        """
        # The id is spliced into the SQL text below, so refuse anything that
        # could terminate the quoted literal (quotes, backslashes, spaces...).
        # NOTE(review): assumes real ids are purely alphanumeric (e.g. hex
        # digests) -- confirm; prefer bound parameters if db.sql_query
        # supports them.
        if len(id) not in (32, 64) or not id.isalnum():
            abort(404)

        column = "batch_id" if len(id) == 32 else "seq_id"
        sql = "select seq_id, batch_id, filename from upload where {}='{}';".format(
            column, id)

        results = db.sql_query(sql, database=DB).to_dict(orient="records")
        if not results:
            abort(404)
        return results
Example #4
0
def profiling(output_dir, input_dir, database, threads, occr_level=None, selected_loci=None, logger=None,
              aligcov_cut=0.5, identity=90):
    """Profile the genomes in ``input_dir`` and write results to ``output_dir``.

    The inputs are first renamed into a scratch ``query`` directory under
    ``output_dir`` and the name mapping is saved as ``namemap.json``. Then:

    * if ``database`` is an existing directory, loci are profiled against
      its ``panRefSeq.fa`` with ``profile_loci`` and alleles are allocated
      with ``profile_alleles``;
    * otherwise ``database`` is handed to ``sql_query``: loci are identified
      per genome in a process pool, each genome is looked up with
      ``profile_by_query``, and the combined table is written to
      ``wgmlst.tsv``.

    The scratch ``query`` directory is removed before returning, including
    when a step fails.

    Args:
        output_dir: Directory receiving ``namemap.json`` and the results.
        input_dir: Directory holding the input genomes.
        database: Either a directory path or a database identifier.
        threads: Worker count passed to the profiling helpers / process pool.
        occr_level: Minimum ``occurence`` used to pick loci from the
            ``scheme`` table when ``selected_loci`` is not given; also
            forwarded to ``profile_alleles`` in the directory case.
        selected_loci: Optional iterable of locus ids to restrict to.
        logger: Optional logger; a console logger is created when omitted.
        aligcov_cut: Forwarded to ``profile_loci``.
        identity: Forwarded to ``profile_loci``.

    Raises:
        ValueError: ``database`` is not a directory and neither
            ``selected_loci`` nor ``occr_level`` was supplied.
    """
    load_database_config()
    if not logger:
        logger = logs.console_logger(__name__)

    logger.info("Renaming contigs...")
    query_dir = files.joinpath(output_dir, "query")
    files.create_if_not_exist(query_dir)
    try:
        namemap = rename(query_dir, input_dir)
        with open(files.joinpath(output_dir, "namemap.json"), "w") as f:
            json.dump(namemap, f)

        if os.path.isdir(database):
            logger.info("Profiling loci...")
            refseq_fna = files.joinpath(database, "panRefSeq.fa")
            profile_loci(refseq_fna, query_dir, output_dir, aligcov_cut, identity, threads)

            logger.info("Allocating alleles...")
            profile_alleles(query_dir, database, output_dir, threads, occr_level)
        else:
            logger.info("Identifying loci and allocating alleles...")

            # select loci by scheme
            if selected_loci:
                selected_loci = set(selected_loci)
            elif occr_level is None:
                # Formatting None into the query below would yield invalid
                # SQL ("occurence>=None"); fail with a clear message instead.
                raise ValueError(
                    "either selected_loci or occr_level is required when "
                    "profiling against a database")
            else:
                query = "select locus_id from scheme where occurence>={};".format(occr_level)
                selected_loci = set(sql_query(query, database=database).iloc[:, 0])

            temp_dir = os.path.join(query_dir, "temp")
            files.create_if_not_exist(temp_dir)

            collect = []
            args = [(os.path.join(query_dir, filename), temp_dir)
                    for filename in os.listdir(query_dir) if filename.endswith(".fa")]
            with ProcessPoolExecutor(threads) as executor:
                # Loci are identified in worker processes; the lookup for
                # each finished genome runs here in the parent process.
                for filename in executor.map(identify_loci, args):
                    genome_id = files.fasta_filename(filename)
                    target_file = os.path.join(temp_dir, genome_id + ".locus.fna")
                    profile = profile_by_query(target_file, genome_id, selected_loci, database)
                    collect.append(profile)
            result = pd.concat(collect, axis=1)
            result.to_csv(files.joinpath(output_dir, "wgmlst.tsv"), sep="\t")
    finally:
        # Always drop the scratch directory so a failed run leaves no
        # renamed copies or temp files behind.
        shutil.rmtree(query_dir)
Example #5
0
 def get(self):
     """Return ``seq_id`` and ``filename`` for every row of ``upload``.

     The query result is converted with ``to_dict(orient="records")``.
     """
     uploads = db.sql_query("select seq_id, filename from upload;", database=DB)
     return uploads.to_dict(orient="records")
Example #6
0
 def get(self):
     """Return the ``id`` of every row of ``dendrogram``.

     The query result is converted with ``to_dict(orient="records")``.
     """
     dendrograms = db.sql_query("select id from dendrogram;", database=DB)
     return dendrograms.to_dict(orient="records")
Example #7
0
 def get(self):
     """Return ``id``, ``occurrence`` and ``database`` for every row of ``profile``.

     The query result is converted with ``to_dict(orient="records")``.
     """
     profiles = db.sql_query("select id, occurrence, database from profile;", database=DB)
     return profiles.to_dict(orient="records")