예제 #1
0
def run():
    """Generate the genome-wide mutation output for the current run.

    Reads the run parameters and, when ``params["source"]`` is either
    ``"alignment_all"`` or ``"alignment_current"``, runs the alignment
    analysis for that source and hands the resulting frame to
    ``variant_screener.genomewide_mutations`` together with the run's
    output directory. Any other source value is a no-op. Returns nothing.
    """
    params = read_run_params()
    run_id = params["current_run"]
    output_root = params["container"] + "output/"
    run_output_dir = output_root + run_id + "/"

    ##### generate genome wide variants
    source = params["source"]
    # Both supported sources follow the same two steps; only the source
    # label passed to the alignment analysis differs.
    if source in ("alignment_all", "alignment_current"):
        alignment_df = run_alignment_analysis(params, source)
        variant_screener.genomewide_mutations(alignment_df, run_output_dir,
                                              run_id)
예제 #2
0
def run():
    """Build the per-variant summaries and the SNP table for the current run.

    Steps, in order:

    1. Read ``1_genomic_mutations_<run>.xlsx`` from the run's output directory.
    2. If ``params["source"]`` is ``"alignment_current"``, merge it with the
       previously accumulated table, de-duplicate on ``strain``, and write
       the merged table both to the run directory and to the
       ``previous_genomic_table`` directory.
    3. Parse the ``nucleotide_change`` and ``nt_aa_pair`` columns from their
       string form back into Python literals.
    4. Run the pangolin per-variant summary, the SNP table generation and the
       pangolin summary.

    All results are written by the called helpers; nothing is returned.
    """
    params = read_run_params()
    run_id = params["current_run"]
    out_home = params["container"] + "output/"
    out_dir = out_home + run_id + "/"

    ##### prepare for variant calling
    df_muttable = pd.read_excel(out_dir + "1_genomic_mutations_" + run_id +
                                ".xlsx")

    if params["source"] == "alignment_current":
        df_muttable_prev = pd.read_excel(
            out_home + "/previous_genomic_table/1_genomic_mutations_all.xlsx")
        # DataFrame.append was removed in pandas 2.0; pd.concat with
        # ignore_index=True produces the same stacked frame.
        df_muttable = pd.concat([df_muttable, df_muttable_prev],
                                ignore_index=True)
        # Current-run rows come first, so with the default keep="first" they
        # take precedence over older rows for the same strain.
        df_muttable.drop_duplicates("strain", inplace=True)
        df_muttable.to_excel(out_dir + "1_genomic_mutations_all.xlsx",
                             index=False)
        df_muttable.to_excel(
            out_home + "/previous_genomic_table/1_genomic_mutations_all_" +
            run_id + ".xlsx",
            index=False)

    # These columns are read back from Excel as strings; literal_eval turns
    # each cell back into the Python literal it represents.
    df_muttable.nucleotide_change = df_muttable.nucleotide_change.apply(
        ast.literal_eval)
    df_muttable.nt_aa_pair = df_muttable.nt_aa_pair.apply(ast.literal_eval)

    # #### generate per variant summary with pangolin
    db_pangolin = params[
        "container"] + "pangolin_analysis/pangolin_lineage_assignment_for_pipeline_" + run_id + ".xlsx"
    db = out_dir + "/database/5_strains_run_group_analysis_table_with_samplesheet.csv"

    variant_screener.variant_screener_per_variant_summary_with_pangolin(
        df_muttable, db, db_pangolin, out_dir, run_id)

    print("check...")
    print(df_muttable.head())
    # generate_snp_table
    log_msg("generating snp table for s protein..")
    # Positional rename: the frame must have exactly these five columns, in
    # this order, by the time it reaches the SNP table generator.
    df_muttable.columns = [
        "strain", "total_mutations", "nucleotide_change", "nt_aa_pair",
        "deletions"
    ]
    mutation.generate_snp_table(df_muttable, db, out_dir, run_id)

    variant_screener.variant_screener_summary_pangolin(df_muttable, out_dir,
                                                       run_id)
예제 #3
0
def get_combine_df():
    """Load every configured alignment group and return the filtered frame.

    ``params["nta_group_v1"]`` maps a group name to a string of the form
    ``"<alignment file>,<integer adjustment>"``. Groups are processed in
    sorted name order. A group whose name contains ``"reference"`` is loaded
    with ``reference=True``; every other group is loaded with its parsed
    ``adjustment``. The collected records are turned into a DataFrame and
    passed through ``filterMainAlignment(df, "cds")``, whose result is
    returned.
    """
    params = read_run_params()
    alignment_dir = params["container"] + "input_alignment/"
    group_specs = params["nta_group_v1"]

    collected = []
    for group_name in sorted(group_specs):
        log_msg("run group is - " + group_name)

        # The adjustment is parsed for every group, including reference
        # groups, even though only non-reference groups use it.
        fields = group_specs[group_name].split(",")
        alignment_file = fields[0]
        offset = int(fields[1])

        if "reference" in group_name:
            loader_kwargs = {"reference": True}
        else:
            loader_kwargs = {"adjustment": offset}

        collected.extend(
            genomicSequenceFromAlignment(alignment_dir + alignment_file,
                                         **loader_kwargs))

    return filterMainAlignment(pd.DataFrame(collected), "cds")
예제 #4
0
import pandas as pd
import numpy as np
from datetime import timedelta
import gen_utils.gen_covid as covid
from gen_utils.gen_io import read_run_params,log_msg
import sys

# Script body: load the current run's strain/variant map and, for each
# variant of interest, build one-row-per-patient frames for patients with
# and without that variant.

# Run configuration; the output directory is <container>/output/<run>/.
params = read_run_params()
run = params["current_run"]
out_dir = params["container"]+"output/"+run+"/"
# Read the strain -> variant map for this run and keep only the rows whose
# quality column equals "HQ".
df = pd.read_csv(out_dir+"4_mcov_strain_variant_map_covid_pangolin_db_input_"+run+".csv")
df = df[df.quality=="HQ"]
# First two command-line arguments. They are not used in the visible part of
# the script -- presumably a date window applied further down; TODO confirm.
start_date = sys.argv[1]
end_date = sys.argv[2]

# NOTE(review): nothing is appended to dfs in the visible code -- presumably
# filled later in the loop body; confirm against the full script.
dfs=[]
for voi in covid.covid_variants.pangolin:
     print(voi)
     ### Patients (MRN) with at least one record of this variant.
     keep_mrns_variant = np.unique(df[df.variant==voi]["MRN"])
     df_mrns = df[df.MRN.isin(keep_mrns_variant)]
     df_mrns = df_mrns[df_mrns.variant==voi] ### important step -- drop those patients' rows for any other variant
     # Sort ascending by COLLECTION_DT and keep the first row per MRN.
     # NOTE(review): in-place ops on a filtered frame may emit pandas'
     # SettingWithCopyWarning -- confirm whether that matters here.
     df_mrns.sort_values("COLLECTION_DT",inplace=True)
     df_mrns.drop_duplicates("MRN",keep="first",inplace=True)

     # Same procedure for records that are NOT this variant: patients with at
     # least one non-matching record, restricted to their non-matching rows,
     # then the first row per MRN after sorting by COLLECTION_DT. A patient
     # with both kinds of records appears in both frames.
     keep_mrns_not_variant = np.unique(df[df.variant!=voi]["MRN"])
     df_mrns_not_variant = df[df.MRN.isin(keep_mrns_not_variant)]
     df_mrns_not_variant = df_mrns_not_variant[df_mrns_not_variant.variant!=voi]
     df_mrns_not_variant.sort_values("COLLECTION_DT",inplace=True)
     df_mrns_not_variant.drop_duplicates("MRN",keep="first",inplace=True)
예제 #5
0
def getcon():
    """Open and return a new database connection.

    The connection URI is ``"mysql+pymysql://"`` followed by the ``dbcred``
    entry of the run parameters. A fresh SQLAlchemy engine is created on
    every call (with statement echoing disabled) and a connection obtained
    from it is returned; closing that connection is left to the caller.
    """
    credentials = read_run_params()["dbcred"]
    engine = sqlalchemy.create_engine("mysql+pymysql://" + credentials,
                                      echo=False)
    return engine.connect()