def pgx(args, output_dir=None):
    """Run the pgx (PharmGKB) annotation step on ``args.vcf``.

    The chosen output directory is stored on ``args.output`` (``args`` is
    mutated) and is created when it does not exist yet.  Both ``pgxUtils``
    helpers receive the same output path, ``<args.output>/pharmacogenomics``.

    Args:
        args: Options object.  ``args.vcf`` is read, and ``args.output_dir``
            is read when ``output_dir`` is not supplied.
        output_dir: Optional directory for the results.  When ``None`` (the
            default) results go to ``<args.output_dir>/pgx``.
    """
    # Identity test: only an omitted/None argument selects the default path.
    if output_dir is None:
        args.output = os.path.join(args.output_dir, 'pgx')
    else:
        args.output = output_dir
    if not os.path.isdir(args.output):
        os.makedirs(args.output)

    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print(str(datetime.datetime.now())
          + ': Performing PharmGKB + ClinVar (pgx) annotation')

    # Shared by both helpers below.
    # NOTE(review): presumably an output prefix rather than a directory --
    # confirm against pgxUtils.
    pgx_out_path = os.path.join(args.output, "pharmacogenomics")

    # pharmgkb annotation
    pgxUtils.pgx_annotator(
        args.vcf,
        os.path.join(resources_path, "pgx_vars",
                     "clinical_ann_metadata-snvs.txt"),
        pgx_out_path)
    pgxUtils.star_caller(
        os.path.join(resources_path, "pgx_haps/"),
        args.vcf,
        pgx_out_path)

    print(str(datetime.datetime.now()) + ': Done with pgx/clinvar annotation')
def pgx(args):
    """Run the pgx (PharmGKB) annotation step on ``args.vcf``.

    ``args.output`` is set to ``args.output_dir`` (``args`` is mutated) and
    both ``pgxUtils`` helpers receive the same output path,
    ``<args.output>/pharmacogenomics``.  The directory is not created here.

    NOTE(review): this file also contains an earlier definition,
    ``pgx(args, output_dir=None)``.  Because this definition comes later and
    reuses the name, it appears to be the one bound to ``pgx`` once the module
    is loaded -- confirm which of the two is meant to be live.

    Args:
        args: Options object; ``args.vcf`` and ``args.output_dir`` are read.
    """
    # NOTE: the commented-out BAM genotyping / ClinVar (Stanovar) pipeline
    # and the disabled annotate_snvs helper that used to sit inside this
    # function were dropped as dead code; only the live statements remain.
    args.output = args.output_dir

    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print(str(datetime.datetime.now())
          + ': Performing PharmGKB + ClinVar (pgx) annotation')

    # Shared by both helpers below.
    # NOTE(review): presumably an output prefix rather than a directory --
    # confirm against pgxUtils.
    pgx_out_path = os.path.join(args.output, "pharmacogenomics")

    # pharmgkb annotation
    pgxUtils.pgx_annotator(
        args.vcf,
        os.path.join(resources_path, "pgx_vars",
                     "clinical_ann_metadata-snvs.txt"),
        pgx_out_path)
    pgxUtils.star_caller(
        os.path.join(resources_path, "pgx_haps/"),
        args.vcf,
        pgx_out_path)

    print(str(datetime.datetime.now()) + ': Done with pgx/clinvar annotation')