if len(sys.argv) > 3 and sys.argv[3] == 'debug': debug = True MINOR_BASE_MIN = 0.15 #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# # Open/initialize output files and general variables # #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# csv.register_dialect('tab_delim', delimiter='\t', doublequote=False, quotechar='', lineterminator='\n', quoting=csv.QUOTE_NONE) strsnv_input = msi.open_file(sys.argv[1], 'r') strsnv_csv = csv.DictReader(strsnv_input, dialect='tab_delim') haplo_fbase = sys.argv[1].split('/')[-1].split('.')[0] haplo_output = msi.open_file( haplo_fbase + '.haplotype_cts_' + alleles_to_report + '.txt', 'w') haplo_csv = csv.writer(haplo_output, dialect='tab_delim') print "\n**Running {0}, with STR-SNV input: {1}".format( script_name, sys.argv[1]) #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# # General methods # #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# def extract_haplotypes(crow):
user_home = os.path.expanduser("~")

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Check for valid command line arguments #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# The flank-variants file (first argument) is mandatory.
if len(sys.argv) < 2:
    print(" ".join(["Usage: ", script_name, "<flank_variants_file>"]))
    sys.exit(1)

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Plain tab-separated dialect: no quoting, no escaping, '\n' line endings.
csv.register_dialect(
    'tab_delim',
    delimiter='\t',
    doublequote=False,
    quotechar='',
    lineterminator='\n',
    escapechar='',
    quoting=csv.QUOTE_NONE
)

# Output name: the input path cut at its first '.', plus a fixed suffix.
snv_outfn = sys.argv[1].split(".", 1)[0] + '.flank_alleles.txt'
in_csv = csv.reader(msi.open_file(sys.argv[1], 'r'), dialect='tab_delim')
out_csv = csv.writer(msi.open_file(snv_outfn, 'w'), dialect='tab_delim')

print("\n**Running {0}".format(script_name))

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Main program logic #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#

#-----------------------------------------------------------------------------#
# Read STR flanking SNV information and write alternate flanking alleles #
#-----------------------------------------------------------------------------#
# Header row of the output table.
out_csv.writerow([
    'Chr', 'SNVPos', 'Ref', 'Alt', 'TYPE', 'GT', 'AF',
    'STRName', '5or3pr', 'FlankStart', 'FlankEnd'
])
for srow in in_csv:
    # Column 10 is ':'-separated, column 8 is ';'-separated.
    gt_parsed = srow[9].split(':')
    info_parsed = srow[7].split(';')
    # Values of every 'TYPE=' entry found in the ';'-separated column.
    var_type = [fld[5:] for fld in info_parsed if fld[0:5] == 'TYPE=']
debug = False if len(sys.argv) > 3 and sys.argv[3] == 'debug': debug = True #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# # Open/initialize output files and general variables # #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# csv.register_dialect('tab_delim', delimiter='\t', doublequote=False, quotechar='', lineterminator='\n', quoting=csv.QUOTE_NONE) summ_input = msi.open_file(sys.argv[1], 'r') minor_input = msi.open_file(sys.argv[2], 'r') summ_fbase = sys.argv[1].split('/')[-1].split('.')[0] haplo_output = msi.open_file(summ_fbase + '.STR_SNV.minor_haplotypes.txt', 'w') haplo_csv = csv.writer(haplo_output, dialect='tab_delim') print "\n**Running {0}, with STR/SNV inputs: {1}, {2}".format( script_name, sys.argv[1], sys.argv[2]) #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# # General methods # #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# def extract_haplotypes(snv_base, str_counts):
# Report every allele unless the second argument is the literal 'major'.
alleles_to_report = (
    'major' if len(sys.argv) > 2 and sys.argv[2] == 'major' else 'all')

# Debug is on only when the third argument is the literal 'debug'.
debug = len(sys.argv) > 3 and sys.argv[3] == 'debug'

MINOR_BASE_MIN = 0.15

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Plain tab-separated dialect: no quoting at all, '\n' line endings.
csv.register_dialect(
    'tab_delim',
    delimiter='\t',
    doublequote=False,
    quotechar='',
    lineterminator='\n',
    quoting=csv.QUOTE_NONE
)

# Input table is read as dicts keyed by its header row.
strsnv_input = msi.open_file(sys.argv[1], 'r')
strsnv_csv = csv.DictReader(strsnv_input, dialect='tab_delim')

# Output name: last '/'-separated component of the input path, cut at its
# first '.', followed by the allele-report mode.
haplo_fbase = sys.argv[1].rsplit('/', 1)[-1].split('.', 1)[0]
haplo_output = msi.open_file(
    ''.join([haplo_fbase, '.haplotype_cts_', alleles_to_report, '.txt']),
    'w'
)
haplo_csv = csv.writer(haplo_output, dialect='tab_delim')

print("\n**Running {0}, with STR-SNV input: {1}".format(
    script_name, sys.argv[1]))

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# General methods #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
def extract_haplotypes(crow):
    # Returned tuples: [('C','11',889), ('C','10',96), ('C','9',9), ('C','12',5)]
    snv_base = crow['SNV Allele']
    str_alleles = [s_allele.lstrip() for s_allele in crow['Motif Rpts'].split(',')]
# Check for valid arguments, and that files exist #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Three positional arguments are required; a fourth one is optional.
if len(sys.argv) < 4:
    print(" ".join([
        "Usage: ", script_name,
        "<str_snv_summary_file> <str_info> <probe_cts> [debug]"
    ]))
    sys.exit(1)

# Debug is on only when the fourth argument is the literal 'debug'.
debug = len(sys.argv) > 4 and sys.argv[4] == 'debug'

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Plain tab-separated dialect: no quoting at all, '\n' line endings.
csv.register_dialect(
    'tab_delim',
    delimiter='\t',
    doublequote=False,
    quotechar='',
    lineterminator='\n',
    quoting=csv.QUOTE_NONE
)

# Inputs, in command-line order.
summ_input = msi.open_file(sys.argv[1], 'r')
str_input = msi.open_file(sys.argv[2], 'r')
probe_input = msi.open_file(sys.argv[3], 'r')
str_csv = csv.DictReader(str_input, dialect='tab_delim')

# Output name: last '/'-separated component of the first input, cut at its
# first '.', plus a fixed suffix.
summ_fbase = sys.argv[1].rsplit('/', 1)[-1].split('.', 1)[0]
final_output = msi.open_file(summ_fbase + '.STR_SNV.final.txt', 'w')
final_csv = csv.writer(final_output, dialect='tab_delim')

# Local aliases for the constants defined in the msi module.
FLANK_SIZE = msi.FLANK_SIZE
ALLELE2_MIN_PCT = msi.ALLELE2_MIN_PCT

print("\n**Running {0}, with STR/SNV input: {1}".format(
    script_name, sys.argv[1]))
print("Parameters: Flank size: {0}".format(FLANK_SIZE))
# Debug is on only when the fourth argument is the literal 'debug'.
debug = len(sys.argv) > 4 and sys.argv[4] == 'debug'

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Plain tab-separated dialect: no quoting at all, '\n' line endings.
csv.register_dialect(
    'tab_delim',
    delimiter='\t',
    doublequote=False,
    quotechar='',
    lineterminator='\n',
    quoting=csv.QUOTE_NONE
)

# Inputs, in command-line order.
summ_input = msi.open_file(sys.argv[1], 'r')
str_input = msi.open_file(sys.argv[2], 'r')
probe_input = msi.open_file(sys.argv[3], 'r')
str_csv = csv.DictReader(str_input, dialect='tab_delim')

# Output name: last '/'-separated component of the first input, cut at its
# first '.', plus a fixed suffix.
summ_fbase = sys.argv[1].rsplit('/', 1)[-1].split('.', 1)[0]
final_output = msi.open_file(summ_fbase + '.STR_SNV.final.txt', 'w')
final_csv = csv.writer(final_output, dialect='tab_delim')

# Local aliases for the constants defined in the msi module.
FLANK_SIZE = msi.FLANK_SIZE
ALLELE2_MIN_PCT = msi.ALLELE2_MIN_PCT

print("\n**Running {0}, with STR/SNV input: {1}".format(
    script_name, sys.argv[1]))
print("Parameters: Flank size: {0}".format(FLANK_SIZE))
# Three positional arguments are required (sys.argv[1]..sys.argv[3] are all
# read below), so sys.argv must hold at least four entries.  The previous
# "< 3" test let a two-argument call through, which then died with an
# IndexError on sys.argv[3] instead of showing the usage message.
if len(sys.argv) < 4:
    print(" ".join([
        "Usage: ", script_name, "<str_summary> <probe_rdcts> <str_info>"
    ]))
    sys.exit(1)

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Plain tab-separated dialect: no quoting, no escaping, '\n' line endings.
csv.register_dialect(
    'tab_delim',
    delimiter='\t',
    doublequote=False,
    quotechar='',
    lineterminator='\n',
    escapechar='',
    quoting=csv.QUOTE_NONE
)

# The report name is the summary name with its first '_summary' replaced by
# '_final'.  If the summary name has no '_summary' in it, replace() returns
# the input path unchanged and opening it for writing would truncate the
# input file, so refuse to continue in that case.
final_fn = sys.argv[1].replace('_summary', '_final', 1)
if final_fn == sys.argv[1]:
    print("Summary file name must contain '_summary': {0}".format(
        sys.argv[1]))
    sys.exit(1)

# Inputs, in command-line order, then the report output.
summ_input = msi.open_file(sys.argv[1], 'r')
rdct_input = msi.open_file(sys.argv[2], 'r')
str_input = msi.open_file(sys.argv[3], 'r')
rpt_output = msi.open_file(final_fn, 'w')
str_csv = csv.DictReader(str_input, dialect='tab_delim')

# Local aliases for the constants defined in the msi module.
FLANK_SIZE = msi.FLANK_SIZE
ALLELE2_MIN_PCT = msi.ALLELE2_MIN_PCT

print("\n**Running {0}, with summary input: {1}, probe counts: {2}".format(
    script_name, sys.argv[1], sys.argv[2]))
print("STR file is: {0}, flank size is: {1}".format(sys.argv[3], FLANK_SIZE))

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
script_name = os.path.basename(__file__)
user_home = os.path.expanduser("~")

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Check for valid arguments, and that files exist #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Four positional arguments are required (sys.argv[1]..sys.argv[4] are all
# read below), so sys.argv must hold at least five entries.  The previous
# "< 4" test let a three-argument call through, which then died with an
# IndexError on sys.argv[4] instead of showing the usage message.
if len(sys.argv) < 5:
    print(" ".join([
        "Usage: ", script_name,
        "<probe_info> <str_info> <flank_snvs> <bam_file>"
    ]))
    sys.exit(1)

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Plain tab-separated dialect: no quoting, no escaping, '\n' line endings.
csv.register_dialect(
    'tab_delim',
    delimiter='\t',
    doublequote=False,
    quotechar='',
    lineterminator='\n',
    escapechar='',
    quoting=csv.QUOTE_NONE
)

# The three tab-delimited inputs are each read as dicts keyed by header row.
probe_input = msi.open_file(sys.argv[1], 'r')
probe_csv = csv.DictReader(probe_input, dialect='tab_delim')
str_input = msi.open_file(sys.argv[2], 'r')
str_csv = csv.DictReader(str_input, dialect='tab_delim')
fsnv_input = msi.open_file(sys.argv[3], 'r')
fsnv_csv = csv.DictReader(fsnv_input, dialect='tab_delim')

# Alignment input: the last three characters of the name select the open
# mode ('rb' for a name ending in 'bam', 'r' otherwise).
sam_fn = sys.argv[4]
sam_or_bam = sam_fn[-3:]
if os.path.isfile(sam_fn) and os.access(sam_fn, os.R_OK):
    sam_input = pysam.Samfile(sam_fn, 'rb' if sam_or_bam == 'bam' else 'r')
else:
    print("Unable to open {0} file for input: {1}".format(sam_or_bam, sam_fn))
    sys.exit(1)
# Check for valid arguments, and that files exist # #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# if len(sys.argv) < 3: print "Usage: ", script_name, "<str_snv_summary(mix)> <str_snv_final(minor)> [debug]" sys.exit(1) debug = False if len(sys.argv) > 3 and sys.argv[3] == 'debug': debug = True #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# # Open/initialize output files and general variables # #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# csv.register_dialect('tab_delim', delimiter='\t', doublequote=False, quotechar='', lineterminator='\n', quoting=csv.QUOTE_NONE) summ_input = msi.open_file(sys.argv[1], 'r') minor_input = msi.open_file(sys.argv[2], 'r') summ_fbase = sys.argv[1].split('/')[-1].split('.')[0] haplo_output = msi.open_file(summ_fbase + '.STR_SNV.minor_haplotypes.txt', 'w') haplo_csv = csv.writer(haplo_output, dialect='tab_delim') print "\n**Running {0}, with STR/SNV inputs: {1}, {2}".format(script_name, sys.argv[1], sys.argv[2]) #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# # General methods # #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# def extract_haplotypes(snv_base, str_counts): # snv_base: 'C' str_counts: [[10,11], [38,6], 63] # returned tuples: [('C',10,38,0.6,48),('C',11,6,0.1,48)]
print "Usage: ", script_name, "<flank_variants_file>" sys.exit(1) #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# # Open/initialize output files and general variables # #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# csv.register_dialect('tab_delim', delimiter='\t', doublequote=False, quotechar='', lineterminator='\n', escapechar='', quoting=csv.QUOTE_NONE) snv_outfn = sys.argv[1].split(".")[0] + '.flank_alleles.txt' in_csv = csv.reader(msi.open_file(sys.argv[1], 'r'), dialect='tab_delim') out_csv = csv.writer(msi.open_file(snv_outfn, 'w'), dialect='tab_delim') print "\n**Running {0}".format(script_name) #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# # Main program logic # #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+# #-----------------------------------------------------------------------------# # Read STR flanking SNV information and write alternate flanking alleles # #-----------------------------------------------------------------------------# out_csv.writerow([ 'Chr', 'SNVPos', 'Ref', 'Alt', 'TYPE', 'GT', 'AF', 'STRName', '5or3pr', 'FlankStart', 'FlankEnd' ]) for srow in in_csv: