Esempio n. 1
0
# Turn on debug mode when the third command-line argument is exactly 'debug'.
# NOTE(review): the default assignment (`debug = False`) is not visible in
# this chunk -- confirm it precedes this test, otherwise `debug` is undefined
# whenever the flag is absent.
if len(sys.argv) > 3 and sys.argv[3] == 'debug':
    debug = True

# NOTE(review): presumably the minimum fraction at which a minor base is
# still counted -- confirm against the code that reads this constant.
MINOR_BASE_MIN = 0.15

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Register the shared 'tab_delim' dialect: tab-separated fields, '\n' line
# endings and no quoting at all (csv.QUOTE_NONE), so quote characters in the
# data are read and written literally.
# quotechar is None rather than '': both mean "no quote character", but an
# empty string is rejected with TypeError from Python 3.11 onwards.
csv.register_dialect('tab_delim',
                     delimiter='\t',
                     doublequote=False,
                     quotechar=None,
                     lineterminator='\n',
                     quoting=csv.QUOTE_NONE)

# Input: the tab-delimited STR-SNV table named by the first command-line
# argument, read as dict rows keyed by its header line.
# NOTE(review): msi.open_file is a project helper not shown in this chunk.
strsnv_input = msi.open_file(sys.argv[1], 'r')
strsnv_csv = csv.DictReader(strsnv_input, dialect='tab_delim')

# Output name: the input's file name (directories dropped, cut at its first
# '.') followed by a suffix that records which alleles are reported.
haplo_fbase = sys.argv[1].rpartition('/')[2].partition('.')[0]
haplo_output = msi.open_file(
    ''.join([haplo_fbase, '.haplotype_cts_', alleles_to_report, '.txt']),
    'w')
haplo_csv = csv.writer(haplo_output, dialect='tab_delim')

print("\n**Running {0}, with STR-SNV input: {1}".format(
    script_name, sys.argv[1]))


#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# General methods                                                             #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
def extract_haplotypes(crow):
Esempio n. 2
0
# Home directory of the current user ("~" expanded by os.path.expanduser).
user_home   = os.path.expanduser("~")

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Check for valid command line arguments                                      #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Exactly one positional argument (the flank variants file) is mandatory;
# without it, show the usage line and stop with exit status 1.
if len(sys.argv) < 2:
  print(" ".join(["Usage: ", script_name, "<flank_variants_file>"]))
  sys.exit(1)

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Shared 'tab_delim' dialect: tab-separated, '\n'-terminated, never quoted and
# never escaped. quotechar/escapechar are None instead of '': the meaning is
# the same ("not set"), but empty strings raise TypeError on Python 3.11+.
csv.register_dialect('tab_delim', delimiter='\t', doublequote=False,
                     quotechar=None, lineterminator='\n', escapechar=None,
                     quoting=csv.QUOTE_NONE)
# The output goes next to the input: the directory part is kept untouched and
# only the file name is cut at its first '.'. Cutting the whole path at its
# first '.' (the previous behaviour) broke inputs such as './calls.vcf' or
# 'run.1/calls.vcf'.
_in_dir, _in_sep, _in_name = sys.argv[1].rpartition('/')
snv_outfn = _in_dir + _in_sep + _in_name.split(".")[0] + '.flank_alleles.txt'

# NOTE(review): msi.open_file is a project helper not shown in this chunk;
# the handles it returns are not bound to names here, so they cannot be
# closed explicitly.
in_csv = csv.reader(msi.open_file(sys.argv[1],'r'), dialect='tab_delim')
out_csv = csv.writer(msi.open_file(snv_outfn, 'w'), dialect='tab_delim')

print("\n**Running {0}".format(script_name))

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Main program logic                                                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
#-----------------------------------------------------------------------------#
# Read STR flanking SNV information and write alternate flanking alleles      #
#-----------------------------------------------------------------------------#
# Header line of the flank-allele table: one name per output column.
out_csv.writerow((
  'Chr', 'SNVPos', 'Ref', 'Alt', 'TYPE', 'GT', 'AF',
  'STRName', '5or3pr', 'FlankStart', 'FlankEnd',
))
for srow in in_csv:
  gt_parsed = srow[9].split(':')
  info_parsed = srow[7].split(';')
  var_type = [fld[5:] for fld in info_parsed if fld[0:5] == 'TYPE=']
# Debug mode is on only when the third command-line argument is exactly
# 'debug'; the length test short-circuits, so a shorter argv is never indexed.
debug = len(sys.argv) > 3 and sys.argv[3] == 'debug'

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# 'tab_delim' is the dialect used for every table this script reads or
# writes: tab-separated, '\n'-terminated, with quoting switched off.
# "No quote character" is spelled None; the former '' is refused with
# TypeError by Python 3.11 and later, while None works on every version.
csv.register_dialect('tab_delim',
                     delimiter='\t',
                     doublequote=False,
                     quotechar=None,
                     lineterminator='\n',
                     quoting=csv.QUOTE_NONE)

# Both inputs (first and second command-line arguments) are opened for
# reading before any output is created.
# NOTE(review): msi.open_file is a project helper not shown in this chunk.
summ_input = msi.open_file(sys.argv[1], 'r')
minor_input = msi.open_file(sys.argv[2], 'r')

# Base name for the output: the first input's file name with directories
# dropped and everything from its first '.' removed.
summ_fbase = sys.argv[1].rpartition('/')[2].partition('.')[0]

haplo_output = msi.open_file(
    ''.join([summ_fbase, '.STR_SNV.minor_haplotypes.txt']), 'w')
haplo_csv = csv.writer(haplo_output, dialect='tab_delim')

print("\n**Running {0}, with STR/SNV inputs: {1}, {2}".format(
    script_name, sys.argv[1], sys.argv[2]))


#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# General methods                                                             #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
def extract_haplotypes(snv_base, str_counts):
Esempio n. 4
0
# Report mode: 'major' only when the second command-line argument is exactly
# 'major'; any other value, or none at all, selects 'all'.
alleles_to_report = (
  'major' if len(sys.argv) > 2 and sys.argv[2] == 'major' else 'all')

# Debug mode: on only when the third command-line argument is exactly 'debug'.
debug = len(sys.argv) > 3 and sys.argv[3] == 'debug'

# NOTE(review): presumably the minimum fraction at which a minor base is
# still counted -- confirm against the code that reads this constant.
MINOR_BASE_MIN = 0.15

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Dialect for all tab-delimited input and output of this script: fields are
# split on tabs, rows end in '\n', and nothing is ever quoted.
# quotechar=None replaces quotechar='' -- identical meaning, but the empty
# string raises TypeError starting with Python 3.11.
csv.register_dialect('tab_delim', delimiter='\t', doublequote=False,
                     quotechar=None, lineterminator='\n',
                     quoting=csv.QUOTE_NONE)

# STR-SNV table (first command-line argument), consumed as dict rows keyed by
# the table's header line.
# NOTE(review): msi.open_file is a project helper not shown in this chunk.
strsnv_input = msi.open_file(sys.argv[1], 'r')
strsnv_csv = csv.DictReader(strsnv_input, dialect='tab_delim')

# The output file is named after the input file (no directories, cut at the
# first '.') and tagged with the selected report mode.
haplo_fbase = sys.argv[1].rpartition('/')[2].partition('.')[0]
haplo_fname = ''.join([haplo_fbase, '.haplotype_cts_', alleles_to_report, '.txt'])
haplo_output = msi.open_file(haplo_fname, 'w')
haplo_csv = csv.writer(haplo_output, dialect='tab_delim')

print("\n**Running {0}, with STR-SNV input: {1}".format(script_name, sys.argv[1]))

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# General methods                                                             #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
def extract_haplotypes(crow):
  # Returned tuples: [('C','11',889), ('C','10',96), ('C','9',9), ('C','12',5)]
  snv_base      = crow['SNV Allele']
  str_alleles   = [s_allele.lstrip() for s_allele in crow['Motif Rpts'].split(',')]
Esempio n. 5
0
# Check for valid arguments, and that files exist                             #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Three positional arguments are mandatory; with fewer, show the usage line
# and stop with exit status 1.
if len(sys.argv) < 4:
  print(" ".join(["Usage: ", script_name, "<str_snv_summary_file> <str_info> <probe_cts> [debug]"]))
  sys.exit(1)

# Debug mode is requested by passing the literal 'debug' as the fourth
# command-line argument; anything else leaves it off.
debug = len(sys.argv) > 4 and sys.argv[4] == 'debug'

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Register 'tab_delim': tab as the field separator, '\n' as the row
# terminator, quoting disabled. The quote character is left unset with None;
# the previous '' is no longer accepted by Python 3.11+ (TypeError) although
# it meant the same thing.
csv.register_dialect('tab_delim', delimiter='\t', doublequote=False,
                     quotechar=None, lineterminator='\n',
                     quoting=csv.QUOTE_NONE)

# Inputs, in command-line order: STR/SNV summary, STR info, probe counts.
# NOTE(review): msi.open_file is a project helper not shown in this chunk.
summ_input = msi.open_file(sys.argv[1], 'r')
str_input = msi.open_file(sys.argv[2], 'r')
probe_input = msi.open_file(sys.argv[3], 'r')

# The STR info table is read as dict rows keyed by its header line.
str_csv = csv.DictReader(str_input, dialect='tab_delim')

# Output is named after the summary file: directories dropped, name cut at
# its first '.'.
summ_fbase = sys.argv[1].rpartition('/')[2].partition('.')[0]
final_output = msi.open_file(''.join([summ_fbase, '.STR_SNV.final.txt']), 'w')
final_csv = csv.writer(final_output, dialect='tab_delim')

# Local aliases for two settings owned by the msi module.
FLANK_SIZE, ALLELE2_MIN_PCT = msi.FLANK_SIZE, msi.ALLELE2_MIN_PCT

print("\n**Running {0}, with STR/SNV input: {1}".format(script_name, sys.argv[1]))
print("Parameters: Flank size: {0}".format(FLANK_SIZE))
Esempio n. 6
0
# True only when a fourth command-line argument exists and equals 'debug'.
debug = len(sys.argv) > 4 and sys.argv[4] == 'debug'

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Tab-delimited dialect shared by the readers and writers of this script.
# Quoting is disabled outright (csv.QUOTE_NONE), so no quote character is
# needed; it is given as None because an empty string is rejected with
# TypeError on Python 3.11 and newer.
csv.register_dialect('tab_delim',
                     delimiter='\t',
                     doublequote=False,
                     quotechar=None,
                     lineterminator='\n',
                     quoting=csv.QUOTE_NONE)

# Open the three inputs named on the command line, in argument order.
# NOTE(review): msi.open_file is a project helper not shown in this chunk.
summ_input = msi.open_file(sys.argv[1], 'r')
str_input = msi.open_file(sys.argv[2], 'r')
probe_input = msi.open_file(sys.argv[3], 'r')

# Rows of the second input come back as dicts keyed by its header line.
str_csv = csv.DictReader(str_input, dialect='tab_delim')

# Output file name = first input's file name (no directories, cut at the
# first '.') plus a fixed suffix.
summ_fbase = sys.argv[1].rpartition('/')[2].partition('.')[0]
final_output = msi.open_file(
    ''.join([summ_fbase, '.STR_SNV.final.txt']), 'w')
final_csv = csv.writer(final_output, dialect='tab_delim')

# Local aliases for two settings owned by the msi module.
FLANK_SIZE, ALLELE2_MIN_PCT = msi.FLANK_SIZE, msi.ALLELE2_MIN_PCT

print("\n**Running {0}, with STR/SNV input: {1}".format(
    script_name, sys.argv[1]))
print("Parameters: Flank size: {0}".format(FLANK_SIZE))
Esempio n. 7
0
# All three positional arguments are required -- the script goes on to read
# sys.argv[3] -- so argv must hold at least four entries. The former `< 3`
# test let a two-argument call through to an IndexError.
if len(sys.argv) < 4:
    print(" ".join(
        ["Usage: ", script_name, "<str_summary> <probe_rdcts> <str_info>"]))
    sys.exit(1)

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Tab-delimited dialect without quoting or escaping. "Not set" is expressed
# with None for both quotechar and escapechar: the empty strings used before
# mean the same thing but raise TypeError on Python 3.11 and newer.
csv.register_dialect('tab_delim',
                     delimiter='\t',
                     doublequote=False,
                     quotechar=None,
                     lineterminator='\n',
                     escapechar=None,
                     quoting=csv.QUOTE_NONE)

# Inputs, in command-line order: STR summary, probe read counts, STR info.
# NOTE(review): msi.open_file is a project helper not shown in this chunk.
summ_input = msi.open_file(sys.argv[1], 'r')
rdct_input = msi.open_file(sys.argv[2], 'r')
str_input = msi.open_file(sys.argv[3], 'r')

# The report path is the summary path with its first '_summary' replaced by
# '_final'. When the summary path contains no '_summary' the two paths are
# identical, and opening the report for writing would truncate the summary
# that is being read -- refuse instead of destroying the input.
final_fn = sys.argv[1].replace('_summary', '_final', 1)
if final_fn == sys.argv[1]:
    print("Summary file name must contain '_summary': {0}".format(
        sys.argv[1]))
    sys.exit(1)
rpt_output = msi.open_file(final_fn, 'w')

# The STR info table is read as dict rows keyed by its header line.
str_csv = csv.DictReader(str_input, dialect='tab_delim')

# Local aliases for two settings owned by the msi module.
FLANK_SIZE = msi.FLANK_SIZE
ALLELE2_MIN_PCT = msi.ALLELE2_MIN_PCT

print("\n**Running {0}, with summary input: {1}, probe counts: {2}".format(
    script_name, sys.argv[1], sys.argv[2]))
print("STR file is: {0}, flank size is: {1}".format(sys.argv[3], FLANK_SIZE))

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
Esempio n. 8
0
# File name of this script without its directory.
script_name = os.path.split(__file__)[1]
# Home directory of the current user.
user_home = os.path.expanduser("~")

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Check for valid arguments, and that files exist                             #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# All four positional arguments are required -- the alignment file is read
# from sys.argv[4] -- so argv must hold at least five entries. The former
# `< 4` test let a three-argument call through to an IndexError.
if len(sys.argv) < 5:
  print(" ".join(["Usage: ", script_name, "<probe_info> <str_info> <flank_snvs> <bam_file>"]))
  sys.exit(1)

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# 'tab_delim': tab-separated, '\n'-terminated, no quoting and no escaping.
# quotechar and escapechar are None ("not set"); the empty strings used
# previously are equivalent but raise TypeError on Python 3.11+.
csv.register_dialect('tab_delim', delimiter='\t', doublequote=False,
                     quotechar=None, lineterminator='\n', escapechar=None,
                     quoting=csv.QUOTE_NONE)

# Three tab-delimited tables (probe info, STR info, flanking SNVs), each read
# as dict rows keyed by its own header line.
# NOTE(review): msi.open_file is a project helper not shown in this chunk.
probe_input = msi.open_file(sys.argv[1], 'r')
probe_csv = csv.DictReader(probe_input, dialect='tab_delim')

str_input = msi.open_file(sys.argv[2], 'r')
str_csv = csv.DictReader(str_input, dialect='tab_delim')

fsnv_input = msi.open_file(sys.argv[3], 'r')
fsnv_csv = csv.DictReader(fsnv_input, dialect='tab_delim')

# Alignment file: it must be an existing, readable regular file. The last
# three characters of its path pick the pysam mode -- 'rb' when they are
# exactly 'bam', 'r' otherwise.
sam_fn = sys.argv[4]
sam_or_bam = sam_fn[-3:]
if not (os.path.isfile(sam_fn) and os.access(sam_fn, os.R_OK)):
  print("Unable to open {0} file for input: {1}".format(sam_or_bam, sam_fn))
  sys.exit(1)
sam_input = pysam.Samfile(sam_fn, 'rb' if sam_or_bam == 'bam' else 'r')
# Check for valid arguments, and that files exist                             #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Two positional arguments are mandatory; with fewer, show the usage line and
# stop with exit status 1.
if len(sys.argv) < 3:
  print(" ".join(["Usage: ", script_name, "<str_snv_summary(mix)> <str_snv_final(minor)> [debug]"]))
  sys.exit(1)

# The optional third command-line argument switches debug mode on when it is
# exactly 'debug'.
debug = len(sys.argv) > 3 and sys.argv[3] == 'debug'

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Register the tab-delimited dialect used below: tab separator, '\n' row
# terminator, quoting off. quotechar is None because the equivalent empty
# string is refused (TypeError) by Python 3.11 and newer.
csv.register_dialect('tab_delim', delimiter='\t', doublequote=False,
                     quotechar=None, lineterminator='\n',
                     quoting=csv.QUOTE_NONE)

# The two inputs named by the first and second command-line arguments.
# NOTE(review): msi.open_file is a project helper not shown in this chunk.
summ_input = msi.open_file(sys.argv[1], 'r')
minor_input = msi.open_file(sys.argv[2], 'r')

# Name the output after the first input: directories dropped, file name cut
# at its first '.', fixed suffix appended.
summ_fbase = sys.argv[1].rpartition('/')[2].partition('.')[0]
haplo_fname = ''.join([summ_fbase, '.STR_SNV.minor_haplotypes.txt'])

haplo_output = msi.open_file(haplo_fname, 'w')
haplo_csv = csv.writer(haplo_output, dialect='tab_delim')

print("\n**Running {0}, with STR/SNV inputs: {1}, {2}".format(script_name, sys.argv[1], sys.argv[2]))

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# General methods                                                             #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
def extract_haplotypes(snv_base, str_counts):
  # snv_base: 'C'  str_counts: [[10,11], [38,6], 63] 
  # returned tuples: [('C',10,38,0.6,48),('C',11,6,0.1,48)]
Esempio n. 10
0
    print "Usage: ", script_name, "<flank_variants_file>"
    sys.exit(1)

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Open/initialize output files and general variables                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Dialect for the tab-delimited input and output of this script: no quoting,
# no escaping, '\n' row terminator. Both quotechar and escapechar are None;
# the empty strings used before carry the same meaning but raise TypeError
# from Python 3.11 onwards.
csv.register_dialect('tab_delim',
                     delimiter='\t',
                     doublequote=False,
                     quotechar=None,
                     lineterminator='\n',
                     escapechar=None,
                     quoting=csv.QUOTE_NONE)
# Build the output path beside the input: the directory part is preserved
# and only the file name is cut at its first '.'. Previously the whole path
# was cut at its first '.', so './calls.vcf' or 'run.1/calls.vcf' produced a
# wrong output location.
_in_dir, _in_sep, _in_name = sys.argv[1].rpartition('/')
snv_outfn = _in_dir + _in_sep + _in_name.split(".")[0] + '.flank_alleles.txt'

# NOTE(review): msi.open_file is a project helper not shown in this chunk;
# the handles it returns are not bound to names here, so they cannot be
# closed explicitly.
in_csv = csv.reader(msi.open_file(sys.argv[1], 'r'), dialect='tab_delim')
out_csv = csv.writer(msi.open_file(snv_outfn, 'w'), dialect='tab_delim')

print("\n**Running {0}".format(script_name))

#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
# Main program logic                                                          #
#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#
#-----------------------------------------------------------------------------#
# Read STR flanking SNV information and write alternate flanking alleles      #
#-----------------------------------------------------------------------------#
# Column names of the flank-allele table, written once as its header line.
out_csv.writerow((
    'Chr', 'SNVPos', 'Ref', 'Alt',
    'TYPE', 'GT', 'AF',
    'STRName', '5or3pr', 'FlankStart', 'FlankEnd',
))
for srow in in_csv: