コード例 #1
0
def processBamFiles(genome):
    """Sort, index and variant-call one genome's BAM file, then annotate it.

    The following shell commands are run in order:

    1. ``samtools sort`` on the input BAM, writing ``<genome>.sorted``.
    2. ``samtools index`` on ``<genome>.sorted.bam``.
    3. ``samtools mpileup`` into ``OUTPUT_DIR + "mpileup/"``.
    4. ``bcftools call`` into ``OUTPUT_DIR + "vcf/"`` (unfiltered VCF).
    5. ``vcfutils.pl varFilter`` into ``OUTPUT_DIR + "vcf/"`` (final VCF).
    6. ``mv`` of everything matching ``<genome>.sorted.bam*`` to OUTPUT_DIR.

    ``getAnnotations(genome)`` is called afterwards.

    The input BAM is taken from ``INPUT_DIR`` when the name of the calling
    function contains ``"main"``, and from ``OUTPUT_DIR`` otherwise.

    A command that returns a non-zero status is reported on stderr. The
    remaining commands and ``getAnnotations`` still run, so the control
    flow seen by callers is unchanged.

    Args:
        genome: Genome identifier used as the base name of every file.
    """
    import sys  # local import: only needed for the failure report below

    # inspect.stack()[1][3] is the name of the function that called us.
    called_from_main = "main" in inspect.stack()[1][3]
    source_dir = INPUT_DIR if called_from_main else OUTPUT_DIR

    bam_path = source_dir + genome + ".bam"
    sorted_bam = genome + ".sorted.bam"
    mpileup_path = OUTPUT_DIR + "mpileup/" + genome + "_mpileup"
    unfiltered_vcf = OUTPUT_DIR + "vcf/" + genome + "_unfiltered.vcf"
    filtered_vcf = OUTPUT_DIR + "vcf/" + genome + ".vcf"

    # Order matters: each command consumes the output of the previous one.
    commands = (
        "samtools sort " + bam_path + " " + genome + ".sorted",
        "samtools index " + sorted_bam,
        "samtools mpileup -uD -f ref_files/TB_H37Rv_sequence_validated.fa "
        + sorted_bam + " > " + mpileup_path,
        "bcftools call -mv -Ov -Vindels "
        + mpileup_path + " > " + unfiltered_vcf,
        "vcfutils.pl varFilter -d10 -Q20 "
        + unfiltered_vcf + " > " + filtered_vcf,
        "mv " + sorted_bam + "* " + OUTPUT_DIR,
    )

    for command in commands:
        status = os.system(command)
        if status != 0:
            # Previously the status was discarded and failures went unseen.
            sys.stderr.write(
                "Command failed (status " + str(status) + "): "
                + command + "\n"
            )

    getAnnotations(genome)
コード例 #2
0
def main():
    """Run the pipeline step matching each listed genome's input file.

    ``sys.argv[1]`` names a text file with one genome identifier per line.
    For each identifier, the entries of ``INPUT_DIR`` are scanned for the
    first file name that contains the identifier and whose second
    dot-separated component is a key of ``EXT_DICT``. The mapped value
    selects the step to run:

    * 1 -> ``mapFastqFiles``
    * 2 -> ``processSamFiles``
    * 3 -> ``processBamFiles``
    * 4 -> ``getAnnotations``

    Genomes without a matching file are reported on stderr and skipped.
    A progress line is printed and appended to ``Mutation-analysis.log``
    after every 100 genomes.

    Exits with status 1 when no genome-list argument is given.
    """
    if len(sys.argv) < 2:
        sys.stderr.write('USAGE: python processGenomes.py <Genome List>\n')
        sys.exit(1)

    in_file = sys.argv[1]
    genome_list = {}
    files = os.listdir(INPUT_DIR)
    with open(in_file, 'r') as infile:
        for line in infile:
            genome = line.strip()
            if not genome:
                # A blank line would give '', which is a substring of every
                # file name and would bind to an arbitrary input file.
                continue
            for name in files:
                if genome not in name:
                    continue
                parts = name.split('.')
                if len(parts) < 2:
                    # No dot in the name (e.g. a directory): no extension.
                    continue
                ext = parts[1].strip()
                if ext in EXT_DICT:
                    genome_list[genome] = EXT_DICT[ext]
                    break
            else:
                sys.stderr.write(
                    'No corresponding input file found for genome '
                    + genome + '!\n'
                )

    count = 0
    # 'with' guarantees the log is closed even if a pipeline step raises.
    with open('Mutation-analysis.log', 'w') as outfile:
        for genome, step in genome_list.items():
            # Keep these calls directly inside main(): processBamFiles
            # inspects the name of its caller to choose its input directory.
            if step == 1:
                mapFastqFiles(genome)
            elif step == 2:
                processSamFiles(genome)
            elif step == 3:
                processBamFiles(genome)
            elif step == 4:
                getAnnotations(genome)

            count += 1
            if count % 100 == 0:
                write_data = 'Processed ' + str(count) + ' genomes ...'
                # Parenthesised form prints the same on Python 2 and 3.
                print(write_data)
                outfile.write(write_data + '\n')