Example #1
0
def Classify(input,output):
    """Run the Classify step: sort propagated contig clusters via sort_contigs.

    If "Classify" is listed in ``_skipsteps`` or no classifier is configured
    (``_cls`` is None), only the placeholder ``.clusters`` file and the
    ``classify.skip`` log marker are touched and 0 is returned.

    Otherwise ``sort_contigs`` is called once, using ``tax_key.tab`` as the
    key file for the FCP / PhyloSift classifiers and ``class_key.tab`` for
    every other classifier.

    ``input`` and ``output`` are not read by this function.
    NOTE(review): presumably they are supplied by the pipeline framework
    that schedules this step -- confirm against the caller.

    Returns:
        0 when the step is skipped, None otherwise.
    """
    rundir = _settings.rundir
    prefix = _settings.PREFIX

    if "Classify" in _skipsteps or _cls is None:
        run_process(_settings, "touch %s/Propagate/out/%s.clusters"%(rundir, prefix), "Classify")
        run_process(_settings, "touch %s/Logs/classify.skip"%(rundir), "Classify")
        return 0

    # Only these exact spellings select the taxonomy key; any other
    # classifier name falls back to the class key file.
    if _cls in ("FCP", "fcp", "phylosift", "PhyloSift", "Phylosift"):
        keyFile = "%s/tax_key.tab"%(_settings.DB_DIR)
    else:
        keyFile = "%s/class_key.tab"%(_settings.DB_DIR)

    sort_contigs(
        "%s/Propagate/in/%s.clusters"%(rundir, prefix),
        "%s/Propagate/out/%s.clusters"%(rundir, prefix),
        "%s/Propagate/out/%s.reads.clusters"%(rundir, prefix),
        keyFile,
        "%s/FindORFS/out/%s.fna.bnk"%(rundir, prefix),
        "%s/FindORFS/out/%s.faa.bnk"%(rundir, prefix),
        "%s/FindORFS/out/%s.gene.map"%(rundir, prefix),
        "%s/Classify/out"%(rundir),
        "%s/Scaffold/in/%s.bnk"%(rundir, prefix),
        "%s"%(_settings.AMOS))
Example #2
0
def Classify(input,output):
    """Run the Classify step and, if requested, the contaminant check.

    If "Classify" is listed in ``_skipsteps`` or no classifier is configured
    (``_cls`` is None), only the placeholder ``.clusters`` file and the
    ``classify.skip`` log marker are touched and 0 is returned.

    Otherwise ``sort_contigs`` is called with ``class_key.tab`` when the
    classifier is "metaphyler" (case-insensitive) and ``tax_key.tab`` for any
    other classifier.

    When ``_checkForContaminant`` is True the function then:
      1. collects ids from the assembled contigs and from each library's
         unaligned reads whose last SizeFasta column is at least
         ``_minContaminantTrustedLen``;
      2. extracts the matching rows of the propagated read clusters into
         ``Classify/out/<prefix>.read.contaminant.clusters``;
      3. counts reads per annotation and finds the most frequent one;
      4. writes ``Classify/out/contaminant.true`` (percentage and name of
         that annotation) when its share is below ``_minPercentageInOne``,
         or below ``_minPercentageWithAsmSize`` unless a reference exists
         and the assembly is no larger than ``_maxAsmSize`` times its size.

    ``input`` and ``output`` are not read by this function.
    NOTE(review): presumably they are supplied by the pipeline framework
    that schedules this step -- confirm against the caller.

    Returns:
        0 when the step is skipped, None otherwise.
    """
    rundir = _settings.rundir
    prefix = _settings.PREFIX
    java = _settings.METAMOS_JAVA

    if "Classify" in _skipsteps or _cls is None:
        run_process(_settings, "touch %s/Propagate/out/%s.clusters"%(rundir, prefix), "Classify")
        run_process(_settings, "touch %s/Logs/classify.skip"%(rundir), "Classify")
        return 0

    if _cls.lower() != "metaphyler":
        keyFile = "%s/tax_key.tab"%(_settings.DB_DIR)
    else:
        keyFile = "%s/class_key.tab"%(_settings.DB_DIR)

    sort_contigs(
        "%s/Propagate/in/%s.clusters"%(rundir, prefix),
        "%s/Propagate/out/%s.clusters"%(rundir, prefix),
        "%s/Propagate/out/%s.reads.clusters"%(rundir, prefix),
        keyFile,
        "%s/FindORFS/out/%s.fna.bnk"%(rundir, prefix),
        "%s/FindORFS/out/%s.faa.bnk"%(rundir, prefix),
        "%s/FindORFS/out/%s.gene.map"%(rundir, prefix),
        "%s/Classify/out"%(rundir),
        "%s/Scaffold/in/%s.bnk"%(rundir, prefix),
        "%s"%(_settings.AMOS),
        _lowmem)

    # look for contaminant if requested; the explicit comparison with True
    # is kept so that only True (or 1) enables the check, as before.
    if _checkForContaminant != True:
        return

    # ids of contigs that pass the trusted-length threshold
    run_process(_settings, "java -cp %s SizeFasta %s/Assemble/out/%s.asm.contig |awk '{if ($NF >= %s) { print $1} }' > %s/Classify/out/%s.ctg.iids"%(java, rundir, prefix, _minContaminantTrustedLen, rundir, prefix), "Classify")

    # start from a clean read-id list because the loop below appends (>>)
    run_process(_settings, "unlink %s/Classify/out/%s.read.iids"%(rundir, prefix), "Classify")
    for lib in _readlibs:
        run_process(_settings, "java -cp %s SizeFasta %s/Assemble/out/lib%d.unaligned.fasta |awk '{if ($NF >= %s) { print $1} }' >> %s/Classify/out/%s.read.iids"%(java, rundir, lib.id, _minContaminantTrustedLen, rundir, prefix), "Classify")
        run_process(_settings, "java -cp %s SubFile %s/Classify/out/%s.ctg.iids %s/Assemble/out/%s.lib%dcontig.reads 1 >> %s/Classify/out/%s.read.iids"%(java, rundir, prefix, rundir, prefix, lib.id, rundir, prefix), "Classify")
    run_process(_settings, "java -cp %s SubFile %s/Classify/out/%s.read.iids %s/Propagate/out/%s.reads.clusters > %s/Classify/out/%s.read.contaminant.clusters"%(java, rundir, prefix, rundir, prefix, rundir, prefix), "Classify")

    # now read and figure out percentage
    counts = {}
    total = 0
    with open("%s/Classify/out/%s.read.contaminant.clusters"%(rundir, prefix), 'r') as filein:
        # iterate the file object directly (xreadlines is Python 2 only)
        for line in filein:
            fields = line.split()
            if not fields:
                # tolerate blank lines instead of failing the unpack below
                continue
            read, annot = fields
            total += 1
            counts[annot] = counts.get(annot, 0) + 1

    run_process(_settings, "unlink %s/Classify/out/contaminant.true"%(rundir), "Classify")

    # most frequent annotation; defaults apply when no reads were found
    majority = 0
    classID = 1
    if counts:
        # max() returns the first maximal key, matching a strict ">" scan
        classID = max(counts, key=counts.get)
        majority = float(counts[classID]) / float(total)

    isContaminant = False
    if majority < _minPercentageInOne:
        isContaminant = True
    elif majority < _minPercentageWithAsmSize:
        isContaminant = True

        # a reference of comparable size overrides the contaminant flag
        if os.path.exists("%s/Validate/out/%s.ref.fasta"%(rundir, prefix)):
            asmSize = getCommandOutput("java -cp %s SizeFasta %s/Assemble/out/%s.asm.contig |awk '{SUM+=$NF; print SUM}' |tail -n 1"%(java, rundir, prefix), False)
            expectedSize = getCommandOutput("java -cp %s SizeFasta %s/Validate/out/%s.ref.fasta |awk '{SUM+=$NF; print SUM}' |tail -n 1"%(java, rundir, prefix), False)
            if int(float(expectedSize) * _maxAsmSize) >= int(asmSize):
                isContaminant = False

    if isContaminant:
        # NOTE(review): classID is interpolated unquoted into the awk
        # program; a non-numeric annotation would be read by awk as a
        # variable name -- confirm annotations are always numeric ids.
        name = getCommandOutput("cat %s/tax_key.tab |awk -F \"\\t\" '{if ($1 == %s) print $NF}'"%(_settings.DB_DIR, classID), False)
        with open("%s/Classify/out/contaminant.true"%(rundir), 'w') as contaminant:
            contaminant.write("%s\t%s\n"%(majority*100, name))
Example #3
0
File: bin.py Project: omsai/metAMOS
def Bin(input,output):
    """Run the Bin step and, if requested, the contaminant check.

    The step is skipped when "Bin", "Assemble" or "assemble" is listed in
    ``_skipsteps`` or no classifier is configured (``_cls`` is None). In that
    case only the placeholder ``.clusters`` file and the ``bin.skip`` log
    marker are touched and 0 is returned.

    Otherwise ``sort_contigs`` is called with ``class_key.tab`` when the
    classifier is "metaphyler" (case-insensitive) and ``tax_key.tab`` for any
    other classifier.

    When ``_checkForContaminant`` is True the function then:
      1. collects ids from the assembled contigs and from each library's
         unaligned reads whose last SizeFasta column is at least
         ``_minContaminantTrustedLen``;
      2. extracts the matching rows of the propagated read clusters into
         ``Bin/out/<prefix>.read.contaminant.clusters``;
      3. counts reads per annotation and finds the most frequent one;
      4. writes ``Bin/out/contaminant.true`` (percentage, name of that
         annotation and the trusted-length threshold) when its share is
         below ``_minPercentageInOne``, or below
         ``_minPercentageWithAsmSize`` unless a reference exists and the
         assembly is no larger than ``_maxAsmSize`` times its size.

    ``input`` and ``output`` are not read by this function.
    NOTE(review): presumably they are supplied by the pipeline framework
    that schedules this step -- confirm against the caller.

    Returns:
        0 when the step is skipped, None otherwise.
    """
    rundir = _settings.rundir
    prefix = _settings.PREFIX
    java = _settings.METAMOS_JAVA

    if "Bin" in _skipsteps or _cls is None or "Assemble" in _skipsteps or "assemble" in _skipsteps:
        run_process(_settings, "touch %s/Propagate/out/%s.clusters"%(rundir, prefix), "Bin")
        run_process(_settings, "touch %s/Logs/bin.skip"%(rundir), "Bin")
        return 0

    if _cls.lower() != "metaphyler":
        keyFile = "%s/tax_key.tab"%(_settings.DB_DIR)
    else:
        keyFile = "%s/class_key.tab"%(_settings.DB_DIR)

    sort_contigs(
        "%s/Propagate/in/%s.clusters"%(rundir, prefix),
        "%s/Propagate/out/%s.clusters"%(rundir, prefix),
        "%s/Propagate/out/%s.reads.clusters"%(rundir, prefix),
        keyFile,
        "%s/FindORFS/out/%s.fna.bnk"%(rundir, prefix),
        "%s/FindORFS/out/%s.faa.bnk"%(rundir, prefix),
        "%s/FindORFS/out/%s.gene.map"%(rundir, prefix),
        "%s/Bin/out"%(rundir),
        "%s/Scaffold/in/%s.bnk"%(rundir, prefix),
        "%s"%(_settings.AMOS),
        _lowmem)

    # look for contaminant if requested; the explicit comparison with True
    # is kept so that only True (or 1) enables the check, as before.
    if _checkForContaminant != True:
        return

    # ids of contigs that pass the trusted-length threshold
    run_process(_settings, "java -cp %s SizeFasta %s/Assemble/out/%s.asm.contig |awk '{if ($NF >= %s) { print $1} }' > %s/Bin/out/%s.ctg.iids"%(java, rundir, prefix, _minContaminantTrustedLen, rundir, prefix), "Bin")

    # start from a clean read-id list because the loop below appends (>>)
    run_process(_settings, "unlink %s/Bin/out/%s.read.iids"%(rundir, prefix), "Bin")
    for lib in _readlibs:
        run_process(_settings, "java -cp %s SizeFasta %s/Assemble/out/lib%d.unaligned.fasta |awk '{if ($NF >= %s) { print $1} }' >> %s/Bin/out/%s.read.iids"%(java, rundir, lib.id, _minContaminantTrustedLen, rundir, prefix), "Bin")
        run_process(_settings, "java -cp %s SubFile %s/Bin/out/%s.ctg.iids %s/Assemble/out/%s.lib%dcontig.reads 1 >> %s/Bin/out/%s.read.iids"%(java, rundir, prefix, rundir, prefix, lib.id, rundir, prefix), "Bin")
    run_process(_settings, "java -cp %s SubFile %s/Bin/out/%s.read.iids %s/Propagate/out/%s.reads.clusters > %s/Bin/out/%s.read.contaminant.clusters"%(java, rundir, prefix, rundir, prefix, rundir, prefix), "Bin")

    # now read and figure out percentage
    counts = {}
    total = 0
    with open("%s/Bin/out/%s.read.contaminant.clusters"%(rundir, prefix), 'r') as filein:
        # iterate the file object directly (xreadlines is Python 2 only)
        for line in filein:
            fields = line.split()
            if not fields:
                # tolerate blank lines instead of failing the unpack below
                continue
            read, annot = fields
            total += 1
            counts[annot] = counts.get(annot, 0) + 1

    run_process(_settings, "unlink %s/Bin/out/contaminant.true"%(rundir), "Bin")

    # most frequent annotation; defaults apply when no reads were found
    majority = 0
    classID = 1
    if counts:
        # max() returns the first maximal key, matching a strict ">" scan
        classID = max(counts, key=counts.get)
        majority = float(counts[classID]) / float(total)

    isContaminant = False
    if majority < _minPercentageInOne:
        isContaminant = True
    elif majority < _minPercentageWithAsmSize:
        isContaminant = True

        # a reference of comparable size overrides the contaminant flag
        if os.path.exists("%s/Validate/out/%s.ref.fasta"%(rundir, prefix)):
            asmSize = getCommandOutput("java -cp %s SizeFasta %s/Assemble/out/%s.asm.contig |awk '{SUM+=$NF; print SUM}' |tail -n 1"%(java, rundir, prefix), False)
            expectedSize = getCommandOutput("java -cp %s SizeFasta %s/Validate/out/%s.ref.fasta |awk '{SUM+=$NF; print SUM}' |tail -n 1"%(java, rundir, prefix), False)
            if int(float(expectedSize) * _maxAsmSize) >= int(asmSize):
                isContaminant = False

    if isContaminant:
        # NOTE(review): classID is interpolated unquoted into the awk
        # program; a non-numeric annotation would be read by awk as a
        # variable name -- confirm annotations are always numeric ids.
        name = getCommandOutput("cat %s/tax_key.tab |awk -F \"\\t\" '{if ($1 == %s) print $NF}'"%(_settings.DB_DIR, classID), False)
        with open("%s/Bin/out/contaminant.true"%(rundir), 'w') as contaminant:
            contaminant.write("%s\t%s\t%s\n"%(majority*100, name, _minContaminantTrustedLen))