Example #1
0
def trf_run_CMD(fas_file, seq, seqtype,softmask=False,tmpdir=None):
    """Run ``trf`` on a FASTA temp file and return a new, masked FASTA temp file.

    Protein input (``seqtype == 'p'``) is returned untouched. For any other
    type the ``trf`` command is run on ``fas_file.name``, its ``.mask`` output
    is loaded with ``Read``, both ``trf`` output files are removed and
    ``fas_file`` is closed. The masked records are then written to a fresh
    ``NamedTemporaryFile``.

    :param fas_file: open file object exposing ``name`` (and ``close``); it is
        closed by this function unless ``seqtype == 'p'``.
    :param seq: the original sequences, in the order they were written to
        ``fas_file``. Only consulted when ``softmask`` is true.
    :param seqtype: sequence type code; ``'p'`` short-circuits the function.
    :param softmask: when true, positions where the masked sequence has ``'N'``
        but the original does not are emitted as the lower-cased original
        character instead of ``'N'``.
    :param tmpdir: directory passed to ``tempfile.NamedTemporaryFile`` for the
        new file (``None`` selects the default temp dir).
    :return: ``fas_file`` itself for protein input, otherwise the new, flushed
        and still open ``NamedTemporaryFile`` (suffix ``.fasta``).
    """
    if seqtype == 'p':
        return fas_file

    olddir = os.getcwd()
    # NOTE(review): presumably trf drops its output files into the current
    # directory; running from the input's directory makes the paths built from
    # fas_file.name below line up -- confirm against the trf documentation.
    os.chdir(os.path.dirname(fas_file.name))
    try:
        # Discard trf's console output; the handle is closed as soon as trf
        # returns instead of being left for the garbage collector.
        with open(os.devnull, 'w') as devnull:
            util.run_cmd("trf %s 2 7 7 80 10 50 2000 -m -h" % fas_file.name, verbose=False, stdout=devnull)

        fileres = fas_file.name + '.2.7.7.80.10.50.2000.mask'
        fileres2 = fas_file.name + '.2.7.7.80.10.50.2000.dat'
        result = Read(fileres, sep='\t')
        util.run_cmd('rm %s' % fileres, shell=True)
        util.run_cmd('rm %s' % fileres2, shell=True)
    finally:
        # Always give the caller its working directory back, even when trf or
        # the result parsing fails.
        os.chdir(olddir)
    fas_file.close()

    # Created only after the working directory has been restored, so that a
    # relative ``tmpdir`` resolves the same way it does for the caller.
    fas_new = tempfile.NamedTemporaryFile(suffix='.fasta', dir=tmpdir)
    if softmask:
        nresult = []
        for seqold, seqmasked in zip(seq, result.seq()):
            # Keep the original character everywhere except where trf masked
            # it (masked has 'N', original does not): there, lower-case it.
            nresult.append(''.join(
                c1 if c2 != 'N' or c1 == 'N' else c1.lower()
                for c1, c2 in zip(seqold, seqmasked)))
        write_fasta_text(result.Get(0)(), nresult, len(nresult), fas_new)
    else:
        write_fasta_text(result.Get(0)(), result.seq(), len(result), fas_new)
    fas_new.flush()

    return fas_new
Example #2
0
def last(data, type, folder=None, pos_mode = 'last', dbargs='', alargs='', lsargs='', probs=0, trf=False, last_split=True, calc_evalue=False,softmask=False,tmpdir=None):
    """Align two sequence collections with LAST and return the parsed result.

    The sequences are written to temporary FASTA files (records are titled by
    their index), a LAST database is built for the first collection, the
    alignment command built by ``last_run_CMD`` is run with its output
    captured in a temporary ``.maf`` file, and that file is parsed by
    ``last_result2``. All temporary files and database files are removed
    before returning, including when a step fails.

    :param data: indexable; ``data[0]`` and ``data[1]`` are the two sequence
        lists.
    :param type: pair of sequence type codes for ``data[0]`` / ``data[1]``.
        For ``('n', 'p')`` the two inputs are swapped (a warning is emitted),
        so the result then refers to the swapped order.
    :param folder: unused; kept for interface compatibility.
    :param pos_mode: forwarded to ``last_result2``.
    :param dbargs: extra argument string forwarded to ``last_make_db_CMD``.
    :param alargs: extra argument string forwarded to ``last_run_CMD``.
    :param lsargs: extra argument string forwarded to ``last_run_CMD``.
    :param probs: when non-zero, ``-j <probs + 4>`` is added to ``alargs`` and
        ``last_result2`` is told that probabilities are present.
    :param trf: when true, sequences are upper-cased, ``-c`` / ``-u2`` are
        added to the db / alignment arguments unless ``not_contains`` reports
        them as already present, and both FASTA files are passed through
        ``trf_run_CMD``.
    :param last_split: forwarded to ``last_run_CMD`` and ``last_result2``.
    :param calc_evalue: forwarded to ``last_run_CMD`` and ``last_result2``;
        forced to ``False`` when the two types differ. When true, a database
        is built for the second collection as well.
    :param softmask: forwarded to ``trf_run_CMD``.
    :param tmpdir: directory for all temporary files (``None`` = default).
    :return: whatever ``last_result2`` returns for the alignment output.
    """
    alargs = [alargs]
    dbargs = [dbargs]
    lsargs = [lsargs]

    if probs:
        alargs.append('-j %d' % (probs+4))

    seq_1 = data[0]
    seq_2 = data[1]

    if type[0] == 'n' and type[1] == 'p':
        util.warning('Reversing order, last supports only Prot to DNA and not DNA to Prot')
        seq_1, seq_2 = seq_2, seq_1
        type = (type[1], type[0])

    if trf:
        seq_1 = [s.upper() for s in seq_1]
        seq_2 = [s.upper() for s in seq_2]
        if not_contains(dbargs, '-c'):
            dbargs.append('-c')
        if not_contains(alargs, '-u'):
            alargs.append('-u2')

    title_1 = ["%d" % i for i in xrange(len(seq_1))]
    title_2 = ["%d" % i for i in xrange(len(seq_2))]

    fas_suffix = '.fasta'
    fas_1 = fas_2 = res = None
    db_prefixes = []  # database prefixes that may have files on disk
    try:
        fas_1 = tempfile.NamedTemporaryFile(suffix=fas_suffix, dir=tmpdir)
        fas_2 = tempfile.NamedTemporaryFile(suffix=fas_suffix, dir=tmpdir)
        res = tempfile.NamedTemporaryFile(suffix='.maf', dir=tmpdir)

        # Database prefixes are the FASTA paths without their suffix; both are
        # derived the same way (the first used to strip only four characters).
        db_1 = fas_1.name[:-len(fas_suffix)]
        db_2 = fas_2.name[:-len(fas_suffix)]

        write_fasta_text(title_1, seq_1, len(seq_1), fas_1)
        write_fasta_text(title_2, seq_2, len(seq_2), fas_2)

        # Make the records visible to the external tools reading by path.
        fas_1.flush()
        fas_2.flush()

        if trf:
            # trf_run_CMD may hand back a different temp file; the database
            # prefixes computed above stay in use.
            fas_1 = trf_run_CMD(fas_1, seq_1, type[0], softmask=softmask, tmpdir=tmpdir)
            fas_2 = trf_run_CMD(fas_2, seq_2, type[1], softmask=softmask, tmpdir=tmpdir)

        if type[0] != type[1]:
            calc_evalue = False

        # Register the prefix before building so partial output of a failed
        # build is cleaned up too.
        db_prefixes.append(db_1)
        util.run_cmd(last_make_db_CMD(db_1, fas_1.name, type[0], dbargs), verbose=False)
        if calc_evalue:
            db_prefixes.append(db_2)
            util.run_cmd(last_make_db_CMD(db_2, fas_2.name, type[1], dbargs), verbose=False)

        util.run_cmd(last_run_CMD(db_1, type[0], db_2, fas_2.name, type[1], alargs, lsargs, last_split, calc_evalue), shell=True, stdout=res, verbose=False)
        res.flush()

        # Parsed while the result file still exists; cleanup runs afterwards.
        return last_result2(res.name, pos_mode, probs>0, last_split, calc_evalue)
    finally:
        for handle in (fas_1, fas_2, res):
            if handle is not None:
                handle.close()
        for prefix in db_prefixes:
            # -f: a build that failed before writing anything must not turn
            # the cleanup itself into an error.
            util.run_cmd('rm -f %s*' % prefix, shell=True)
Example #3
0
def _blast_remove_temp(paths):
    """Delete every file that sits next to a temp FASTA in *paths* and contains its basename."""
    del_CMDs = []
    for path in paths:
        # Use the directory the temp file really lives in rather than assuming
        # '/tmp' and a fixed path depth.
        tmp_dir, base = os.path.split(path)
        for f in os.listdir(tmp_dir):
            if base in f:
                del_CMDs.append("rm -f '%s'" % os.path.join(tmp_dir, f))
    util.run_seq_cmds(del_CMDs)


def blast(data, type, folder, reciprocal = True, normalize = False, overwrite = False, blastopts='-num_threads %d' % multiprocessing.cpu_count()):
    """BLAST two sequence collections against each other and return the hits as columns.

    Both collections are written to temporary FASTA files (records are titled
    by their index). Result files are written to ``folder`` and named after the
    two values returned by ``write_fasta_text``. The temporary FASTA files and
    every file next to them containing their name are removed afterwards, also
    when a command fails.

    :param data: indexable; ``data[0]`` and ``data[1]`` are the two sequence
        lists.
    :param type: pair of sequence type codes for ``data[0]`` / ``data[1]``.
    :param folder: directory receiving the ``<md5>-<md5>.tsv`` result files.
    :param reciprocal: also search the second collection against the first and
        combine both directions with ``blast_reciprocal``.
    :param normalize: also search each collection against itself and pass the
        results to ``blast_bitscore_normalize``.
    :param overwrite: forwarded to ``blast_run_CMD``.
    :param blastopts: extra option string forwarded to ``blast_run_CMD``.
    :return: tuple of 14 ``util.darray`` columns, in the order qseqid, sseqid,
        qlen, qstart, qend, slen, sstart, send, length, mismatch, gapopen,
        pident, evalue, bitscore (all empty when there are no hits).
    """
    seq_1 = data[0]
    seq_2 = data[1]

    title_1 = ["%d" % i for i in xrange(len(seq_1))]
    title_2 = ["%d" % i for i in xrange(len(seq_2))]

    fas_1 = tempfile.NamedTemporaryFile(delete = False)
    fas_2 = tempfile.NamedTemporaryFile(delete = False)
    try:
        db_1 = "%s.blastdb" % (fas_1.name)
        db_2 = "%s.blastdb" % (fas_2.name)

        md5_1 = write_fasta_text(title_1, seq_1, len(seq_1), fas_1)
        md5_2 = write_fasta_text(title_2, seq_2, len(seq_2), fas_2)
        fas_1.close()
        fas_2.close()

        # 1 -> 2: query the first collection against a db of the second.
        file_12 = "%s/%s-%s.tsv" % (folder, md5_1, md5_2)
        mkdb_CMDs = [blast_make_db_CMD(fas_2.name, db_2, type[1])]
        blst_CMDs = [blast_run_CMD(fas_1.name, db_2, type[0], file_12, blastopts, overwrite)]

        if reciprocal or normalize:
            # db_1 is searched by both the 2 -> 1 and the 1 -> 1 run, so it has
            # to exist for either option. It holds the first collection and is
            # therefore built with type[0].
            mkdb_CMDs.append(blast_make_db_CMD(fas_1.name, db_1, type[0]))

        if reciprocal:
            # 2 -> 1: the query is the second collection, hence type[1].
            file_21 = "%s/%s-%s.tsv" % (folder, md5_2, md5_1)
            blst_CMDs.append(blast_run_CMD(fas_2.name, db_1, type[1], file_21, blastopts, overwrite))

        if normalize:
            # Self-searches 1 -> 1 and 2 -> 2.
            file_11 = "%s/%s-%s.tsv" % (folder, md5_1, md5_1)
            blst_CMDs.append(blast_run_CMD(fas_1.name, db_1, type[0], file_11, blastopts, overwrite))

            file_22 = "%s/%s-%s.tsv" % (folder, md5_2, md5_2)
            blst_CMDs.append(blast_run_CMD(fas_2.name, db_2, type[1], file_22, blastopts, overwrite))

        util.run_par_cmds(mkdb_CMDs)
        util.run_seq_cmds(blst_CMDs)
    finally:
        # The FASTA files were created with delete=False, so they (and the
        # database files named after them) must be removed explicitly.
        fas_1.close()
        fas_2.close()
        _blast_remove_temp([fas_1.name, fas_2.name])

    ab = blast_res_to_dict(file_12)

    if reciprocal:
        ba = blast_res_to_dict(file_21)
        ab = blast_reciprocal(ab, ba)

    if normalize:
        aa = blast_res_to_dict(file_11, max=True)
        bb = blast_res_to_dict(file_22, max=True)
        ab = blast_bitscore_normalize(ab, aa, bb)

    #         qseqid  sseqid qlen qstart qend slen sstart send length mismatch gapopen pident evalue bitscore
    sp_types = (int,    int,    int,  int, int, int,  int,    int,  int,   int,      int,     float,  float,  float)

    # One flat row per hit: the dictionary key fields followed by the hit fields.
    rows = [list(key) + hit for (key, hits) in ab.items() for hit in hits]

    if not rows:
        return tuple([util.darray([], tp) for tp in sp_types])

    # Transpose the rows into columns and give each column its declared type.
    columns = map(lambda *col: list(col), *rows)
    return tuple([util.darray(col, tp) for (tp, col) in zip(sp_types, columns)])