Ejemplo n.º 1
0
def make_backbone_blast_db(project_dir, blast_db_seq, dbtype):
    '''It formats a blast database inside the project when it is needed.

    The sequence file is placed in the project blast databases directory,
    named after _get_basename(blast_db_seq). When the input is already
    fasta it is symlinked, otherwise it is converted to fasta. Then the
    database is formatted with makeblastdb_plus. Nothing is done when the
    sequence file is already in the databases directory.

    project_dir  -- path of the project directory
    blast_db_seq -- path of the sequence file used to build the database
    dbtype       -- database type, passed untouched to makeblastdb_plus

    It returns the path of the sequence file inside the project.
    It raises a RuntimeError when makeblastdb_plus fails; in that case the
    sequence file placed in the project is removed first.
    '''
    logger = logging.getLogger(LOGGER_NAME)
    #the name should be the basename of the blast_db_seq
    db_dir = join(project_dir, BACKBONE_DIRECTORIES['blast_databases'])
    if not exists(db_dir):
        makedirs(db_dir)
    db_seq_fpath = join(db_dir, _get_basename(blast_db_seq))
    if not exists(db_seq_fpath):
        #which is the format of the file for the new database?
        with open(blast_db_seq) as format_fhand:
            blast_db_seq_format = guess_seq_file_format(format_fhand)
        if blast_db_seq_format == 'fasta':
            rel_symlink(blast_db_seq, db_seq_fpath)
        else:
            #both files are closed before makeblastdb reads the fasta, so
            #the converted sequences are guaranteed to be on disk
            with open(blast_db_seq) as in_fhand:
                with open(db_seq_fpath, 'w') as out_fhand:
                    seqio(in_seq_fhand=in_fhand,
                          out_seq_fhand=out_fhand,
                          out_format='fasta')
        logger.info('Formatting the database %s', db_seq_fpath)
        try:
            makeblastdb_plus(db_seq_fpath, dbtype=dbtype)
        except RuntimeError:
            msg = 'Error making blastdb. db:%s\n dbtype:%s\n' % \
                                               (db_seq_fpath, dbtype)
            #do not leave a sequence file without a database behind, the
            #next call would skip the formatting
            remove(db_seq_fpath)
            raise RuntimeError(msg)
    return db_seq_fpath
Ejemplo n.º 2
0
def _create_temp_fasta_file(fpath):
    '''It writes the sequences of the given file into a temporary fasta file.

    fpath -- path of the input sequence file

    It returns the NamedTemporaryFile, still open. The caller has to keep
    a reference to it for as long as the file is needed, because a
    NamedTemporaryFile is deleted by default when it is closed.
    '''
    fasta_fhand = NamedTemporaryFile(suffix='.fasta', mode='a')
    #the input is only needed during the conversion, so it is closed here
    with open(fpath) as in_fhand:
        seqio(in_seq_fhand=in_fhand,
              out_seq_fhand=fasta_fhand,
              out_format='fasta')
    #the result will presumably be read by name, so nothing should be left
    #in the buffer (harmless if seqio has already flushed it)
    fasta_fhand.flush()
    return fasta_fhand
Ejemplo n.º 3
0
def main():
    'It reads the parameters and runs the sequence conversion with them'
    parameters = set_parameters()
    # set_parameters gives seven values, in this order
    (seq_in, qual_in, format_in, seq_out, qual_out, format_out,
     use_double_encoding) = parameters

    seqio(in_seq_fhand=seq_in,
          in_qual_fhand=qual_in,
          in_format=format_in,
          out_seq_fhand=seq_out,
          out_qual_fhand=qual_out,
          out_format=format_out,
          double_encoding=use_double_encoding)
Ejemplo n.º 4
0
def main():
    """Build an iassembler project directory from a mira assembly.

    The paths come from set_parameters(). The steps are:

    1. Create <iassembler_path>/<IASSEMBLER_INPUT_NAME>_Assembly/mira.
       os.makedirs raises OSError if that directory already exists.
    2. Write the contig read list to CMF10 with process_contig_readlist.
    3. Copy the mira unigenes to mira2.fa / mira2.fa.qual, renaming every
       sequence to "mira_" plus whatever follows the first underscore of
       its original name.
    4. Convert every file whose name contains "_in", found in the parent
       directory of mira_path, into the iassembler fasta and qual inputs.
    5. Print the command needed to run iassembler.

    Every file is opened in a ``with`` block so the outputs are flushed and
    closed even if a step fails.
    """

    mira_path, iassembler_path = set_parameters()

    # guess the mira files that we need
    unigenes_fpath, unigenes_qual_fpath, mira_contig_read_fpath = get_mira_paths(mira_path)

    # create the iassembler project dir and subdirs
    if not os.path.exists(iassembler_path):
        os.makedirs(iassembler_path)
    mira_1_dir = os.path.join(iassembler_path, "{0:s}_Assembly".format(IASSEMBLER_INPUT_NAME), "mira")
    os.makedirs(mira_1_dir)

    # prepare contig readlist for iassembler
    iassembler_contig_mem_fpath = os.path.join(mira_1_dir, "CMF10")
    process_contig_readlist(mira_contig_read_fpath, iassembler_contig_mem_fpath)

    # copy unigene files into the iassembler project
    iassembler_unigenes = os.path.join(mira_1_dir, "mira2.fa")
    iassembler_unigenes_qual = os.path.join(mira_1_dir, "mira2.fa.qual")
    with open(iassembler_unigenes, "w") as iassembler_unigenes_fh, \
            open(iassembler_unigenes_qual, "w") as iassembler_unigenes_qual_fh, \
            open(unigenes_fpath) as unigenes_fh, \
            open(unigenes_qual_fpath) as unigenes_qual_fh:
        seq_writer = SequenceWriter(
            fhand=iassembler_unigenes_fh, file_format="fasta", qual_fhand=iassembler_unigenes_qual_fh
        )

        for seq in seqs_in_file(seq_fhand=unigenes_fh, format="sfastq", qual_fhand=unigenes_qual_fh):
            # keep only what follows the first underscore of the mira name
            seq.name = "mira_{0:s}".format(seq.name.split("_", 1)[1])
            seq.id = seq.name
            seq_writer.write(seq)

    # create iassembler input files.
    seq_fpath = os.path.join(iassembler_path, IASSEMBLER_INPUT_NAME)
    qual_fpath = os.path.join(iassembler_path, IASSEMBLER_INPUT_NAME + ".qual")
    with open(seq_fpath, "w") as seq_fhand, open(qual_fpath, "w") as qual_fhand:
        # every input file is appended to the same pair of output files
        for file_ in os.listdir(os.path.join(mira_path, "..")):
            if "_in" in file_:
                file_ = os.path.join(mira_path, "..", file_)
                with open(file_) as in_seq_fhand:
                    seqio(
                        in_seq_fhand=in_seq_fhand,
                        out_seq_fhand=seq_fhand,
                        out_qual_fhand=qual_fhand,
                        in_format="sfastq",
                        out_format="fasta",
                    )

    msg = "To run iassembler you must use this command:\n"
    msg += "iassembler -c -i {0:s}\n".format(IASSEMBLER_INPUT_NAME)
    msg += "From your iassembler dir:{0:s}".format(iassembler_path)
    # a single parenthesised argument prints the same on python 2 and 3
    print(msg)
    def test_fastq_to_fastq_solexa():
        'It tests the conversion using the Biopython convert function'
        # input: two fastq records, seq1 and SRR001666.1
        fcontent  = '@seq1\n'
        fcontent += 'CCCT\n'
        fcontent += '+\n'
        fcontent += ';;3;\n'
        fcontent += '@SRR001666.1\n'
        fcontent += 'GTTGC\n'
        fcontent += '+\n'
        fcontent += ';;;;;\n'
        fhand = StringIO.StringIO(fcontent)

        out_seq_fhand = StringIO.StringIO()
        seqio(in_seq_fhand=fhand, in_format='fastq',
              out_seq_fhand=out_seq_fhand, out_format='fastq-solexa')
        # expected: the same two records, only the quality lines re-encoded.
        # The header of the second record has to be '@SRR001666.1', as in
        # the input.
        result = '@seq1\nCCCT\n+\nZZRZ\n@SRR001666.1\nGTTGC\n+\nZZZZZ\n'
        assert out_seq_fhand.getvalue() == result
Ejemplo n.º 6
0
 def _files_to_temp_fasta(files):
     '''It gives a fasta and a qual file handle for every given file.

     A file whose extension contains 'fastq' is converted into a pair of
     temporary fasta and qual files. Any other file is taken as fasta
     already: it is returned untouched and its qual is None.

     It returns two lists, fastas and quals, in the order of the input.
     '''
     fasta_fhands = []
     qual_fhands = []
     for in_fhand in files:
         extension = os.path.splitext(in_fhand.name)[-1]
         if 'fastq' not in extension:
             #nothing to convert, there is no quality for this file
             fasta_fhands.append(in_fhand)
             qual_fhands.append(None)
             continue
         #a fastq file, sequence and quality are split in two temp files
         qual_fhand = NamedTemporaryFile(suffix='.qual')
         fasta_fhand = NamedTemporaryFile(suffix='.fasta')
         seqio(in_seq_fhand=in_fhand, out_seq_fhand=fasta_fhand,
               out_qual_fhand=qual_fhand, out_format='fasta')
         fasta_fhands.append(fasta_fhand)
         qual_fhands.append(qual_fhand)
     return fasta_fhands, qual_fhands
    def test_fastq_to_fasta_qual():
        'It checks that a fastq is written as a fasta plus a qual file'
        # input: two fastq records, seq1 and SRR001666.1
        fastq_lines = ['@seq1\n', 'CCCT\n', '+\n', ';;3;\n',
                       '@SRR001666.1\n', 'GTTGC\n', '+\n', ';;;;;\n']
        in_fhand = StringIO.StringIO(''.join(fastq_lines))

        fasta_fhand = tempfile.NamedTemporaryFile(suffix='.fasta')
        qual_fhand = tempfile.NamedTemporaryFile(suffix='.qual')
        seqio(in_seq_fhand=in_fhand,
              in_format='fastq',
              out_seq_fhand=fasta_fhand,
              out_qual_fhand=qual_fhand,
              out_format='fasta')

        # the temporary files are read back by name
        expected_fasta = '>seq1\nCCCT\n>SRR001666.1\nGTTGC\n'
        written_fasta = open(fasta_fhand.name).read()
        assert written_fasta == expected_fasta

        expected_qual = '>seq1\n26 26 18 26\n>SRR001666.1\n26 26 26 26 26\n'
        written_qual = open(qual_fhand.name).read()
        assert written_qual == expected_qual