예제 #1
0
파일: cf.py 프로젝트: morganmuell/cflib
def cf_to_fasta(cfS, outname, consensus=False):
    """Convert a :class:`CFStream` to a fasta file.

    Extracts the sequences of a counts file that has been initialized
    with a :class:`CFStream`.  The conversion starts at the line
    currently pointed to by the :class:`CFStream` and continues until
    ``cfS.read_next_pos()`` raises :class:`ValueError`, which is treated
    as the end of the stream.

    If more than one base is present at a single site, one base is
    sampled out of all present ones according to its abundance.

    If consensus is set to True, the consensus sequence is extracted
    (i.e., no sampling; the bases with the highest counts for each
    individual or population are chosen).  The flag is applied to every
    position, including the one the stream points to on entry.

    One sequence per entry of ``cfS.indivL`` is created, and the first
    ``cfS.nIndiv`` sequences are written to `outname`, each followed by
    an empty line.

    :param CFStream cfS: Counts format file stream.
    :param str outname: Fasta output file name.
    :param Boolean consensus: Optional; Extract consensus sequence?
      Defaults to False.

    """
    logging.info("Convert counts file to fasta.")
    logging.info("Counts file stream to be converted: %s", cfS.name)
    logging.info("Fasta output file: %s", outname)
    logging.info("Consensus is set to %s.", consensus)
    faS = fasta.FaSeq()

    faS.name = cfS.name

    # One (initially empty) sequence per individual/population.
    for ind in cfS.indivL:
        seq = sb.Seq()
        seq.name = ind
        faS.seqL.append(seq)

    # The stream already points to a position; it has to honor the
    # `consensus` flag exactly like all following positions do.
    faseq_append_base_of_cfS(faS, cfS, consensus)

    while True:
        try:
            cfS.read_next_pos()
        except ValueError:
            # No further position could be read; stop converting.
            break
        else:
            faseq_append_base_of_cfS(faS, cfS, consensus)

    # The context manager closes the file even if writing fails.
    with open(outname, mode='w') as of:
        for i in range(cfS.nIndiv):
            faS.seqL[i].print_fa_entry(fo=of)
            print('', file=of)
예제 #2
0
파일: fasta.py 프로젝트: morganmuell/cflib
 def get_seq_by_id(self, i):
     """Return sequence number `i` as `Seq` object.

     The element stored at index `i` of ``self.seqL`` is returned
     directly (no copy is made).  Indexing follows the rules of
     ``self.seqL``; an out-of-range index on a list raises
     :class:`IndexError`.

     :param int i: Index into ``self.seqL``.

     """
     return self.seqL[i]
예제 #3
0
파일: fasta.py 프로젝트: morganmuell/cflib
def read_seq_from_fo(line, fo, getAlignEndFlag=False):
    """Read a single fasta sequence.

    Read a single fasta sequence from file object *fo* and save it to
    a new :class:`Seq <cflib.seqbase.Seq>` sequence object. Return
    the header line of the next fasta sequence and the newly created
    sequence. If no new sequence is found (including the case that
    *fo* is already exhausted), the next header line will be set to
    None.

    :param str line: Header line of the sequence.
    :param fo fo: File object of the fasta file.

    :param Boolean getAlignEndFlag: If set to true, an additional
      Boolean value that specifies if a multiple sequence alignment
      ends, is returned.

    :rtype: (str, Seq) | (str, Seq, Boolean)

    """
    def get_sp_name_and_description(fa_header_line):
        """Extract species name and description.

        Extract species name and description from a fasta file header
        line `fa_header_line`.  The name is the first whitespace
        delimited token without its leading character (the ``>``); the
        description is the remainder of the line or an empty string.

        """
        lineList = fa_header_line.rstrip().split(maxsplit=1)
        name = lineList[0][1:]
        description = ""
        if len(lineList) > 1:
            description = lineList[1]
        return (name, description)

    def fill_seq_from_fo(line, fo, seq):
        """Read a single fasta sequence.

        Read a single fasta sequence from file object `fo` and save it
        to `seq`. Returns the next header line and a flag that is set
        to true if the end of an alignment is reached (a line only
        contains a newline character).  If no new sequence is found,
        the next header line will be set to None.

        :param str line: Header line of the sequence.
        :param fo fo: File object of the fasta file.
        :param Seq seq: The sequence that will be filled.

        """
        (name, descr) = get_sp_name_and_description(line)
        seq.name = name
        seq.descr = descr
        chunks = []
        alignEndFl = False
        # Stays None unless a following header is actually read.  Do
        # not derive this from the loop variable: if `fo` yields
        # nothing, it would still hold the header passed in and be
        # mistaken for the next one.
        nextHeaderLine = None
        for curLine in fo:
            if curLine == '\n':
                # Newline found, end of alignment.
                alignEndFl = True
            elif curLine.startswith('>'):
                # New species found in line.
                nextHeaderLine = curLine
                break
            else:
                chunks.append(curLine.rstrip())
        # Join once instead of growing a string line by line.
        data = "".join(chunks)
        seq.data = data
        seq.dataLen = len(data)
        return (nextHeaderLine, alignEndFl)

    seq = sb.Seq()
    (newHeaderLine, alignEndFl) = fill_seq_from_fo(line, fo, seq)
    # Identity check kept on purpose: only the literal `False` selects
    # the two-element return value.
    if getAlignEndFlag is False:
        return (newHeaderLine, seq)
    else:
        return (newHeaderLine, seq, alignEndFl)