Esempio n. 1
0
File: a3m.py Progetto: xiangf/conkit
    def read(self, f_handle, f_id='a3m', remove_insert=True):
        """Read a sequence file

        Lines starting with '#' that precede the first record are stored
        as remarks of the returned sequence file. Every record consists
        of a header line starting with '>' followed by one or more
        sequence lines, which are concatenated. Reading stops at the
        first empty line (or the end of the file) following a record.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique sequence file identifier
        remove_insert : bool, optional
           Remove insert states [default: True]

        Returns
        -------
        :obj:`SequenceFile <conkit.core.sequencefile.SequenceFile>`

        Raises
        ------
        ValueError
           The end of the file was reached without finding a record
           header, i.e. a line starting with '>'

        """

        # Create a new sequence file instance
        sequence_file = SequenceFile(f_id)

        # Read any possible comments and store in file remarks
        while True:
            raw_line = f_handle.readline()

            # readline() yields an empty string only at end-of-file (a
            # blank line still carries its newline), so test the raw line
            # before stripping. Without this check a file that contains
            # no record header would be polled forever.
            if not raw_line:
                line = ''
                break

            line = raw_line.rstrip()
            if line.startswith('#'):
                sequence_file.remark = line[1:]
            elif line.startswith('>'):
                break
            # Anything else (blank or unrecognised lines) is skipped

        # Read the sequence record(s) and store them
        while True:
            # Also triggers when the header scan above hit end-of-file
            if not line.startswith('>'):
                raise ValueError("Fasta record needs to start with '>'")

            seq_id = line[1:]  # Header without '>'

            # Collect sequence lines up to the next header, an empty
            # line or the end of the file
            chunks = []
            line = f_handle.readline().rstrip()
            while line and not line.startswith('>'):
                chunks.append(line)
                line = f_handle.readline().rstrip()
            seq_string = "".join(chunks)  # Sequence from chunks

            # Remove insert states
            if remove_insert:
                seq_string = self._remove_insert(seq_string)

            # Create the sequence record instance
            sequence_entry = Sequence(seq_id, seq_string)

            # Store the sequence in the file
            try:
                sequence_file.add(sequence_entry)
            except ValueError:
                # NOTE(review): presumably raised because the identifier
                # is already in use - confirm against SequenceFile.add.
                # Retry with a random numeric suffix that is not yet
                # present in the sequence file.
                while True:
                    new_id = sequence_entry.id + "_{0}".format(
                        np.random.randint(0, 100000))
                    if new_id not in sequence_file:
                        break
                sequence_entry.id = new_id
                sequence_file.add(sequence_entry)

            # An empty line here means a blank line or end-of-file
            if not line:
                break

        # Match the insert states of the sequence
        if not remove_insert:
            self._adjust_insert(sequence_file)

        return sequence_file
Esempio n. 2
0
def SequenceFile(*args, **kwargs):
    """Create a :obj:`SequenceFile <conkit.core.sequencefile.SequenceFile>` instance

    All positional and keyword arguments are passed on unchanged to the
    class constructor, and the newly constructed instance is returned.

    """
    # The class is imported inside the function, so the import is resolved
    # when the factory is called rather than when this module is loaded.
    from conkit.core.sequencefile import SequenceFile as _sequence_file_cls

    instance = _sequence_file_cls(*args, **kwargs)
    return instance