Example #1
0
 def test_remark_3(self):
     """Assigning ``remark`` four times is expected to yield all four values in order."""
     expected = ["Hello", "5", "World", "!"]
     seq_file = SequenceFile("test")
     # Each assignment goes through the ``remark`` attribute, one value at a time
     for text in expected:
         seq_file.remark = text
     self.assertEqual(expected, seq_file.remark)
Example #2
0
 def test_remark_3(self):
     """Check that successive ``remark`` assignments are all reported back as a list."""
     container = SequenceFile('test')
     # Assign the remarks one after another, in the order they are expected back
     for note in ('Hello', '5', 'World', '!'):
         container.remark = note
     stored = container.remark
     self.assertEqual(['Hello', '5', 'World', '!'], stored)
Example #3
0
    def read(self, f_handle, f_id='a3m', remove_inserts=True):
        """Read a sequence file

        Leading lines starting with ``#`` are stored as remarks of the
        sequence file. Each following record consists of a header line
        starting with ``>`` and one or more sequence lines.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique sequence file identifier
        remove_inserts : bool, optional
           Remove insert states [default: True]

        Returns
        -------
        :obj:`~conkit.core.sequencefile.SequenceFile`

        Raises
        ------
        :exc:`ValueError`
           The end of the file is reached before any record header (>) is found

        """
        sequence_file = SequenceFile(f_id)

        # Scan the leading part of the file for remarks and the first header
        while True:
            raw_line = f_handle.readline()
            if not raw_line:
                # readline() only returns an empty string at end-of-file (a
                # blank line still carries its newline), so stop here instead
                # of polling the exhausted handle forever
                raise ValueError("Fasta record needs to start with '>'")
            line = raw_line.rstrip()
            if line.startswith('#'):
                sequence_file.remark = line[1:]
            elif line.startswith('>'):
                break
            # blank lines and any other leading text are skipped

        # ``line`` always holds a record header when the loop body is entered
        while line.startswith('>'):
            seq_id = line[1:]  # header without '>'

            # Collect the sequence lines up to the next header.
            # NOTE: a blank line (or end-of-file) ends parsing altogether, so
            # records following a blank line are not read.
            chunks = []
            line = f_handle.readline().rstrip()
            while line and not line.startswith('>'):
                chunks.append(line)
                line = f_handle.readline().rstrip()
            seq_string = "".join(chunks)

            if remove_inserts:
                seq_string = self._remove_inserts(seq_string)

            sequence_entry = Sequence(seq_id, seq_string)
            try:
                sequence_file.add(sequence_entry)
            except ValueError:
                # presumably raised because the id is already stored - TODO
                # confirm; retry with a random numeric suffix that is not yet
                # present in the file
                while True:
                    new_id = sequence_entry.id + "_{0}".format(
                        np.random.randint(0, 100000))
                    if new_id not in sequence_file:
                        break
                sequence_entry.id = new_id
                sequence_file.add(sequence_entry)

        # Insert states were kept, so let the helper adjust them across entries
        if not remove_inserts:
            self._adjust_insert(sequence_file)
        return sequence_file
Example #4
0
 def test_remark_5(self):
     """File-level and sequence-level remarks are expected to be kept separately."""
     container = SequenceFile('test')
     container.remark = 'hello'
     entry = Sequence('foo', 'GSMFTPK')
     entry.remark = 'bar'
     container.add(entry)
     # The remark of the file must not pick up the remark of the added entry
     file_remarks = container.remark
     self.assertEqual(['hello'], file_remarks)
     # ... and the entry retrieved from the file keeps its own remark
     entry_remarks = container[0].remark
     self.assertEqual(['bar'], entry_remarks)
Example #5
0
 def test_remark_5(self):
     """Check the remarks of a sequence file and of a sequence stored in it."""
     seq_file = SequenceFile("test")
     seq_file.remark = "hello"
     seq = Sequence("foo", "GSMFTPK")
     seq.remark = "bar"
     seq_file.add(seq)
     # First the remark set on the file itself
     remarks_of_file = seq_file.remark
     self.assertEqual(["hello"], remarks_of_file)
     # Then the remark of the first (and only) stored sequence
     first_entry = seq_file[0]
     self.assertEqual(["bar"], first_entry.remark)
Example #6
0
    def read(self, f_handle, f_id='fasta'):
        """Read a sequence file

        Leading lines starting with ``#`` are stored as remarks of the
        sequence file. Each following record consists of a header line
        starting with ``>`` and one or more sequence lines.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique sequence file identifier

        Returns
        -------
        :obj:`SequenceFile <conkit.core.sequencefile.SequenceFile>`

        Raises
        ------
        :exc:`ValueError`
           The end of the file is reached before any record header (>) is found

        """

        # Create a new sequence file instance
        hierarchy = SequenceFile(f_id)

        # Read any possible comments and store in file remarks
        while True:
            raw_line = f_handle.readline()
            if not raw_line:
                # readline() only returns an empty string at end-of-file (a
                # blank line still carries its newline), so stop here instead
                # of polling the exhausted handle forever
                raise ValueError("Fasta record needs to start with '>'")
            line = raw_line.rstrip()
            if line.startswith('#'):
                hierarchy.remark = line[1:]
            elif line.startswith('>'):
                break
            # blank lines and any other leading text are skipped

        # Read the sequence record(s) and store them; ``line`` always holds a
        # record header when the loop body is entered
        while line.startswith('>'):
            seq_id = line[1:]  # Header without '>'

            # Collect the sequence lines up to the next header.
            # NOTE: a blank line (or end-of-file) ends parsing altogether, so
            # records following a blank line are not read.
            chunks = []
            line = f_handle.readline().rstrip()
            while line and not line.startswith('>'):
                chunks.append(line)
                line = f_handle.readline().rstrip()
            _seq_string = "".join(chunks)  # Sequence from chunks

            # Create the sequence record instance and store it in the file
            hierarchy.add(Sequence(seq_id, _seq_string))

        return hierarchy
Example #7
0
    def read(self, f_handle, f_id="fasta"):
        """Read a sequence file

        Leading lines starting with ``#`` are stored as remarks of the
        sequence file. Each following record consists of a header line
        starting with ``>`` and one or more sequence lines.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique sequence file identifier

        Returns
        -------
        :obj:`~conkit.core.sequencefile.SequenceFile`

        Raises
        ------
        :exc:`ValueError`
           The end of the file is reached before any FASTA record header (>)
           is found

        """
        hierarchy = SequenceFile(f_id)

        # Scan the leading part of the file for remarks and the first header
        while True:
            raw_line = f_handle.readline()
            if not raw_line:
                # readline() only returns an empty string at end-of-file (a
                # blank line still carries its newline), so stop here instead
                # of polling the exhausted handle forever
                raise ValueError("Fasta record needs to start with '>'")
            line = raw_line.rstrip()
            if line.startswith("#"):
                hierarchy.remark = line[1:]
            elif line.startswith(">"):
                break
            # blank lines and any other leading text are skipped

        # ``line`` always holds a record header when the loop body is entered
        while line.startswith(">"):
            seq_id = line[1:]  # Header without '>'

            # Collect the sequence lines up to the next header.
            # NOTE: a blank line (or end-of-file) ends parsing altogether, so
            # records following a blank line are not read.
            chunks = []
            line = f_handle.readline().rstrip()
            while line and not line.startswith(">"):
                chunks.append(line)
                line = f_handle.readline().rstrip()
            _seq_string = "".join(chunks)  # Sequence from chunks

            hierarchy.add(Sequence(seq_id, _seq_string))

        return hierarchy
Example #8
0
 def test_remark_1(self):
     """A single assigned remark is expected back as a one-element list."""
     container = SequenceFile('test')
     container.remark = 'Hello'
     stored = container.remark
     self.assertEqual(['Hello'], stored)
Example #9
0
 def test_remark_1(self):
     """Check the ``remark`` attribute after one assignment."""
     expected = ["Hello"]
     seq_file = SequenceFile("test")
     # Only one remark is assigned, so exactly one entry should come back
     seq_file.remark = "Hello"
     self.assertEqual(expected, seq_file.remark)
Example #10
0
File: a3m.py Project: xiangf/conkit
    def read(self, f_handle, f_id='a3m', remove_insert=True):
        """Read a sequence file

        Leading lines starting with ``#`` are stored as remarks of the
        sequence file. Each following record consists of a header line
        starting with ``>`` and one or more sequence lines.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique sequence file identifier
        remove_insert : bool, optional
           Remove insert states [default: True]

        Returns
        -------
        :obj:`SequenceFile <conkit.core.sequencefile.SequenceFile>`

        Raises
        ------
        :exc:`ValueError`
           The end of the file is reached before any record header (>) is found

        """

        # Create a new sequence file instance
        sequence_file = SequenceFile(f_id)

        # Read any possible comments and store in file remarks
        while True:
            raw_line = f_handle.readline()
            if not raw_line:
                # readline() only returns an empty string at end-of-file (a
                # blank line still carries its newline), so stop here instead
                # of polling the exhausted handle forever
                raise ValueError("Fasta record needs to start with '>'")
            line = raw_line.rstrip()
            if line.startswith('#'):
                sequence_file.remark = line[1:]
            elif line.startswith('>'):
                break
            # blank lines and any other leading text are skipped

        # Read the sequence record(s) and store them; ``line`` always holds a
        # record header when the loop body is entered
        while line.startswith('>'):
            seq_id = line[1:]  # Header without '>'

            # Collect the sequence lines up to the next header.
            # NOTE: a blank line (or end-of-file) ends parsing altogether, so
            # records following a blank line are not read.
            chunks = []
            line = f_handle.readline().rstrip()
            while line and not line.startswith('>'):
                chunks.append(line)
                line = f_handle.readline().rstrip()
            seq_string = "".join(chunks)  # Sequence from chunks

            # Remove insert states
            if remove_insert:
                seq_string = self._remove_insert(seq_string)

            # Create the sequence record instance
            sequence_entry = Sequence(seq_id, seq_string)

            # Store the sequence in the file
            try:
                sequence_file.add(sequence_entry)
            except ValueError:
                # presumably raised because the id is already stored - TODO
                # confirm; retry with a random numeric suffix that is not yet
                # present in the file
                while True:
                    new_id = sequence_entry.id + "_{0}".format(
                        np.random.randint(0, 100000))
                    if new_id not in sequence_file:
                        break
                sequence_entry.id = new_id
                sequence_file.add(sequence_entry)

        # Match the insert states of the sequence
        if not remove_insert:
            self._adjust_insert(sequence_file)

        return sequence_file