Exemple #1
0
    def test_oligoseq_set_cropmaps_1(self):
        """Check set_cropmaps with cropmain=True on a two-sequence oligoseq.

        Every sequence must end up holding the supplied cropmap and
        cropbackmap, and its 'fullseq', 'cropseq' and 'mainseq' entries must
        match the reference strings.
        """
        first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
        second = ces.sequence(seq=_SEQUENCE_5_2, header=_HEADER_2)

        oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                            imer={first.name: first})
        oseq.add_sequence(second)

        # The same pair of maps is applied to every sequence in the oligomer.
        oseq.set_cropmaps(
            {name: {'cropmap': _CROPMAP_1, 'cropbackmap': _CROPMAP_2}
             for name in oseq.imer},
            cropmain=True)

        # Order: full, cropped and main sequence of '1', then the same of '2'.
        expected_seqs = [
            _SEQUENCE_5,
            _SEQUENCE_6,
            _SEQUENCE_6.replace("+", ""),
            _SEQUENCE_5_2,
            _SEQUENCE_6_2,
            _SEQUENCE_6_2.replace("+", ""),
        ]
        obtained_seqs = [
            oseq.imer[name].seqs[kind]
            for name in ('1', '2')
            for kind in ('fullseq', 'cropseq', 'mainseq')
        ]

        for name in ('1', '2'):
            self.assertDictEqual(oseq.imer[name].cropmap, _CROPMAP_1)
            self.assertDictEqual(oseq.imer[name].cropbackmap, _CROPMAP_2)
        for expected, obtained in zip(expected_seqs, obtained_seqs):
            self.assertEqual(expected, obtained)
Exemple #2
0
    def test_oligoseq_chainlist_1(self):
        """Check that chainlist() returns the chains of both sequences."""
        first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
        second = ces.sequence(header=_HEADER_2,
                              seq=_SEQUENCE_6_2.replace("+", ""))

        oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                            imer={first.name: first})
        oseq.add_sequence(second)

        self.assertEqual({'C', 'D', 'E', 'F'}, oseq.chainlist())
Exemple #3
0
    def test_oligoseq_nseqs_1(self):
        """Check that nseqs() reports two sequences after one add_sequence."""
        first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
        second = ces.sequence(header=_HEADER_2,
                              seq=_SEQUENCE_6_2.replace("+", ""))

        oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                            imer={first.name: first})
        oseq.add_sequence(second)

        self.assertEqual(2, oseq.nseqs())
Exemple #4
0
    def test_oligoseq_add_sequence_1(self):
        """Check that add_sequence registers the new sequence in imer."""
        first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
        second = ces.sequence(seq=_SEQUENCE_5_2, header=_HEADER_2)

        oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                            imer={first.name: first})
        oseq.add_sequence(second)

        # Both the keys and the stored objects are compared as sets, so the
        # insertion order inside imer is irrelevant.
        self.assertEqual({'1', '2'}, set(oseq.imer.keys()))
        self.assertEqual({first, second}, set(oseq.imer.values()))
Exemple #5
0
    def test_oligoseq_write_1(self):
        """Check the text returned by write(outdir='string')."""
        first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
        second = ces.sequence(seq=_SEQUENCE_5_2, header=_HEADER_2)

        oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                            imer={first.name: first})
        oseq.add_sequence(second)

        # Header and sequence lines, each terminated by os.linesep; the
        # trailing empty item yields the final line separator.
        expected_lines = [
            ">crops|1IXY_1|Chains C,D|Source: RCSB PDB|5'-D(*GP*AP*TP*AP*CP*TP*3DRP*AP*GP*AP*TP*AP*G)-3'|",
            "GATACTNAGATAG",
            ">crops|1IXY_2|Chains E,F|Source: RCSB PDB|5'-D(*CP*TP*AP*TP*CP*TP*GP*AP*GP*TP*AP*TP*C)-3'|",
            "CTATCTGAGTATC",
            "",
        ]
        expected_output = os.linesep.join(expected_lines)

        self.assertEqual(expected_output, oseq.write(outdir='string'))
Exemple #6
0
    def test_oligoseq_length_1(self):
        """Check length() for sequence numbers 1 and 2."""
        first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
        second = ces.sequence(header=_HEADER_2,
                              seq=_SEQUENCE_6_2.replace("+", ""))

        oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                            imer={first.name: first})
        oseq.add_sequence(second)

        # Query both lengths before asserting, so a failure in either call
        # surfaces ahead of any comparison.
        length_first = oseq.length(1)
        length_second = oseq.length(2)

        self.assertEqual(13, length_first)
        self.assertEqual(10, length_second)
Exemple #7
0
    def test_oligoseq_whatseq_1(self):
        """Check that whatseq() maps each chain to its sequence number."""
        first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
        second = ces.sequence(header=_HEADER_2,
                              seq=_SEQUENCE_6_2.replace("+", ""))

        oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                            imer={first.name: first})
        oseq.add_sequence(second)

        # Chains are grouped per sequence; both members of a group are
        # expected to give the same answer, so set ordering does not matter.
        chain_groups = [{'C', 'D'}, {'E', 'F'}]
        obtained_seqnum = [oseq.whatseq(chain)
                           for group in chain_groups
                           for chain in group]

        self.assertListEqual(['1', '1', '2', '2'], obtained_seqnum)
Exemple #8
0
def parseseq(instream, inset=None):
    """Parse sequence(s).

    Records start at a line beginning with ``>`` and run until the next such
    line or the end of the input. Blank lines and lines starting with ``#``
    (or ``' #'``) are skipped; they never terminate a record.

    :param instream: Imported-to-string sequence file content (fasta format).
    :type instream: str
    :param inset: Sequence IDs to return, if None it returns them all, defaults to None.
        IDs are compared after conversion to upper case.
    :type inset: set or dict or str, optional

    :raises TypeError: When instream is not a string; when inset is not a
        set, a dictionary or a string; or when an element of inset is not a
        string.

    :return: Parsed sequences.
    :rtype: dict [str, :class:`crops.elements.sequences.oligoseq`]

    """
    if not isinstance(instream, str):
        msg = 'Input argument instream should be a string.'
        logging.critical(msg)
        raise TypeError(msg)

    upperset = None
    if inset is not None:
        if not isinstance(inset, (str, dict, set)):
            msg = ('Input argument inset should be a set or, '
                   'alternatively a string or a dictionary.')
            logging.critical(msg)
            raise TypeError(msg)
        if isinstance(inset, str):
            inset = {inset}
        upperset = set()
        # Iterating a dict yields its keys, so dicts are filtered by key.
        for element in inset:
            if not isinstance(element, str):
                msg = 'Elements in inset should be strings.'
                logging.critical(msg)
                raise TypeError(msg)
            upperset.add(element.upper())

    newseqs = {}
    newid = None      # Parsed header of the record being read, if any.
    head = ''         # Raw header line of the record being read.
    pieces = []       # Sequence lines collected for the current record.
    ignore = False    # True while the current record is filtered out.

    for raw in instream.splitlines():
        line = raw.rstrip()
        if line.startswith(">"):
            # A new header closes the previous record exactly once.
            if newid is not None and not ignore:
                _store_record(newseqs, newid, head, ''.join(pieces))
            newid = retrieve_id(line)
            head = line
            pieces = []
            if upperset is not None:
                ignore = newid['mainid'] not in upperset
        elif line.startswith("#") or line.startswith(' #'):
            continue
        elif line and newid is not None and not ignore:
            # Text found before the first header belongs to no record.
            pieces.append(line)

    # End of input closes the last open record.
    if newid is not None and not ignore:
        _store_record(newseqs, newid, head, ''.join(pieces))

    return newseqs


def _store_record(newseqs, newid, head, chain):
    """Build a sequence from one parsed record and add it to its oligoseq.

    The oligoseq for ``newid['mainid']`` is created in ``newseqs`` on first
    use; ``newseqs`` is modified in place.
    """
    mainid = newid['mainid']
    if mainid not in newseqs:
        newseqs[mainid] = oligoseq(oligomer_id=mainid)
    newseqs[mainid].add_sequence(
        sequence(seqid=newid['seqid'],
                 oligomer=mainid,
                 seq=chain,
                 chains=newid['chains'],
                 source=newid['source'],
                 header=head,
                 extrainfo=newid['comments']))