def test_sequence_dumpmap_2(self):
    # dumpmap(out='string') for a sequence created with explicit
    # seqid/oligomer/chains/source/extrainfo and hand-set crop state.
    title = (">crops|2IXY_2|Chains A,C|Source: MADEUP|No info|"
             "#Residues cropped: 3 (2 not from terminal segments) ; "
             "% cropped: 23.08 (15.38 not from terminal segments)")
    map_rows = ["1 0", "2 1", "3 2", "4 0", "5 3", "6 4", "7 5",
                "8 6", "9 0", "10 7", "11 8", "12 9", "13 10"]
    # Every line of the expected text, the last one included, ends with
    # os.linesep.
    expected_output = os.linesep.join([title] + map_rows) + os.linesep

    cropped = ces.sequence(seqid=2, oligomer='2IXY', chains={'A', 'C'},
                           seq=_SEQUENCE_5, header=_HEADER_1,
                           extrainfo='No info', source='MADEUP')
    cropped.cropmap = _CROPMAP_1
    cropped.cropbackmap = _CROPMAP_2
    cropped.seqs['cropseq'] = _SEQUENCE_6
    # Order matters: save the current 'mainseq' as 'fullseq' before it is
    # overwritten with the crop sequence stripped of '+' characters.
    cropped.seqs['fullseq'] = cropped.seqs['mainseq']
    cropped.seqs['mainseq'] = cropped.seqs['cropseq'].replace("+", "")

    returned_output = cropped.dumpmap(out='string')
    self.assertEqual(expected_output, returned_output)
def test_oligoseq_purge_1(self):
    # Build an oligoseq holding one sequence, add a second one, then purge();
    # afterwards imer is expected to have neither keys nor values.
    first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    second = ces.sequence(seq=_SEQUENCE_5_2, header=_HEADER_2)
    nothing_left_keys, nothing_left_seqs = set(), set()

    oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                        imer={first.name: first})
    oseq.add_sequence(second)
    oseq.purge()

    # Both views are collected before asserting, as in the original flow.
    obtained_keys = set(oseq.imer.keys())
    obtained_seqs = set(oseq.imer.values())
    self.assertEqual(nothing_left_keys, obtained_keys)
    self.assertEqual(nothing_left_seqs, obtained_seqs)
def test_sequence_length_1(self):
    # length() of a sequence built from _SEQUENCE_2 alone (no header).
    headerless = ces.sequence(seq=_SEQUENCE_2)
    self.assertEqual(246, headerless.length())
def test_sequence_length_2(self):
    # length() of a sequence built from _SEQUENCE_5 together with _HEADER_1.
    with_header = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    self.assertEqual(13, with_header.length())
def test_sequence_dump_1(self):
    # dump(out='string') is expected to give the crops header line and the
    # sequence line, each terminated by os.linesep.
    header_line = (">crops|1IXY_1|Chains C,D|Source: RCSB PDB|"
                   "5'-D(*GP*AP*TP*AP*CP*TP*3DRP*AP*GP*AP*TP*AP*G)-3'|")
    # The trailing empty item yields the final line separator.
    expected_output = os.linesep.join((header_line, "GATACTNAGATAG", ""))

    parsed = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    returned_output = parsed.dump(out='string')

    self.assertEqual(expected_output, returned_output)
def test_oligoseq_whatseq_1(self):
    # whatseq(chain) is expected to answer '1' for chains C and D and '2'
    # for chains E and F once both sequences belong to the oligoseq.
    chain_groups = [{'C', 'D'}, {'E', 'F'}]
    expected_seqnum = ['1', '1', '2', '2']

    first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    second = ces.sequence(seq=_SEQUENCE_6_2.replace("+", ""),
                          header=_HEADER_2)
    oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                        imer={first.name: first})
    oseq.add_sequence(second)

    # Groups are visited in list order; order inside a set is irrelevant
    # because both members of a group share the same expected answer.
    obtained_seqnum = [oseq.whatseq(chain_id)
                       for group in chain_groups
                       for chain_id in group]

    self.assertListEqual(expected_seqnum, obtained_seqnum)
def test_sequence_ngaps_2(self):
    # With 'gapseq' set to a list of two gapped sequences, ngaps() is
    # expected to return one count per entry.
    gapped = ces.sequence(seq=_SEQUENCE_2)
    gapped.seqs['gapseq'] = [_SEQUENCE_7, _SEQUENCE_8]

    self.assertEqual([3, 3], gapped.ngaps())
def test_sequence_update_cropsheader_1(self):
    # After renaming the sequence to '2', update_cropsheader() is expected
    # to refresh crops_header so that it reads '1IXY_2'.
    expected_output = (">crops|1IXY_2|Chains C,D|Source: RCSB PDB|"
                       "5'-D(*GP*AP*TP*AP*CP*TP*3DRP*AP*GP*AP*TP*AP*G)-3'|")

    renamed = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    renamed.name = '2'
    renamed.update_cropsheader()

    self.assertEqual(expected_output, renamed.crops_header)
def test_sequence_mainseq_1(self):
    # mainseq() is expected to hand back the string given as `seq`.
    built = ces.sequence(seqid=1, oligomer='1IXY', chains={'C', 'D'},
                         seq=_SEQUENCE_5)
    self.assertEqual(_SEQUENCE_5, built.mainseq())
def test_sequence_guess_biotype_1(self):
    # guess_biotype() on a sequence object is expected to agree with
    # ces.guess_type() applied to the same raw string.
    raw = _SEQUENCE_3
    expected_type = ces.guess_type(raw)

    built = ces.sequence(seqid=1, oligomer='1IXY', chains={'C', 'D'},
                         seq=raw)

    self.assertEqual(expected_type, built.guess_biotype())
def test_sequence_addseq_1(self):
    # addseq() is expected to store the new sequence in `seqs` under the
    # given id.
    expected_seq = _SEQUENCE_3
    seq = ces.sequence(seqid=1, oligomer='1IXY', chains={'C', 'D'},
                       seq=_SEQUENCE_5)

    seq.addseq(newid="alternative", newseq=_SEQUENCE_3)

    # assertIn reports the container contents when the check fails, unlike
    # assertTrue(x in y), which only says "False is not true".
    self.assertIn("alternative", seq.seqs)
    self.assertEqual(seq.seqs["alternative"], expected_seq)
def test_sequence_cropinfo_1(self):
    # cropinfo() summary for a sequence whose crop state is set by hand.
    expected_info = ('#Residues cropped: 3 (2 not from terminal segments) ; '
                     '% cropped: 23.08 (15.38 not from terminal segments)')

    cropped = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    cropped.cropmap = _CROPMAP_1
    cropped.cropbackmap = _CROPMAP_2
    cropped.seqs['cropseq'] = _SEQUENCE_6
    # Save the current 'mainseq' as 'fullseq' before replacing it with the
    # crop sequence stripped of '+' characters.
    cropped.seqs['fullseq'] = cropped.seqs['mainseq']
    cropped.seqs['mainseq'] = cropped.seqs['cropseq'].replace("+", "")

    self.assertEqual(expected_info, cropped.cropinfo())
def test_sequence_ncrops_1(self):
    # ncrops() count for a sequence whose crop state is set by hand.
    cropped = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    cropped.cropmap = _CROPMAP_1
    cropped.cropbackmap = _CROPMAP_2
    cropped.seqs['cropseq'] = _SEQUENCE_6
    # Save the current 'mainseq' as 'fullseq' before replacing it with the
    # crop sequence stripped of '+' characters.
    cropped.seqs['fullseq'] = cropped.seqs['mainseq']
    cropped.seqs['mainseq'] = cropped.seqs['cropseq'].replace("+", "")

    self.assertEqual(3, cropped.ncrops())
def test_sequence_dump_2(self):
    # dump(out='string') for a sequence created with explicit
    # seqid/oligomer/chains/source/extrainfo in addition to a header.
    # The trailing empty item yields the final line separator.
    expected_output = os.linesep.join(
        (">crops|2IXY_2|Chains A,C|Source: MADEUP|No info",
         "GATACTNAGATAG",
         ""))

    explicit = ces.sequence(seqid=2, oligomer='2IXY', chains={'A', 'C'},
                            seq=_SEQUENCE_5, header=_HEADER_1,
                            extrainfo='No info', source='MADEUP')
    returned_output = explicit.dump(out='string')

    self.assertEqual(expected_output, returned_output)
def test_sequence_delseq_2(self):
    # delseq(wipeall=True) is expected to drop the extra entry and to leave
    # 'mainseq' present but empty.
    expected_seq = ""
    seq = ces.sequence(seqid=1, oligomer='1IXY', chains={'C', 'D'},
                       seq=_SEQUENCE_5)
    seq.addseq(newid="alternative", newseq=_SEQUENCE_3)

    seq.delseq(wipeall=True)

    # assertNotIn/assertIn report the container contents when a check
    # fails, unlike assertFalse/assertTrue on an `in` expression.
    self.assertNotIn("alternative", seq.seqs)
    self.assertIn("mainseq", seq.seqs)
    self.assertEqual(seq.seqs["mainseq"], expected_seq)
def test_sequence_dumpmap_1(self):
    # dumpmap(out='string') for a sequence created from seq + header only,
    # with hand-set crop state.
    title = (">crops|1IXY_1|Chains C,D|Source: RCSB PDB|"
             "5'-D(*GP*AP*TP*AP*CP*TP*3DRP*AP*GP*AP*TP*AP*G)-3'|"
             "#Residues cropped: 3 (2 not from terminal segments) ; "
             "% cropped: 23.08 (15.38 not from terminal segments)")
    map_rows = ["1 0", "2 1", "3 2", "4 0", "5 3", "6 4", "7 5",
                "8 6", "9 0", "10 7", "11 8", "12 9", "13 10"]
    # Every line of the expected text, the last one included, ends with
    # os.linesep.
    expected_output = os.linesep.join([title] + map_rows) + os.linesep

    cropped = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    cropped.cropmap = _CROPMAP_1
    cropped.cropbackmap = _CROPMAP_2
    cropped.seqs['cropseq'] = _SEQUENCE_6
    # Order matters: save the current 'mainseq' as 'fullseq' before it is
    # overwritten with the crop sequence stripped of '+' characters.
    cropped.seqs['fullseq'] = cropped.seqs['mainseq']
    cropped.seqs['mainseq'] = cropped.seqs['cropseq'].replace("+", "")

    returned_output = cropped.dumpmap(out='string')
    self.assertEqual(expected_output, returned_output)
def parseseq(instream, inset=None):
    """Parse sequence(s).

    A line starting with ``>`` opens a new record and is passed to
    ``retrieve_id``; lines starting with ``#`` (or `` #``) are skipped;
    any other line is appended to the current record's sequence. A record
    is stored when an empty line or the next ``>`` line is reached.

    When ``inset`` is given, its elements are upper-cased and only records
    whose ``'mainid'`` (as returned by ``retrieve_id``) is among them are
    kept.

    :param instream: Imported-to-string sequence file content (fasta format).
    :type instream: str
    :param inset: Sequence IDs to return, if None it returns them all, defaults to None.
    :type inset: set or dict or str, optional
    :raises TypeError: When inset is not a set, dict or str, or contains
        non-string elements; or instream is not a string.
    :return: Parsed sequences.
    :rtype: dict [str, :class:`crops.elements.sequences.oligoseq`]

    """
    if not isinstance(instream, str):
        msg = 'Input argument instream should be a string.'
        logging.critical(msg)
        raise TypeError(msg)

    # Upper-cased IDs to keep; None means "keep everything".
    upperset = None
    if inset is not None:
        if isinstance(inset, str):
            inset = {inset}
        elif not isinstance(inset, (dict, set)):
            msg = ('Input argument inset should be a set or, '
                   'alternatively a string or a dictionary.')
            logging.critical(msg)
            raise TypeError(msg)
        upperset = set()
        # Iterating a dict yields its keys, so only keys are considered.
        for element in inset:
            if not isinstance(element, str):
                msg = 'Elements in inset should be strings.'
                logging.critical(msg)
                raise TypeError(msg)
            upperset.add(element.upper())

    newseqs = {}
    newid = None          # fields parsed from the current header line
    head = ''             # current header line, verbatim
    chain_parts = []      # sequence lines collected for the current record
    ignore = False        # True while the current record is filtered out
    seen_header = False   # nothing can be stored before the first header

    # The extra empty line guarantees the last record is stored.
    for raw in instream.splitlines() + ['']:
        line = raw.rstrip()
        is_header = line.startswith('>')

        # NOTE(review): an empty line followed by another empty line or by a
        # header stores the same record twice; this presumably relies on
        # add_sequence tolerating repeats -- confirm before changing.
        if (not line or is_header) and not ignore and seen_header:
            mainid = newid['mainid']
            if mainid not in newseqs:
                newseqs[mainid] = oligoseq(oligomer_id=mainid)
            aseq = sequence(seqid=newid['seqid'],
                            oligomer=mainid,
                            seq=''.join(chain_parts),
                            chains=newid['chains'],
                            source=newid['source'],
                            header=head,
                            extrainfo=newid['comments'])
            newseqs[mainid].add_sequence(aseq)

        if is_header:
            newid = retrieve_id(line)
            head = line
            seen_header = True
            chain_parts = []
            if upperset is not None:
                ignore = newid['mainid'] not in upperset
        elif line.startswith(('#', ' #')):
            continue
        elif not ignore:
            chain_parts.append(line)

    return newseqs