def test_oligoseq_set_cropmaps_1(self):
    """Check crop maps and sequence variants after oligoseq.set_cropmaps.

    Builds a two-sequence oligoseq, assigns the same crop map and
    crop back-map to every monomer with ``cropmain=True`` and then
    compares the stored maps and the 'fullseq', 'cropseq' and
    'mainseq' strings of monomers '1' and '2' with the module fixtures.
    """
    first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    second = ces.sequence(seq=_SEQUENCE_5_2, header=_HEADER_2)
    oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                        imer={first.name: first})
    oseq.add_sequence(second)

    # Every monomer receives the same pair of maps.
    cropmaps = {
        key: {'cropmap': _CROPMAP_1, 'cropbackmap': _CROPMAP_2}
        for key in oseq.imer
    }
    oseq.set_cropmaps(cropmaps, cropmain=True)

    monomers = ('1', '2')
    expected_seqs = [
        _SEQUENCE_5,
        _SEQUENCE_6,
        _SEQUENCE_6.replace("+", ""),
        _SEQUENCE_5_2,
        _SEQUENCE_6_2,
        _SEQUENCE_6_2.replace("+", ""),
    ]
    obtained_seqs = [
        oseq.imer[name].seqs[kind]
        for name in monomers
        for kind in ('fullseq', 'cropseq', 'mainseq')
    ]

    for name in monomers:
        self.assertDictEqual(oseq.imer[name].cropmap, _CROPMAP_1)
        self.assertDictEqual(oseq.imer[name].cropbackmap, _CROPMAP_2)

    for expected, obtained in zip(expected_seqs, obtained_seqs):
        self.assertEqual(expected, obtained)
def test_oligoseq_chainlist_1(self):
    """Check that oligoseq.chainlist returns chains 'C', 'D', 'E' and 'F'.

    The oligoseq is built from one sequence and extended with a second
    one through add_sequence before the chain set is requested.
    """
    first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    second = ces.sequence(seq=_SEQUENCE_6_2.replace("+", ""),
                          header=_HEADER_2)
    oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                        imer={first.name: first})
    oseq.add_sequence(second)

    self.assertEqual({'C', 'D', 'E', 'F'}, oseq.chainlist())
def test_oligoseq_nseqs_1(self):
    """Check that oligoseq.nseqs reports 2 after a second sequence is added."""
    first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    second = ces.sequence(seq=_SEQUENCE_6_2.replace("+", ""),
                          header=_HEADER_2)
    oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                        imer={first.name: first})
    oseq.add_sequence(second)

    self.assertEqual(2, oseq.nseqs())
def test_oligoseq_add_sequence_1(self):
    """Check the keys and values of oligoseq.imer after add_sequence.

    The monomer dictionary is expected to hold exactly the keys '1'
    and '2' and exactly the two sequence objects created here.
    """
    first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    second = ces.sequence(seq=_SEQUENCE_5_2, header=_HEADER_2)
    # The expected set is built before the oligoseq exists, as in the
    # original statement order.
    expected_seqs = {first, second}

    oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                        imer={first.name: first})
    oseq.add_sequence(second)

    self.assertEqual({'1', '2'}, set(oseq.imer.keys()))
    self.assertEqual(expected_seqs, set(oseq.imer.values()))
def test_oligoseq_write_1(self):
    """Check the text returned by oligoseq.write(outdir='string').

    The expected output is two header/sequence pairs, each line
    terminated by ``os.linesep``.
    """
    expected_lines = [
        ">crops|1IXY_1|Chains C,D|Source: RCSB PDB|"
        "5'-D(*GP*AP*TP*AP*CP*TP*3DRP*AP*GP*AP*TP*AP*G)-3'|",
        "GATACTNAGATAG",
        ">crops|1IXY_2|Chains E,F|Source: RCSB PDB|"
        "5'-D(*CP*TP*AP*TP*CP*TP*GP*AP*GP*TP*AP*TP*C)-3'|",
        "CTATCTGAGTATC",
    ]
    # The trailing empty item makes join() terminate the last line too.
    expected_output = os.linesep.join(expected_lines + [""])

    first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    second = ces.sequence(seq=_SEQUENCE_5_2, header=_HEADER_2)
    oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                        imer={first.name: first})
    oseq.add_sequence(second)

    self.assertEqual(expected_output, oseq.write(outdir='string'))
def test_oligoseq_length_1(self):
    """Check oligoseq.length for sequences 1 and 2 (expected 13 and 10)."""
    first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    second = ces.sequence(seq=_SEQUENCE_6_2.replace("+", ""),
                          header=_HEADER_2)
    oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                        imer={first.name: first})
    oseq.add_sequence(second)

    # Both lengths are fetched before any assertion runs.
    length_first = oseq.length(1)
    length_second = oseq.length(2)

    self.assertEqual(13, length_first)
    self.assertEqual(10, length_second)
def test_oligoseq_whatseq_1(self):
    """Check that oligoseq.whatseq maps each chain to its sequence number.

    Chains 'C' and 'D' are expected to map to '1', chains 'E' and 'F'
    to '2'.
    """
    chain_groups = [{'C', 'D'}, {'E', 'F'}]

    first = ces.sequence(seq=_SEQUENCE_5, header=_HEADER_1)
    second = ces.sequence(seq=_SEQUENCE_6_2.replace("+", ""),
                          header=_HEADER_2)
    oseq = ces.oligoseq(oligomer_id=first.oligomer_id,
                        imer={first.name: first})
    oseq.add_sequence(second)

    # Groups are visited in list order; both members of a group share
    # the same expected value.
    obtained_seqnum = [
        oseq.whatseq(chain)
        for group in chain_groups
        for chain in group
    ]

    self.assertListEqual(['1', '1', '2', '2'], obtained_seqnum)
def _store_parsed_sequence(newseqs, newid, head, chain):
    """Build a sequence from one parsed record and add it to its oligoseq.

    :param newseqs: Oligoseqs parsed so far, keyed by main id. Updated in place.
    :type newseqs: dict
    :param newid: Identifiers returned by retrieve_id for the record header.
    :type newid: dict
    :param head: Header line of the record.
    :type head: str
    :param chain: Sequence string of the record.
    :type chain: str

    """
    mainid = newid['mainid']
    if mainid not in newseqs:
        newseqs[mainid] = oligoseq(oligomer_id=mainid)
    aseq = sequence(seqid=newid['seqid'],
                    oligomer=mainid,
                    seq=chain,
                    chains=newid['chains'],
                    source=newid['source'],
                    header=head,
                    extrainfo=newid['comments'])
    newseqs[mainid].add_sequence(aseq)


def parseseq(instream, inset=None):
    """Parse sequence(s).

    Lines starting with ">" open a new record; lines starting with "#" or
    " #" are skipped; every other non-empty line is appended to the sequence
    of the current record. Each record is stored exactly once, when the next
    header or the end of the input is reached, so blank lines inside or
    after a record neither truncate nor duplicate it.

    :param instream: Imported-to-string sequence file content (fasta format).
    :type instream: str
    :param inset: Sequence IDs to return, if None it returns them all, defaults to None.
        A single string is treated as a one-element set; for a dictionary its
        keys are used. Elements are upper-cased before being compared with
        the main id extracted from each header.
    :type inset: set or dict or str, optional
    :raises TypeError: When instream is not a string; when inset is not a set,
        a dictionary or a string; or when an element of inset is not a string.
    :return: Parsed sequences.
    :rtype: dict [str, :class:`crops.elements.sequences.oligoseq`]

    """
    if not isinstance(instream, str):
        msg = 'Input argument instream should be a string.'
        logging.critical(msg)
        raise TypeError(msg)

    # None means "no filtering"; an empty set means "keep nothing".
    upperset = None
    if inset is not None:
        if not isinstance(inset, (str, dict, set)):
            msg = ('Input argument inset should be a set or, '
                   'alternatively a string or a dictionary.')
            logging.critical(msg)
            raise TypeError(msg)
        wanted = {inset} if isinstance(inset, str) else inset
        upperset = set()
        for element in wanted:
            if not isinstance(element, str):
                msg = 'Elements in inset should be strings.'
                logging.critical(msg)
                raise TypeError(msg)
            upperset.add(element.upper())

    newseqs = {}
    # Identifiers of the record being read. None before the first header
    # and while the current record is filtered out by inset.
    newid = None
    head = ''
    pieces = []

    for raw in instream.splitlines():
        line = raw.rstrip()
        if line.startswith('>'):
            # A new header closes the record read so far.
            if newid is not None:
                _store_parsed_sequence(newseqs, newid, head, ''.join(pieces))
            parsed = retrieve_id(line)
            if upperset is None or parsed['mainid'] in upperset:
                newid = parsed
                head = line
                pieces = []
            else:
                newid = None
        elif line.startswith(('#', ' #')):
            continue
        elif newid is not None:
            # Blank lines add nothing; they no longer trigger a flush.
            pieces.append(line)

    # The end of the input closes the last record.
    if newid is not None:
        _store_parsed_sequence(newseqs, newid, head, ''.join(pieces))

    return newseqs