def get_toeholds(self, n_ths=6, timeout=2):
    """Generate sticky-end toeholds for the Soloveichik DSD approach.

    StickyDesign is run repeatedly until a run yields at least ``n_ths``
    toeholds in addition to the trivial "avoid" sequences. A run that
    succeeds is always accepted, even if it finished after ``timeout``.

    Args:
        n_ths: Number of toeholds to generate. (6)
        timeout: Time allowed for finding toeholds, in seconds. (2)

    Returns:
        List of toehold strings; the first and last character of every
        StickyDesign end are stripped before returning.

    Raises:
        ToeholdSpecificationError: If StickyDesign raises ``ValueError``, or
            if too few toeholds have been found once ``timeout`` seconds
            have elapsed. The message reports the values returned by
            ``self.calculate_unrestricted_toehold_characteristics()``.
    """
    from time import time
    import stickydesign as sd

    def specification_error():
        # Build the exception in one place; both failure paths report the
        # same suggestion to the user.
        e_avg, e_spr, e_dev, n_ends = \
            self.calculate_unrestricted_toehold_characteristics()
        msg = "Cannot make toeholds to user specification! Try target energy:{:.2}, maxspurious:{:.2}, deviation:{:.2}, which makes {:d} toeholds."
        return ToeholdSpecificationError(
            msg.format(e_avg, e_spr, e_dev, n_ends))

    # Give StickyDesign a set of trivial, single-nucleotide toeholds to avoid
    # poor designs. I'm not sure if this helps now, but it did once.
    avoid_list = [i * int(self.length + 2) for i in ['a', 't']]
    n_needed = n_ths + len(avoid_list)

    # Generate toeholds
    fdev = self.deviation / self.targetdG
    startime = time()
    while True:
        try:
            ends = sd.easyends('TD',
                               self.length,
                               interaction=self.targetdG,
                               fdev=fdev,
                               alphabet='h',
                               adjs=['c', 'g'],
                               maxspurious=self.max_spurious,
                               energetics=self,
                               oldends=avoid_list)
        except ValueError:
            raise specification_error()
        if len(ends) >= n_needed:
            break
        # Only give up when this run was insufficient AND time has run out.
        if (time() - startime) > timeout:
            raise specification_error()

    # Remove the "avoid" sequences, which come first, and keep n_ths ends.
    th_cands = ends.tolist()[len(avoid_list):][:n_ths]
    # Drop the flanking character on each side of every candidate.
    th_all = [th[1:-1] for th in th_cands]
    return sd.endarray(th_all, 'TD').tolist()
def __init__(self, tilesys, pairs=None, energetics=None):
    """Set up end sequences, name lookups and pair weights for a tile system.

    Args:
        tilesys: Mapping with 'ends' and 'tiles' entries; it is deep-copied
            before use.
        pairs: Mapping of pair class -> iterable of (x, y) end pairs. When
            falsy, it is computed with ``sens.senspairs`` /
            ``sens.consolidate_pairs`` from the copied tile system.
        energetics: Energetics object; when falsy,
            ``en.energetics_santalucia(mismatchtype='max')`` is used.
    """
    self.ef = (energetics if energetics else
               en.energetics_santalucia(mismatchtype='max'))

    tilesys = deepcopy(tilesys)
    self.ends = tilesys['ends']
    self.tiles = tilesys['tiles']
    self.tilesystem = tilesys

    if not pairs:
        sensitive = sens.senspairs(tilesys)
        pairs = sens.consolidate_pairs(sensitive, comcomp=1, onlytop=True)

    self.initstate = FseqState()
    self.names = {}

    # Split the ends by type into parallel (sequence, name) lists.
    fseqs_by_type = {}
    for endtype in ('TD', 'DT'):
        rows = [[end['fseq'].lower(), end['name']]
                for end in self.ends
                if end['type'] == endtype]
        fseqs_by_type[endtype], self.names[endtype] = (
            list(column) for column in zip(*rows))

    for endtype in ('TD', 'DT'):
        self.initstate.seqs[endtype] = sd.endarray(fseqs_by_type[endtype],
                                                   endtype)

    # Map each end name to its (row index, end type).
    self.enlocs = {}
    for endtype in ('TD', 'DT'):
        for index, endname in enumerate(self.names[endtype]):
            self.enlocs[endname] = (index, endtype)

    # Get the mean non-spurious interaction
    mean_td = np.mean(self.ef.matching_uniform(self.initstate.seqs['TD']))
    mean_dt = np.mean(self.ef.matching_uniform(self.initstate.seqs['DT']))
    self.meangse = 0.5 * (mean_td + mean_dt)

    self.mult = {
        label: np.exp(factor * self.meangse)
        for label, factor in (('1NGO', -2.0), ('2NGO', -1.65),
                              ('1GO', -1.5), ('2GO', -1.1))
    }

    # Register every pair in both orientations under its pair class.
    self.pairdict = {}
    for pairclass, memberset in pairs.items():
        for first, second in memberset:
            self.pairdict[(first, ecomp(second))] = pairclass
            self.pairdict[(second, ecomp(first))] = pairclass
def score_toeholds(self, toeholds):
    """Score toeholds by energy error and energy range.

    Args:
        toeholds: Iterable of toehold sets; only element 0 of each set is
            scored.

    Returns:
        Tuple ``(e_err, e_rng)``: the absolute difference between the mean
        of all internal and external energies and ``self.targetdG``, and
        the spread (max minus min) of those energies.
    """
    import stickydesign as sd

    firsts = [th_set[0] for th_set in toeholds]
    # Each toehold is flanked with 'c' on both sides before building the ends.
    flanked = ['c' + seq.lower() + 'c' for seq in firsts]
    end_array = sd.endarray(flanked, 'TD')

    external = self.th_external_dG(end_array)
    internal = self.th_internal_dG(end_array)
    energies = np.concatenate((internal, external))

    e_err = np.abs(energies.mean() - self.targetdG)
    e_rng = energies.max() - energies.min()
    return (e_err, e_rng)
def score_toeholds(toeholds, targetdG=7.7, e_module=efj):
    """Return the mean and range of the toehold binding energies.

    Args:
        toeholds: Iterable of toehold sequence strings.
        targetdG: Target energy passed to ``e_module.energyfuncs``.
        e_module: Module providing ``energyfuncs(targetdG=...)``; the
            returned object must offer ``th_external_dG`` and
            ``th_internal_dG``.

    Returns:
        Tuple ``(e_avg, e_rng)``: the mean of all internal and external
        energies, and their spread (max minus min).
    """
    # Each toehold is flanked with 'c' on both sides before building the ends.
    toeholds_flanked = ['c' + th.lower() + 'c' for th in toeholds]
    ef = e_module.energyfuncs(targetdG=targetdG)
    ends = sd.endarray(toeholds_flanked, 'TD')

    # Only the external and internal energies feed the result; the unused
    # matching_uniform evaluation and per-end average were dropped.
    e_vec_ext = ef.th_external_dG(ends)
    e_vec_int = ef.th_internal_dG(ends)
    e_vec_all = np.concatenate((e_vec_int, e_vec_ext))

    e_avg = e_vec_all.mean()
    e_rng = e_vec_all.max() - e_vec_all.min()
    return (e_avg, e_rng)
def score_toeholds(self, toeholds):
    """Score toeholds by energy error and energy range over four contexts.

    Args:
        toeholds: Nested toehold collection; it is flattened with
            ``translation.flatten`` before scoring.

    Returns:
        Tuple ``(e_err, e_rng)``: the absolute difference between the mean
        of all computed energies and ``self.targetdG``, and the spread
        (max minus min) of those energies.
    """
    import stickydesign as sd

    flat = translation.flatten(toeholds)
    # Each toehold is flanked with 'c' on both sides before building the ends.
    flanked = ['c' + seq.lower() + 'c' for seq in flat]
    end_array = sd.endarray(flanked, 'TD')

    scorers = (
        self.th_external_3_dG,
        self.th_external_5_dG,
        self.th_internal_dG,
        self.th_external_dG,
    )
    energies = np.concatenate([scorer(end_array) for scorer in scorers])

    e_err = np.abs(energies.mean() - self.targetdG)
    e_rng = energies.max() - energies.min()
    return (e_err, e_rng)
def to_endarrays(self):
    """Build one stickydesign endarray per (non-hairpin) end type.

    Ends of type 'hp' or 'hairpin' are skipped; every other type found
    among the ends gets an endarray made of the 'fseq' values of the ends
    of that type.

    Returns
    -------
    list of stickydesign.endarray
        the endarrays of ends in the system.
    """
    present_types = {end['type'] for end in self}
    present_types = present_types - {'hp', 'hairpin'}

    arrays = []
    for endtype in present_types:
        fseqs = [end['fseq'] for end in self if end['type'] == endtype]
        arrays.append(sd.endarray(fseqs, endtype))
    return arrays
def score_toeholds(toeholds, dGTarget):
    """Score toeholds by energy error and energy range.

    Two energies are computed per toehold from parameter tables loaded out
    of package data ('params/dnastackingbig.csv' in stickydesign and
    'data/dnadangle.csv' in piperine):

    * external: the summed table values over all columns but the last,
      plus a dangle term looked up from the second-to-last column;
    * internal: the summed table values over all columns but the
      second-to-last.

    ``taildG`` (1.3) and ``initdG`` (0) are subtracted from both.

    Args:
        toeholds: Iterable of toehold sets; only element 0 of each set is
            scored.
        dGTarget: Target energy the mean is compared against.

    Returns:
        Tuple ``(e_err, e_rng)``: the absolute difference between the mean
        of all energies and ``dGTarget``, and the spread (max minus min).
    """
    nt = {'a': 0, 'c': 1, 'g': 2, 't': 3}
    taildG = 1.3
    initdG = 0

    def tops(s):
        # Encode each adjacent column pair as a single index, 4*first + second.
        return 4 * s[:, :-1] + s[:, 1:]

    # try/finally guarantees the streams are closed even if parsing fails.
    dsb = resource_stream('stickydesign', 'params/dnastackingbig.csv')
    try:
        nndG_full = -np.loadtxt(dsb, delimiter=',')
    finally:
        dsb.close()
    # Keep only the anti-diagonal entries (row i, column 15 - i).
    nndG = nndG_full[np.arange(0, 16), 15 - np.arange(0, 16)]

    dgl = resource_stream('piperine', 'data/dnadangle.csv')
    try:
        dgldG_full = -np.loadtxt(dgl, delimiter=',')
    finally:
        dgl.close()
    # Row 1, the four columns belonging to 'c'.
    dgldG_fixedC = dgldG_full[1, np.arange(4) + 4 * nt['c']]

    toeholds = [th_set[0] for th_set in toeholds]
    # Each toehold is flanked with 'c' on both sides before building the ends.
    toeholds_flanked = ['c' + th.lower() + 'c' for th in toeholds]
    ends = stickydesign.endarray(toeholds_flanked, 'TD')
    seqs_len = np.size(ends, 1)

    cols_external = np.arange(seqs_len - 1)
    tops_external = tops(ends[:, cols_external])
    nndG_external = np.sum(nndG[tops_external], 1)
    dgldG_external = dgldG_fixedC[ends[:, seqs_len - 2]]
    e_vec_ext = nndG_external + dgldG_external - taildG - initdG

    cols_internal = np.concatenate((np.arange(seqs_len - 2), [seqs_len - 1]))
    tops_internal = tops(ends[:, cols_internal])
    nndG_internal = np.sum(nndG[tops_internal], 1)
    e_vec_int = nndG_internal - taildG - initdG

    e_vec_all = np.concatenate((e_vec_int, e_vec_ext))
    e_err = np.abs(e_vec_all.mean() - dGTarget)
    e_rng = e_vec_all.max() - e_vec_all.min()
    return (e_err, e_rng)
def from_yaml_tileadj(ts):
    """Convert a tile set with tile-specified adjacents into an xgrow-style dict.

    Every (end, adjacent) combination used by a tile becomes its own bond,
    named ``<end with '/' replaced by '_c'>_<adj>``. Glue strengths between
    bonds of the same end type are then computed with ``ef.uniform``.

    Args:
        ts: Mapping read here through the keys 'seed' (with 'adapters'),
            'tiles', 'ends' and 'xgrow_options'. Tiles provide 'ends',
            'adjs', 'type' and optionally 'name', 'conc' and 'color'.

    Returns:
        Dict with the keys 'tiles', 'bonds', 'xgrowargs' and 'glues'.

    NOTE(review): ``ef`` is used below but its only assignment in this
    function is inside commented-out code; unless a module-level ``ef``
    exists, the ``ef.uniform`` calls raise NameError -- confirm.
    ``re`` and ``revcomp`` are also expected from the enclosing module.
    """
    import stickydesign as sd
    import numpy as np

    # Combine ends and tile-specified adjacents
    newtiles = []
    newends = []
    # Set of (bond name, end name, adjacent) triples seen on any tile.
    endslist = set()
    doubleends = []
    doubles = []
    newtiles.append({
        'name': 'origami',
        'edges': ['origami', 'origami', 'origami', 'origami'],
        'stoic': 1e-9,
        'color': 'white'
    })

    # Adapter tiles: their end bonds are bracketed by 'origami' edges.
    for tile in ts['seed']['adapters']:
        newtile = {}
        newtile['edges'] = ['origami'] + [
            re.sub('/', '_c', x) + '_' + y
            for x, y in zip(tile['ends'], tile['adjs'])
        ] + ['origami']
        endslist.update(
            set(zip(newtile['edges'][1:3], tile['ends'], tile['adjs'])))
        if 'name' in tile:
            newtile['name'] = tile['name']
        newtile['stoic'] = 1e-9
        newtile['color'] = 'white'
        newtiles.append(newtile)

    for tile in ts['tiles']:
        # Single tiles map one-to-one onto an xgrow tile.
        if tile['type'] == '3up' or tile['type'] == '5up':
            newtile = {}
            newtile['edges'] = [
                re.sub('/', '_c', x) + '_' + y
                for x, y in zip(tile['ends'], tile['adjs'])
            ]
            endslist.update(
                set(zip(newtile['edges'], tile['ends'], tile['adjs'])))
            if 'name' in tile:
                newtile['name'] = tile['name']
            if 'conc' in tile:
                newtile['stoic'] = tile['conc']
            if 'color' in tile:
                newtile['color'] = tile['color']
            newtiles.append(newtile)

        # Double tiles are split into a '_left' and a '_right' tile that
        # share a dedicated '<name>_db' bond.
        if tile['type'] == '3up5up' or tile['type'] == '5up3up':
            newtile1 = {}
            newtile2 = {}
            newtile1['edges'] = [ re.sub('/','_c',x)+'_'+y for x,y in zip(tile['ends'][0:1],tile['adjs'][0:1]) ] \
                + [ tile['name']+'_db' ] \
                + [ re.sub('/','_c',x)+'_'+y for x,y in zip(tile['ends'][4:],tile['adjs'][4:]) ]
            newtile2['edges'] = [ re.sub('/','_c',x)+'_'+y for x,y in zip(tile['ends'][1:4],tile['adjs'][1:4]) ] \
                + [ tile['name']+'_db' ]
            endslist.update(
                set(
                    zip([
                        re.sub('/', '_c', x) + '_' + y
                        for x, y in zip(tile['ends'], tile['adjs'])
                    ], tile['ends'], tile['adjs'])))
            newtile1['name'] = tile['name'] + '_left'
            newtile2['name'] = tile['name'] + '_right'
            doubleends.append(tile['name'] + '_db')
            doubles.append((newtile1['name'], newtile2['name']))
            if 'conc' in tile:
                newtile1['stoic'] = tile['conc']
                newtile2['stoic'] = tile['conc']
            if 'color' in tile:
                newtile1['color'] = tile['color']
                newtile2['color'] = tile['color']
            newtiles.append(newtile1)
            newtiles.append(newtile2)

    # Bond list: structural bonds get strength 100, all others strength 0
    # (their interactions are described by the glue list instead).
    newends.append({'name': 'origami', 'strength': 100})
    for end in doubleends:
        newends.append({'name': end, 'strength': 100})
    for end in endslist:
        newends.append({'name': end[0], 'strength': 0})

    xga = {}
    xga['doubletiles'] = [list(x) for x in doubles]
    xga.update(ts['xgrow_options'])

    # Now finally, deal with glues... in a perfect world first?
    # gluelist = []
    # for end1 in endslist:
    #     if end1[1][-1] == '/': continue
    #     for end2 in ( x for x in endslist if x[1]==end1[1]+'/' ):
    #         gluelist.append( [end1[0],end2[0],1] )

    # ef = sd.energyfuncs_santalucia(mismatchtype='max')
    #
    # gluelist = []
    # for end1 in endslist:
    #     if end1[1][-1] != '/':
    #         ed1 = [ x for x in ts['ends'] if x['name'] == end1[1] ][0]
    #     else:
    #         edd1 = [ x for x in ts['ends'] if x['name'] == end1[1][:-1] ][0]
    #         ed1 = { 'type': edd1['type'], 'seq': revcomp(edd1['seq'])}
    #     if (ed1['type'] == 'fake') or (ed1['type'] == 'fakeDT') or (ed1['type'] == 'fakeTD'):
    #         continue
    #     elif (ed1['type'] == 'DT'):
    #         e1 = sd.endarray([(end1[2]+ed1['seq']).lower()],'DT')
    #     elif (ed1['type'] == 'TD'):
    #         e1 = sd.endarray([(ed1['seq']+end1[2]).lower()],'TD')
    #     for end2 in endslist:
    #         if end2[1][-1] != '/':
    #             ed2 = [ x for x in ts['ends'] if x['name'] == end2[1] ][0]
    #         else:
    #             edd2 = [ x for x in ts['ends'] if x['name'] == end2[1][:-1] ][0]
    #             ed2 = { 'type': edd2['type'], 'seq': revcomp(edd2['seq'])}
    #         if ed1['type'] != ed2['type']:
    #             continue
    #         elif (ed1['type'] == 'DT'):
    #             e2 = sd.endarray([(end2[2]+ed2['seq']).lower()],'DT')
    #         elif (ed1['type'] == 'TD'):
    #             e2 = sd.endarray([(ed2['seq']+end2[2]).lower()],'TD')
    #         gluelist.append( [end1[0], end2[0], float(ef.uniform(e1,e2)[0])])

    # Parallel lists describing every same-type (end1, end2) combination:
    # *l1/*l2 hold the two sequences, *n1/*n2 the two bond names.
    dtl1 = []
    dtl2 = []
    tdl1 = []
    tdl2 = []
    dtn1 = []
    dtn2 = []
    tdn1 = []
    tdn2 = []
    for end1 in endslist:
        # A trailing '/' marks a complement: look up the base end and use
        # the reverse complement of its sequence.
        if end1[1][-1] != '/':
            ed1 = [x for x in ts['ends'] if x['name'] == end1[1]][0]
        else:
            edd1 = [x for x in ts['ends'] if x['name'] == end1[1][:-1]][0]
            ed1 = {'type': edd1['type'], 'seq': revcomp(edd1['seq'])}
        # Fake ends take no part in glue calculations.
        if (ed1['type'] == 'fake') or (ed1['type'] == 'fakeDT') or (
                ed1['type'] == 'fakeTD'):
            continue
        for end2 in endslist:
            if end2[1][-1] != '/':
                ed2 = [x for x in ts['ends'] if x['name'] == end2[1]][0]
            else:
                edd2 = [x for x in ts['ends'] if x['name'] == end2[1][:-1]][0]
                ed2 = {'type': edd2['type'], 'seq': revcomp(edd2['seq'])}
            if ed1['type'] != ed2['type']:
                continue
            # The adjacent is prepended for DT ends and appended for TD ends.
            elif (ed1['type'] == 'DT'):
                dtl1.append((end1[2] + ed1['seq']).lower())
                dtl2.append((end2[2] + ed2['seq']).lower())
                dtn1.append(end1[0])
                dtn2.append(end2[0])
            elif (ed1['type'] == 'TD'):
                tdl1.append((ed1['seq'] + end1[2]).lower())
                tdl2.append((ed2['seq'] + end2[2]).lower())
                tdn1.append(end1[0])
                tdn2.append(end2[0])

    dta1 = sd.endarray(dtl1, 'DT')
    dta2 = sd.endarray(dtl2, 'DT')
    tda1 = sd.endarray(tdl1, 'TD')
    tda2 = sd.endarray(tdl2, 'TD')
    # NOTE(review): `ef` is not assigned anywhere in this function -- see
    # the docstring.
    dtg = ef.uniform(dta1, dta2)
    tdg = ef.uniform(tda1, tda2)

    # Sorting the two names and collecting into sets removes the duplicate
    # produced by visiting each pair in both orders.
    dtgl = set([
        tuple(sorted([x, y]) + [float(z)])
        for x, y, z in zip(dtn1, dtn2, dtg)
    ])
    tdgl = set([
        tuple(sorted([x, y]) + [float(z)])
        for x, y, z in zip(tdn1, tdn2, tdg)
    ])
    gluelist = [list(x) for x in dtgl.union(tdgl)]

    sts = {
        'tiles': newtiles,
        'bonds': newends,
        'xgrowargs': xga,
        'glues': gluelist
    }
    return sts
def _xgrow_edge_names(ends):
    """Return xgrow bond names for ends: the '/' complement mark becomes '_c'."""
    return [re.sub('/', '_c', x) for x in ends]


def _copy_conc_and_color(tile, *newtiles):
    """Copy tile's optional 'conc' (as 'stoic') and 'color' onto each new tile."""
    if 'conc' in tile:
        for newtile in newtiles:
            newtile['stoic'] = tile['conc']
    if 'color' in tile:
        for newtile in newtiles:
            newtile['color'] = tile['color']


def _rotated_variants(tile):
    """Return the '_lrf', '_udf' and '_bf' copies of tile ([] for other types)."""
    ttype = tile['type']
    if ttype in ('tile_daoe_3up', 'tile_daoe_5up'):
        flipped = 'tile_daoe_' + {'5up': '3up', '3up': '5up'}[ttype[-3:]]
        plan = (('_lrf', ttype, (1, 0, 3, 2)),
                ('_udf', flipped, (3, 2, 1, 0)),
                ('_bf', flipped, (2, 3, 0, 1)))
    elif ttype == 'tile_daoe_doublehoriz_35up':
        plan = (('_lrf', 'tile_daoe_doublevert_53up', (2, 1, 0, 5, 4, 3)),
                ('_udf', 'tile_daoe_doublevert_53up', (5, 4, 3, 2, 1, 0)),
                ('_bf', ttype, (3, 4, 5, 0, 1, 2)))
    elif ttype == 'tile_daoe_doublevert_35up':
        plan = (('_lrf', 'tile_daoe_doublehoriz_53up', (2, 1, 0, 5, 4, 3)),
                ('_udf', 'tile_daoe_doublehoriz_53up', (5, 4, 3, 2, 1, 0)),
                ('_bf', ttype, (3, 4, 5, 0, 1, 2)))
    else:
        return []

    variants = []
    for suffix, newtype, order in plan:
        newtile = copy.copy(tile)
        newtile['name'] += suffix
        newtile['type'] = newtype
        newtile['ends'] = [tile['ends'][x] for x in order]
        variants.append(newtile)
    return variants


def from_yaml_endadj(ts, perfect=False, rotate=False):
    """Convert a tile set with end-specified adjacents into an xgrow-style dict.

    Args:
        ts: Mapping read through the keys 'seed' (with 'use_adapters' and
            'adapters'), 'tiles', 'xgrow_options' and 'ends'. NOTE: ``ts``
            is modified in place, as before: with ``rotate`` the rotated
            tiles are appended to ``ts['tiles']``, and with ``perfect`` a
            missing ``ts['ends']`` is set to ``[]``.
        perfect: If true, every end binds only its own complement with
            strength 1.0 and no sequences are needed. Otherwise glue
            strengths are computed with stickydesign from each end's 'fseq'.
        rotate: If true, flipped/rotated variants ('_lrf', '_udf', '_bf')
            of the tiles are added first.

    Returns:
        Dict with the keys 'tiles', 'bonds', 'xgrowargs' and 'glues'.
    """
    # Combine ends and tile-specified adjacents
    newtiles = []
    newends = []
    doubleends = []
    doubles = []
    vdoubleends = []
    vdoubles = []
    newtiles.append({
        'name': 'origami',
        'edges': ['origami', 'origami', 'origami', 'origami'],
        'stoic': 0,
        'color': 'white'
    })

    # Place each used adapter in its slot ('loc' is 1-based); the 16 slots
    # are emitted in order, with a placeholder for every unused slot.
    atiles = [None] * 16
    for tilename in ts['seed']['use_adapters']:
        tile = [x for x in ts['seed']['adapters'] if x['name'] == tilename][0]
        newtile = {}
        newtile['edges'] = (['origami'] + _xgrow_edge_names(tile['ends']) +
                            ['origami'])
        newtile['name'] = tile['name']
        newtile['stoic'] = 0
        newtile['color'] = 'white'
        atiles[tile['loc'] - 1] = newtile
    for tile in atiles:
        if tile:
            # Append the adapter stored in this slot (previously the last
            # adapter built was appended for every occupied slot).
            newtiles.append(tile)
        else:
            newtiles.append({
                'name': 'emptyadapt',
                'edges': ['origami', 0, 0, 'origami'],
                'stoic': 0,
                'color': 'white'
            })

    if rotate:
        rotatedtiles = []
        for tile in ts['tiles']:
            rotatedtiles.extend(_rotated_variants(tile))
        ts['tiles'] += rotatedtiles

    for tile in ts['tiles']:
        ttype = tile['type']
        if ttype in ('tile_daoe_3up', 'tile_daoe_5up'):
            newtile = {}
            newtile['edges'] = _xgrow_edge_names(tile['ends'])
            if 'name' in tile:
                newtile['name'] = tile['name']
            _copy_conc_and_color(tile, newtile)
            newtiles.append(newtile)
        elif ttype in ('tile_daoe_doublehoriz_35up',
                       'tile_daoe_doublehoriz_53up'):
            # Horizontal double tile: split into '_left' and '_right' tiles
            # joined by a dedicated '<name>_db' bond.
            dbname = tile['name'] + '_db'
            newtile1 = {}
            newtile2 = {}
            newtile1['edges'] = (_xgrow_edge_names(tile['ends'][0:1]) +
                                 [dbname] +
                                 _xgrow_edge_names(tile['ends'][4:]))
            newtile2['edges'] = (_xgrow_edge_names(tile['ends'][1:4]) +
                                 [dbname])
            newtile1['name'] = tile['name'] + '_left'
            newtile2['name'] = tile['name'] + '_right'
            doubleends.append(dbname)
            doubles.append((newtile1['name'], newtile2['name']))
            _copy_conc_and_color(tile, newtile1, newtile2)
            newtiles.append(newtile1)
            newtiles.append(newtile2)
        elif ttype in ('tile_daoe_doublevert_35up',
                       'tile_daoe_doublevert_53up'):
            # Vertical double tile: split into '_top' and '_bottom' tiles.
            dbname = tile['name'] + '_db'
            newtile1 = {}
            newtile2 = {}
            newtile1['edges'] = (_xgrow_edge_names(tile['ends'][0:2]) +
                                 [dbname] +
                                 _xgrow_edge_names(tile['ends'][5:]))
            newtile2['edges'] = ([dbname] +
                                 _xgrow_edge_names(tile['ends'][2:5]))
            newtile1['name'] = tile['name'] + '_top'
            newtile2['name'] = tile['name'] + '_bottom'
            vdoubleends.append(dbname)
            vdoubles.append((newtile1['name'], newtile2['name']))
            _copy_conc_and_color(tile, newtile1, newtile2)
            newtiles.append(newtile1)
            newtiles.append(newtile2)

    newends.append({'name': 'origami', 'strength': 100})
    for end in doubleends:
        newends.append({'name': end, 'strength': 10})
    for end in vdoubleends:
        newends.append({'name': end, 'strength': 10})

    gluelist = []
    if not perfect:
        # stickydesign is only needed to compute sequence-based glues, so
        # the perfect-glue path works without it.
        import stickydesign as sd
        import stickydesign.energetics as en

        glueends = {'DT': [], 'TD': []}
        for end in ts['ends']:
            newends.append({'name': end['name'], 'strength': 0})
            newends.append({'name': end['name'] + '_c', 'strength': 0})
            if (end['type'] == 'TD') or (end['type'] == 'DT'):
                glueends[end['type']].append((end['name'], end['fseq']))

        ef = en.energetics_santalucia(mismatchtype='max')

        for t in ['DT', 'TD']:
            if not glueends[t]:
                # No ends of this type: zip(*[]) could not be unpacked.
                continue
            names, fseqs = zip(*glueends[t])
            allnames = names + tuple(x + '_c' for x in names)
            ea = sd.endarray(fseqs, t)
            ar = sd.energy_array_uniform(ea, ef)
            for i1, n1 in enumerate(names):
                for i2, n2 in enumerate(allnames):
                    gluelist.append([n1, n2, float(ar[i1, i2])])
    else:
        if 'ends' not in ts.keys():
            ts['ends'] = []
        endsintiles = set()
        for tile in ts['tiles']:
            endsintiles.update(
                re.sub('/', '', e) for e in tile['ends'] if e != 'hp')
        # NOTE(review): an end present both in ts['ends'] and on a tile is
        # emitted twice here (the original built an unused set of the names
        # in ts['ends'], possibly meant for de-duplication) -- confirm.
        for end in ts['ends'] + list({'name': e} for e in endsintiles):
            newends.append({'name': end['name'], 'strength': 0})
            newends.append({'name': end['name'] + '_c', 'strength': 0})
            gluelist.append([end['name'], end['name'] + '_c', 1.0])

    newends.append({'name': 'hp', 'strength': 0})

    xga = {}
    xga['doubletiles'] = [list(x) for x in doubles]
    xga['vdoubletiles'] = [list(x) for x in vdoubles]
    xga.update(ts['xgrow_options'])

    sts = {
        'tiles': newtiles,
        'bonds': newends,
        'xgrowargs': xga,
        'glues': gluelist
    }
    return sts
def sets_daoe():
    """Return four random endarrays of 100 rows each.

    The arrays hold random integers in [0, 4) and are produced in the
    order: 7 columns as 'DT', 7 columns as 'TD', 12 columns as 'DT',
    12 columns as 'TD'.
    """
    specs = (
        ((100, 7), 'DT'),
        ((100, 7), 'TD'),
        ((100, 12), 'DT'),
        ((100, 12), 'TD'),
    )
    # Built strictly in order so the draws from the global NumPy RNG match.
    return [
        endarray(np.random.randint(low=0, high=4, size=shape), endtype)
        for shape, endtype in specs
    ]
def sets_basic():
    """Return two random 'S'-type endarrays of 100 rows each.

    The arrays hold random integers in [0, 4); the first has 7 columns and
    the second has 12.
    """
    # Built strictly in order so the draws from the global NumPy RNG match.
    return [
        endarray(np.random.randint(low=0, high=4, size=(100, width)), 'S')
        for width in (7, 12)
    ]
def get_toeholds(n_ths=6,
                 thold_l=int(7.0),
                 thold_e=7.7,
                 e_dev=0.5,
                 m_spurious=0.4,
                 e_module=efj,
                 timeout=8):
    """Generate sticky-end toeholds for the Soloveichik DSD approach.

    StickyDesign is run repeatedly until a run yields at least ``n_ths``
    toeholds in addition to the trivial "avoid" sequences. The timeout is
    checked after every unsuccessful run, whether it raised ``ValueError``
    or simply returned too few ends.

    Args:
        n_ths: Number of toeholds to generate
        thold_l: Nucleotides in the toeholds
        thold_e: Target binding energy for the toeholds in kcal/mol
        e_dev: Allowable energy deviation in kcal/mol
        m_spurious: Maximum spurious interaction strength as a ratio of
            target energy
        e_module: Module providing ``energyfuncs(targetdG=...)``
        timeout: Time allowed for finding toeholds, in seconds

    Returns:
        List of toehold strings (the first and last character of every
        StickyDesign end are stripped), or ``-1`` if no sufficient set was
        found within ``timeout`` seconds.
    """
    # Give the energetics instance the target energy
    ef = e_module.energyfuncs(targetdG=thold_e)

    # Give StickyDesign a set of trivial, single-nucleotide toeholds to avoid
    # poor designs. I'm not sure if this helps now, but it did once.
    avoid_list = [i * int(thold_l + 2) for i in ['a', 'c', 't']]
    n_needed = n_ths + len(avoid_list)

    # Generate toeholds
    startime = time()
    while True:
        try:
            ends = sd.easyends('TD',
                               thold_l,
                               interaction=thold_e,
                               fdev=e_dev / thold_e,
                               alphabet='h',
                               adjs=['c', 'g'],
                               maxspurious=m_spurious,
                               energetics=ef,
                               oldends=avoid_list)
        except ValueError:
            # StickyDesign could not satisfy the request this time; retry.
            ends = None
        if ends is not None and len(ends) >= n_needed:
            break
        # -1 is kept as the failure value for existing callers.
        if (time() - startime) > timeout:
            return -1

    # Remove the "avoid" sequences, which come first, and keep n_ths ends.
    th_cands = ends.tolist()[len(avoid_list):][:n_ths]
    # Drop the flanking character on each side of every candidate.
    th_all = [th[1:-1] for th in th_cands]
    return sd.endarray(th_all, 'TD').tolist()
def create_sticky_end_sequences(tileset, energetics=None):
    """\
    Create sticky end sequences for a tileset, using stickydesign.

    Ends that already have a sequence are kept and used to choose the
    target interaction energy for the new ends. Ends whose sequence is
    'nnnnnnn' are treated as undefined and redesigned.

    Parameters
    ----------

    tileset: the tileset to create sticky ends sequences for.  This will be
         copied and returned, not modified.

    energetics: the energetics instance to use for the design. If None
         (default), will use alhambra.designer.default_energetics.

    Outputs (tileset, new_ends) where new_ends is a list of new end names
    that were designed (TD names first, then DT names).
    """
    if not energetics:
        energetics = default_energetics

    # Work on a copy of the tileset.
    newtileset = copy.deepcopy(tileset)

    # Build a list of ends from the endlist in the tileset.  Do this
    # by creating a named_list, then merging them into it.
    ends = util.named_list()

    if 'ends' in newtileset.keys():
        ends = util.merge_endlists(ends, newtileset['ends'],
                                   fail_immediate=False, in_place=True)

    # This is the endlist from the tiles themselves.
    if 'tiles' in newtileset.keys():  # maybe you just want ends?
        # this checks for end/complement usage, and whether any
        # previously-described ends are unused
        # FIXME: implement
        # tiletypes.check_end_usage(newtileset['tiles'], ends)

        endlist_from_tiles = tiletypes.endlist_from_tilelist(
            newtileset['tiles'])

        ends = util.merge_endlists(ends, endlist_from_tiles, in_place=True)

    # Ensure that if there are any resulting completely-undefined ends, they
    # have their sequences removed.
    for end in ends:
        if 'fseq' in end.keys() and end['fseq'] == 'nnnnnnn':
            del end['fseq']

    # Build inputs suitable for stickydesign: lists of old sequences for
    # TD/DT, and the names of the ends that still need a sequence.
    oldDTseqs = [end['fseq'] for end in ends
                 if 'fseq' in end.keys() and end['type'] == 'DT']
    oldTDseqs = [end['fseq'] for end in ends
                 if 'fseq' in end.keys() and end['type'] == 'TD']

    newTDnames = [end['name'] for end in ends
                  if 'fseq' not in end.keys() and end['type'] == 'TD']
    newDTnames = [end['name'] for end in ends
                  if 'fseq' not in end.keys() and end['type'] == 'DT']

    # Choose the target interaction energy. With no old sequences, use the
    # median energies reported by sd.enhist for each end type; otherwise
    # use the matching energies of the existing ends.
    # TODO: tests needs to test this
    targets = []
    if not oldDTseqs and not oldTDseqs:
        targets.append(
            sd.enhist('DT', 5, energetics=energetics)[2]['emedian'])
        targets.append(
            sd.enhist('TD', 5, energetics=energetics)[2]['emedian'])
    if oldDTseqs:
        targets.append(
            energetics.matching_uniform(sd.endarray(oldDTseqs, 'DT')))
    if oldTDseqs:
        targets.append(
            energetics.matching_uniform(sd.endarray(oldTDseqs, 'TD')))
    # The DT and TD energy arrays generally differ in length, so pool all
    # values into one flat array before averaging; averaging the ragged
    # list directly fails.
    targetint = np.mean(np.concatenate([np.ravel(t) for t in targets]))

    # Create new sequences. easyends treats number=0 as "as many as
    # possible", so it is skipped when nothing needs designing.
    if newTDnames:
        newTDseqs = sd.easyends('TD', 5, number=len(newTDnames),
                                energetics=energetics,
                                interaction=targetint).tolist()
    else:
        newTDseqs = []
    if newDTnames:
        newDTseqs = sd.easyends('DT', 5, number=len(newDTnames),
                                energetics=energetics,
                                interaction=targetint).tolist()
    else:
        newDTseqs = []

    # FIXME: move to stickydesign
    assert len(newTDseqs) == len(newTDnames)
    assert len(newDTseqs) == len(newDTnames)

    # Shuffle the lists of end sequences, to ensure that they're random
    # order, and that ends used earlier in the set are not always better
    # than those used later.
    shuffle(newTDseqs)
    shuffle(newDTseqs)

    for name, seq in zip(newDTnames, newDTseqs):
        ends[name]['fseq'] = seq
    for name, seq in zip(newTDnames, newTDseqs):
        ends[name]['fseq'] = seq

    ends.check_consistent()

    # Ensure that the old and new sets have consistent end definitions,
    # and that the tile definitions still fit. Both keys are optional, as
    # in the merging step above.
    if 'ends' in tileset.keys():
        tiletypes.merge_endlists(tileset['ends'], ends)
    if 'tiles' in newtileset.keys():
        tiletypes.merge_endlists(
            tiletypes.endlist_from_tilelist(newtileset['tiles']), ends)

    # Apply new sequences to tile system.
    newtileset['ends'] = ends

    return (newtileset, newTDnames + newDTnames)
def __init__(self,
             tilesys,
             newends=None,
             pairs=None,
             energetics=None,
             inputpairs=False,
             multiscore=False):
    """Set up sequences, lookups, pair weights and energy caches.

    Args:
        tilesys: Tile system exposing ``ends`` and ``tiles`` attributes; it
            is deep-copied before use.
        newends: Names of the ends that may be reordered. When falsy, all
            ends of each type are mutable.
        pairs: Mapping of pair class -> iterable of (x, y) end pairs. When
            falsy, it is computed with ``sens.senspairs`` /
            ``sens.consolidate_pairs`` from the copied tile system.
        energetics: A single energetics object, or an iterable of them when
            ``multiscore`` is true.
        inputpairs: When truthy, replaced by a list with, per tile, the
            tuple of input end names (last character dropped) and printed.
        multiscore: When true, ``meangse``, ``mult`` and the four
            ``ecache_*`` attributes are lists with one entry per
            energetics object instead of single values.
    """
    tilesys = deepcopy(tilesys)
    self.ends = tilesys.ends
    self.tiles = tilesys.tiles
    self.tilesystem = tilesys
    self.ef = energetics
    self.multiscore = multiscore

    if not pairs:
        pairs = sens.consolidate_pairs(sens.senspairs(tilesys),
                                       comcomp=1,
                                       onlytop=True)

    if inputpairs:
        inputpairs = [
            tuple(z[:-1] for z, i in zip(x['ends'], x['input']) if i)
            for x in self.tiles
        ]
        print(inputpairs)
    self.inputpairs = inputpairs

    # Split the ends by type into parallel (sequence, name) lists.
    self.names = {}
    fseqsTD, self.names['TD'] = (list(x) for x in zip(
        *[[end['fseq'].lower(), end['name']] for end in self.ends
          if end['type'] == 'TD']))
    fseqsDT, self.names['DT'] = (list(x) for x in zip(
        *[[end['fseq'].lower(), end['name']] for end in self.ends
          if end['type'] == 'DT']))

    self.seqs = {}
    self.seqs['TD'] = sd.endarray(fseqsTD, 'TD')
    self.seqs['DT'] = sd.endarray(fseqsDT, 'DT')
    self.initstate = FastState({
        'DT': np.arange(0, len(self.seqs['DT'])),
        'TD': np.arange(0, len(self.seqs['TD']))
    })

    # Map each end name to its (row index, end type).
    self.enlocs = {}
    for i, endn in enumerate(self.names['TD']):
        self.enlocs[endn] = (i, 'TD')
    for i, endn in enumerate(self.names['DT']):
        self.enlocs[endn] = (i, 'DT')

    # ends that can be reordered.
    if newends:
        self.mutableTD = [
            i for i, t in [self.enlocs[x] for x in newends] if t == 'TD'
        ]
        self.mutableDT = [
            i for i, t in [self.enlocs[x] for x in newends] if t == 'DT'
        ]
    else:
        self.mutableTD = range(0, len(fseqsTD))
        self.mutableDT = range(0, len(fseqsDT))

    # Register every pair in both orientations under its pair class.
    self.pairdict = {}
    for pairclass, memberset in pairs.items():
        for x, y in memberset:
            self.pairdict[(x, ecomp(y))] = pairclass
            self.pairdict[(y, ecomp(x))] = pairclass

    tdsh = (len(self.seqs['TD']), len(self.seqs['TD']))
    dtsh = (len(self.seqs['DT']), len(self.seqs['DT']))

    def mean_matching(ef):
        # Mean non-spurious interaction, averaged over the two end types.
        return 0.5 * (np.mean(ef.matching_uniform(self.seqs['TD'])) +
                      np.mean(ef.matching_uniform(self.seqs['DT'])))

    def multipliers(meangse):
        return {
            '1NGO': np.exp(-2.0 * meangse),
            '2NGO': np.exp(-1.65 * meangse),
            '1GO': np.exp(-1.5 * meangse),
            '2GO': np.exp(-1.1 * meangse),
            'I': np.exp(-1.0 * meangse)
        }

    def energy_caches(get_ef):
        # Build the (cc, ce, ec, ee) cache dicts; get_ef() supplies the
        # energetics object each time an entry is computed.
        def cache(endtype, left, right, shape):
            def compute(x, y):
                seqs = self.seqs[endtype]
                return get_ef().uniform(getattr(seqs[x:x + 1], left),
                                        getattr(seqs[y:y + 1], right))

            return cachedarray(compute, shape)

        return [{
            'TD': cache('TD', left, right, tdsh),
            'DT': cache('DT', left, right, dtsh)
        } for left, right in (('comps', 'comps'), ('comps', 'ends'),
                              ('ends', 'comps'), ('ends', 'ends'))]

    if not multiscore:
        self.meangse = mean_matching(self.ef)
        self.mult = multipliers(self.meangse)
        (self.ecache_cc, self.ecache_ce, self.ecache_ec,
         self.ecache_ee) = energy_caches(lambda: self.ef)
    else:
        self.ecache_cc = []
        self.ecache_ee = []
        self.ecache_ce = []
        self.ecache_ec = []
        self.mult = []
        self.meangse = []
        for ef in self.ef:
            self.meangse.append(mean_matching(ef))
            self.mult.append(multipliers(self.meangse[-1]))
            # Bind this iteration's ef as a default argument: a plain
            # closure over the loop variable would make every cache use
            # the last energetics object.
            cc, ce, ec, ee = energy_caches(lambda ef=ef: ef)
            self.ecache_cc.append(cc)
            self.ecache_ce.append(ce)
            self.ecache_ec.append(ec)
            self.ecache_ee.append(ee)
def __init__(self, tilesys, newends=None, pairs=None, energetics=None):
    """Set up sequences, lookups, pair weights and energy caches.

    Args:
        tilesys: Mapping with 'ends' and 'tiles' entries; it is deep-copied
            before use.
        newends: Names of the ends that may be reordered. When falsy, all
            ends of each type are mutable.
        pairs: Mapping of pair class -> iterable of (x, y) end pairs. When
            falsy, it is computed with ``sens.senspairs`` /
            ``sens.consolidate_pairs`` from the copied tile system.
        energetics: Energetics object; when falsy,
            ``en.energetics_santalucia(mismatchtype='max')`` is used.
    """
    # Set up variables, etc.
    self.ef = (energetics if energetics else
               en.energetics_santalucia(mismatchtype='max'))

    tilesys = deepcopy(tilesys)
    self.ends = tilesys['ends']
    self.tiles = tilesys['tiles']
    self.tilesystem = tilesys

    if not pairs:
        sensitive = sens.senspairs(tilesys)
        pairs = sens.consolidate_pairs(sensitive, comcomp=1, onlytop=True)

    # Split the ends by type into parallel (sequence, name) lists.
    self.names = {}
    fseqs_by_type = {}
    for endtype in ('TD', 'DT'):
        rows = [[end['fseq'].lower(), end['name']]
                for end in self.ends
                if end['type'] == endtype]
        fseqs_by_type[endtype], self.names[endtype] = (
            list(column) for column in zip(*rows))

    self.seqs = {}
    for endtype in ('TD', 'DT'):
        self.seqs[endtype] = sd.endarray(fseqs_by_type[endtype], endtype)

    self.initstate = FastState({
        'DT': np.arange(0, len(self.seqs['DT'])),
        'TD': np.arange(0, len(self.seqs['TD']))
    })

    # Map each end name to its (row index, end type).
    self.enlocs = {}
    for endtype in ('TD', 'DT'):
        for index, endname in enumerate(self.names[endtype]):
            self.enlocs[endname] = (index, endtype)

    # ends that can be reordered.
    if newends:
        locations = [self.enlocs[name] for name in newends]
        self.mutableTD = [i for i, t in locations if t == 'TD']
        self.mutableDT = [i for i, t in locations if t == 'DT']
    else:
        self.mutableTD = range(0, len(fseqs_by_type['TD']))
        self.mutableDT = range(0, len(fseqs_by_type['DT']))

    # Get the mean non-spurious interaction
    mean_td = np.mean(self.ef.matching_uniform(self.seqs['TD']))
    mean_dt = np.mean(self.ef.matching_uniform(self.seqs['DT']))
    self.meangse = 0.5 * (mean_td + mean_dt)

    self.mult = {
        label: np.exp(factor * self.meangse)
        for label, factor in (('1NGO', -2.0), ('2NGO', -1.65),
                              ('1GO', -1.5), ('2GO', -1.1))
    }

    # Register every pair in both orientations under its pair class.
    self.pairdict = {}
    for pairclass, memberset in pairs.items():
        for first, second in memberset:
            self.pairdict[(first, ecomp(second))] = pairclass
            self.pairdict[(second, ecomp(first))] = pairclass

    shapes = {
        'TD': (len(self.seqs['TD']), len(self.seqs['TD'])),
        'DT': (len(self.seqs['DT']), len(self.seqs['DT']))
    }

    def cache(endtype, left, right):
        # self.ef and self.seqs are looked up each time an entry is
        # computed, not when the cache is created.
        def compute(x, y):
            return self.ef.uniform(
                getattr(self.seqs[endtype][x:x + 1], left),
                getattr(self.seqs[endtype][y:y + 1], right))

        return cachedarray(compute, shapes[endtype])

    def cache_pair(left, right):
        return {
            'TD': cache('TD', left, right),
            'DT': cache('DT', left, right)
        }

    self.ecache_cc = cache_pair('comps', 'comps')
    self.ecache_ce = cache_pair('comps', 'ends')
    self.ecache_ec = cache_pair('ends', 'comps')
    self.ecache_ee = cache_pair('ends', 'ends')