def get_overhang_segs(overhang_ds_edges, overhang_ss_edges,
                      ss_overhang_sequences):
    """Create the mrdna segments for double- and single-stranded overhangs.

    Args:
        overhang_ds_edges: dict of edge objects for the double-stranded part
            of each overhang. The dict key must be numeric (it is offset by
            100 to form the helix name); each edge supplies ``index``,
            ``nBp``, ``nStart_c`` and ``nStop_c``.
        overhang_ss_edges: dict of edge objects for the single-stranded part,
            with the same attributes plus ``out_orientation`` (5 or 3). Keys
            are offset by 200 to form the segment name.
        ss_overhang_sequences: mapping from the keys of ``overhang_ss_edges``
            to the sequence assigned to the corresponding segment.

    Returns:
        ``(overhang_segs, ss_overhang_segs)``: two dicts keyed by each edge's
        ``index`` attribute.

    Raises:
        ValueError: if an edge's ``out_orientation`` is neither 5 nor 3.
            Previously this case either crashed with an unbound local or
            silently reused the segment built for the previous edge.
    """
    overhang_segs = {}
    for index, e in overhang_ds_edges.items():
        overhang_segs[e.index] = copy(
            mrdna.DoubleStrandedSegment(name='helix%s' % (index + 100, ),
                                        num_bp=int(e.nBp),
                                        start_position=list(e.nStart_c),
                                        end_position=list(e.nStop_c)))

    ss_overhang_segs = {}
    for index, e in overhang_ss_edges.items():
        # A 3'-oriented tail is laid out in the opposite direction, so its
        # start and end coordinates are swapped.
        if e.out_orientation == 5:
            start, end = e.nStart_c, e.nStop_c
        elif e.out_orientation == 3:
            start, end = e.nStop_c, e.nStart_c
        else:
            raise ValueError(
                'out_orientation must be 5 or 3, got %r for overhang %r' %
                (e.out_orientation, index))
        tail = copy(
            mrdna.SingleStrandedSegment(name='helix%s' % (index + 200, ),
                                        num_nt=int(e.nBp),
                                        start_position=list(start),
                                        end_position=list(end)))
        tail.sequence = ss_overhang_sequences[index]
        ss_overhang_segs[e.index] = tail
    return (overhang_segs, ss_overhang_segs)
def create_helix(self):
    """Create the mrdna segment for this object and store it in ``self.dna``.

    A double-stranded segment is built when ``self.is_dsdna`` is truthy,
    otherwise a single-stranded one. In both cases ``self.num_bp`` supplies
    the length (passed as ``num_bp`` or ``num_nt`` respectively).
    """
    if self.is_dsdna:
        segment_cls, length_keyword = mrdna.DoubleStrandedSegment, 'num_bp'
    else:
        segment_cls, length_keyword = mrdna.SingleStrandedSegment, 'num_nt'

    self.dna = segment_cls(name=self.name,
                           start_position=self.start_position,
                           end_position=self.end_position,
                           **{length_keyword: self.num_bp})
def get_segs(edges):
    """Return one double-stranded segment per edge, keyed by the edge index.

    Each segment is named ``helix<n>`` where ``n`` is the position of the
    edge in ``edges``; its length and end points come from the edge's
    ``nBp``, ``nStart_c`` and ``nStop_c`` attributes. The dict key is
    ``tuple(edge.index)``.
    """
    return {
        tuple(e.index): mrdna.DoubleStrandedSegment(
            name='helix%s' % (position, ),
            num_bp=e.nBp,
            start_position=e.nStart_c,
            end_position=e.nStop_c)
        for position, e in enumerate(edges)
    }
def get_segments(FNAME,
                 LENGTH_OF_SMALLEST,
                 SPACERS,
                 nicks=1,
                 overhangs=None,
                 turberfieldnicks=0,
                 ldmoverhangs=None):
    """Assemble the list of mrdna segments for the structure in ``FNAME``.

    Steps, as visible in this function:
      1. load face data and vertex locations, assign nicks and orientations;
      2. create one edge per oriented edge and normalise all edges (and the
         overhang edges, if any) against the shortest raw edge length;
      3. build the double-stranded segments and, when ``overhangs`` is given,
         the overhang segments;
      4. walk every connection of every face and join the two neighbouring
         segments through an overhang (``join_overhang``), through
         ``join_segments``, or - for connections listed in ``ldmoverhangs`` -
         queue the touching segments to be split;
      5. split the queued segments (unfinished, see the review notes below);
      6. add nicks and give every single-stranded spacer a poly-T sequence.

    Args:
        FNAME: passed to ``get_face_data``.
        LENGTH_OF_SMALLEST: passed to ``normalize`` / ``get_overhang_edges``.
        SPACERS: passed to ``join_segments``.
        nicks: when truthy, a nick is added on each face's nick segment,
            except for faces listed in ``overhangs['face']``.
        overhangs: optional table; indexed here with ``['face']``.
        turberfieldnicks: stored in ``tn`` but not used in this function.
        ldmoverhangs: optional table read via ``.iloc[i]['face']`` and
            ``.iloc[i]['overhang']``.

    Returns:
        A list containing the edge segments, the single-stranded spacers and,
        when ``overhangs`` is given, the double- and single-stranded overhang
        segments.

    NOTE(review): the original indentation of this function was lost; the
    nesting below is a reconstruction and should be checked against the
    author's version, in particular around the ``breakpoint()`` calls.
    """
    tn = turberfieldnicks
    lo = ldmoverhangs
    face_data, locs = get_face_data(FNAME)
    assign_nicks(face_data)
    mentioned_edges = get_mentioned_edges(face_data)
    mentioned_edges = do_orientation_assignment(face_data, mentioned_edges)
    edges = [edge(i, locs[i[0]], locs[i[1]]) for i in mentioned_edges]
    # The shortest raw edge sets the scale used by normalize().
    min_length = np.min([x.rawlen for x in edges])
    if overhangs is not None:
        overhang_ds_edges, overhang_ss_edges, ss_overhang_sequences = get_overhang_edges(
            overhangs, locs, min_length, LENGTH_OF_SMALLEST)
        for i in edges + list(overhang_ds_edges.values()) + list(
                overhang_ss_edges.values()):
            i.normalize(min_length, LENGTH_OF_SMALLEST)
    else:
        for i in edges:
            i.normalize(min_length, LENGTH_OF_SMALLEST)
    segs = get_segs(edges)
    if overhangs is not None:
        overhang_segs, ss_overhang_segs = get_overhang_segs(
            overhang_ds_edges, overhang_ss_edges, ss_overhang_sequences)
    if overhangs is not None:
        connect_ssdna_to_overhangs(overhang_ss_edges, ss_overhang_segs,
                                   overhang_ds_edges, overhang_segs)
    single_stranded_dna = []
    ssDNA_index = 0
    # Each entry is the list of edge keys (oriented so the vertex is last)
    # that meet at one LDM overhang vertex of one face.
    ldm_breakable_segs = []
    for f in face_data:
        for con in f.connections:
            # The vertex shared by the two edges of this connection.
            vertex_index = intersection(con[0], con[1])[0]
            face_index = f.index
            overhang_here, ss_here = is_there_an_overhang_here(
                f, con, overhangs)
            ssDNA_index += 1
            c1, c2 = con
            # An edge key missing from segs is stored under the reversed key.
            c1_positive = c1 in segs
            c2_positive = c2 in segs
            ldm_break = False
            if type(ldmoverhangs) != type(None):
                for i in range(len(ldmoverhangs)):
                    if (ldmoverhangs.iloc[i]['face'] == face_index
                            and ldmoverhangs.iloc[i]['overhang']
                            == vertex_index):
                        ldm_break = True
            if overhang_here:
                single_stranded_dna.extend(
                    join_overhang(ss_here, c1_positive, c2_positive,
                                  overhang_segs, segs, c1, c2, vertex_index))
            else:
                if not ldm_break:
                    single_stranded_dna.extend(
                        join_segments(c1_positive, c2_positive, segs, c1, c2,
                                      ssDNA_index, SPACERS))
                elif ldm_break:
                    # Here we add our specific sequence... maybe just add a
                    # sequence, then some Nones?
                    # Work out which segs need to be broken up.
                    segs_broken = []
                    for i in f.e:
                        if vertex_index in i:
                            if i[-1] == vertex_index:
                                segs_broken.append(i)
                            else:
                                segs_broken.append(i[::-1])
                    ldm_breakable_segs.append(segs_broken)
                    # Reorient the broken segs appropriately, so the vertex
                    # is at the end.
    # Break those segs!
    # NOTE(review): ldm_replacements is filled below but never added to segs
    # or to the returned list, while the original segment is popped from
    # segs - confirm whether the replacements should be returned.
    ldm_replacements = []
    for pair in ldm_breakable_segs:
        for seg_name in pair:
            if seg_name in segs:
                print('Woo')
                to_be_broken = segs[seg_name]
                segs.pop(seg_name)
                s = to_be_broken.start_position
                e = to_be_broken.end_position
                nts = to_be_broken.num_nt
                overhang_length = 14  # Obviously actually find this from the file.
                # START SIDE.
                start_1 = s
                end_1 = s + (e - s) * (nts - overhang_length) / float(nts)
                nbp_1 = nts - overhang_length
                # OVERHANG SIDE.
                start_2 = end_1
                end_2 = e
                nbp_2 = overhang_length
                start_seg = mrdna.DoubleStrandedSegment(
                    name='helix%s' % (np.random.rand()),
                    num_bp=nbp_1,
                    start_position=start_1,
                    end_position=end_1)
                end_seg = mrdna.DoubleStrandedSegment(
                    name='helix%s' % (np.random.rand()),
                    num_bp=nbp_2,
                    start_position=start_2,
                    end_position=end_2)
                start_seg.connect_end3(
                    end_seg.start5
                )  # Only works if direction of strand is anticlockwise around face.
                # start_seg.connect_end5(end_seg.start3)  # Only works if direction of strand is anticlockwise around face.
                # Now we reconnect this guy.
                # NOTE(review): the breakpoint() calls below drop into the
                # debugger at run time; the reconnection logic is marked as
                # unfinished by the author.
                breakpoint()  # TODOTODOTODOTODOTODO
                print(to_be_broken)
                for connection in to_be_broken.connections:
                    A = connection.A
                    B = connection.B
                    breakpoint()
                    if B.container == to_be_broken:
                        if B.on_fwd_strand:
                            A.container.connections.append(
                                mrdna.segmentmodel.Connection(
                                    A, end_seg.start5,
                                    type_=connection.type_))
                        else:
                            A.container.connections.append(
                                mrdna.segmentmodel.Connection(
                                    A.end3, start_seg,
                                    type_=connection.type_))
                    else:
                        pass
                        # The string below is disabled code kept by the
                        # author, not a docstring.
                        '''
                        if A.on_fwd_strand: #connect out end
                            B.container.connections.append(
                            mrdna.segmentmodel.Connection(B,end_seg,
                            type_=connection.type_))
                        else:
                            B.container.connections.append(
                            mrdna.segmentmodel.Connection(B,start_seg,
                            type_=connection.type_))
                        '''
                breakpoint()
                # Reconnect pre-existing connections!
                ldm_replacements.append(start_seg)
                ldm_replacements.append(end_seg)
                # breakpoint()
            else:
                print('Boo!')
    # Faces that carry an overhang are excluded from nicking.
    no_nick_faces = []
    if type(overhangs) != type(None):
        # if overhangs != None:
        no_nick_faces = list(overhangs['face'])
    if nicks:
        print("adding nicks!")
        for f in face_data:
            nick = f.nick
            if ((nick in segs) and (f.index not in no_nick_faces)):
                segs[nick].add_nick(10, on_fwd_strand=True)
            else:
                print('failure')
    # Make sure that all the ssDNA has the sequence 'TTT...'.
    for s in single_stranded_dna:
        s.sequence = s.num_nt * 'T'
    if overhangs is not None:
        segs_list = [segs[i] for i in segs] + single_stranded_dna + [
            overhang_segs[i] for i in overhang_segs
        ] + [ss_overhang_segs[i] for i in ss_overhang_segs]
    else:
        segs_list = [segs[i] for i in segs] + single_stranded_dna
    return segs_list
def get_segments(FNAME, LENGTH_OF_SMALLEST, SPACERS, nicks=1, overhangs=None):
    """Build every mrdna segment of the structure stored in ``FNAME``.

    The mesh is read with ``read_ply``; nicks and strand orientations are
    assigned per face; each oriented edge becomes a double-stranded segment;
    neighbouring segments of every face are then joined, either directly
    (terminal crossover), through a single-stranded spacer, or through an
    overhang placed at the shared vertex.

    Args:
        FNAME: path handed to ``read_ply``.
        LENGTH_OF_SMALLEST: value handed to ``edge.normalize`` together with
            the shortest raw edge length; also used to scale overhang lengths.
        SPACERS: number of nucleotides in each spacer between neighbouring
            edges. ``0`` joins them with a terminal crossover instead.
        nicks: when truthy, add a nick (position 10, forward strand) to the
            nick segment of every face that does not carry an overhang.
        overhangs: optional table (indexed with ``.iloc`` and by column) with
            the columns ``face``, ``overhang``, ``ds_length``, ``ss_length``,
            ``out_side``, ``ss_seq`` and ``ss_extra``.

    Returns:
        list: edge segments, then single-stranded spacers, then the double-
        and single-stranded overhang segments (the last two only when
        ``overhangs`` is given).

    Raises:
        ValueError: if an overhang's ``out_side`` is neither 5 nor 3.
            Previously this printed ``BUG`` and then crashed on an unbound
            local or silently reused the previous overhang's segment.
    """
    TERMINAL = "terminal_crossover"

    def random_strand_name():
        # Overhang spacers get a random numeric suffix rather than a counter.
        return "strand%s" % (np.random.randint(0, high=int(1e10)), )

    locs, faces = read_ply(FNAME)
    face_data = [FACE(i, index) for index, i in enumerate(faces)]

    # Nicked edges are registered first so that the orientation pass below
    # treats them as already claimed.
    assign_nicks(face_data)
    mentioned_edges = {f.nick for f in face_data}

    # An edge is "positive" (+1) for the first face that claims it and
    # "negative" (-1) for any face that sees it afterwards.
    for f in face_data:
        for e in f.e:
            if e in mentioned_edges or e[::-1] in mentioned_edges:
                f.ori.append(-1)
            else:
                f.ori.append(1)
                mentioned_edges.add(e)

    edges = [edge(i, locs[i[0]], locs[i[1]]) for i in mentioned_edges]
    min_length = np.min([x.rawlen for x in edges])

    # Overhang geometry: the double-stranded stem starts at the vertex and
    # points along the vertex position vector; the single-stranded tail
    # continues from the end of the stem in the same direction.
    ss_overhang_sequences = {}
    overhang_ds_edges = {}
    overhang_ss_edges = {}
    if overhangs is not None:
        for i in range(len(overhangs)):
            row = overhangs.iloc[i]
            vertex = row['overhang']
            vertex_location = locs[vertex]
            direction = np.array(vertex_location)
            norm = np.sqrt(np.sum(direction**2))

            bp = row['ds_length']
            length = bp * min_length / float(LENGTH_OF_SMALLEST) / 3.4
            end = length * direction / norm + direction
            overhang_ds_edges[vertex] = edge(vertex,
                                             vertex_location,
                                             end,
                                             overhang=True,
                                             bp_overhang=bp)

            bp_ss = row['ss_length']
            length_ss = bp_ss * min_length / float(LENGTH_OF_SMALLEST) / 3.4
            end_ss = end + length_ss * direction / norm
            overhang_ss_edges[vertex] = edge(vertex,
                                             end,
                                             end_ss,
                                             overhang=True,
                                             bp_overhang=bp_ss,
                                             out_orientation=row['out_side'])
            ss_overhang_sequences[vertex] = row['ss_seq']

    for item in (edges + list(overhang_ds_edges.values()) +
                 list(overhang_ss_edges.values())):
        item.normalize(min_length, LENGTH_OF_SMALLEST)

    segs = {}
    for index, e in enumerate(edges):
        segs[tuple(e.index)] = mrdna.DoubleStrandedSegment(
            name='helix%s' % (index, ),
            num_bp=e.nBp,
            start_position=e.nStart_c,
            end_position=e.nStop_c)

    # Overhang segments; names are offset (+100 / +200) so that they do not
    # clash with the edge helices.
    overhang_segs = {}
    for index, e in overhang_ds_edges.items():
        overhang_segs[e.index] = copy(
            mrdna.DoubleStrandedSegment(name='helix%s' % (index + 100, ),
                                        num_bp=int(e.nBp),
                                        start_position=list(e.nStart_c),
                                        end_position=list(e.nStop_c)))

    ss_overhang_segs = {}
    for index, e in overhang_ss_edges.items():
        # A 3'-oriented tail is laid out with start and end swapped.
        if e.out_orientation == 5:
            start, end = e.nStart_c, e.nStop_c
        elif e.out_orientation == 3:
            start, end = e.nStop_c, e.nStart_c
        else:
            raise ValueError(
                'out_side must be 5 or 3, got %r for overhang at vertex %r' %
                (e.out_orientation, index))
        tail = copy(
            mrdna.SingleStrandedSegment(name='helix%s' % (index + 200, ),
                                        num_nt=int(e.nBp),
                                        start_position=list(start),
                                        end_position=list(end)))
        tail.sequence = ss_overhang_sequences[index]
        ss_overhang_segs[e.index] = tail

    # Attach each single-stranded tail to the matching end of its stem.
    for index, ss_edge in overhang_ss_edges.items():
        if ss_edge.out_orientation == 3:
            overhang_segs[index].connect_end3(ss_overhang_segs[index])
        elif ss_edge.out_orientation == 5:
            overhang_segs[index].connect_end5(ss_overhang_segs[index])

    # (face, vertex) -> number of extra spacer nucleotides at that overhang.
    # Built once instead of rescanning the table for every connection; a
    # later row overrides an earlier one, as the sequential scan did.
    overhang_sites = {}
    if overhangs is not None:
        for face_id, vertex, extra in zip(overhangs['face'],
                                          overhangs['overhang'],
                                          overhangs['ss_extra']):
            overhang_sites[(face_id, vertex)] = extra

    single_stranded_dna = []
    ssDNA_index = 0
    for f in face_data:
        for con in f.connections:
            overhang_here = False
            if overhangs is not None:
                # The vertex shared by the two edges of this connection.
                vertex_index = [v for v in con[0] if v in con[1]][0]
                site = (f.index, vertex_index)
                overhang_here = site in overhang_sites
                if overhang_here:
                    ss_here = overhang_sites[site]

            ssDNA_index += 1
            c1, c2 = con
            # An edge missing from segs is stored under its reversed key and
            # is therefore traversed backwards by this face.
            c1_positive = c1 in segs
            c2_positive = c2 in segs
            first = segs[c1] if c1_positive else segs[c1[::-1]]
            second = segs[c2] if c2_positive else segs[c2[::-1]]
            join_first = (first.connect_end3
                          if c1_positive else first.connect_start3)
            join_second = (second.connect_start5
                           if c2_positive else second.connect_end5)

            if overhang_here:
                stem = overhang_segs[vertex_index]
                if ss_here == 0:
                    print('adding overhang here without ss!')
                    # Connections are registered from both sides, as in the
                    # original implementation.
                    join_first(stem.start5, type_=TERMINAL)
                    stem.connect_start5(
                        first.end3 if c1_positive else first.start3,
                        type_=TERMINAL)
                    join_second(stem.start3, type_=TERMINAL)
                    stem.connect_start3(
                        second.start5 if c2_positive else second.end5,
                        type_=TERMINAL)
                else:
                    print('adding overhang here with spacers = %s!' %
                          (ss_here, ))
                    lead = copy(
                        mrdna.SingleStrandedSegment(
                            random_strand_name(),
                            start_position=(first.end_position if c1_positive
                                            else first.start_position),
                            end_position=stem.start_position,
                            num_nt=ss_here))
                    join_first(lead)
                    stem.connect_start5(lead)
                    single_stranded_dna.append(lead)

                    # NOTE(review): the two orientations use different stem
                    # ends for the spacer coordinates; kept exactly as in the
                    # original.
                    if c2_positive:
                        start, end = stem.end_position, second.start_position
                    else:
                        start, end = second.end_position, stem.start_position
                    trail = copy(
                        mrdna.SingleStrandedSegment(random_strand_name(),
                                                    start_position=start,
                                                    end_position=end,
                                                    num_nt=ss_here))
                    stem.connect_start3(trail)
                    join_second(trail)
                    single_stranded_dna.append(trail)
            elif SPACERS != 0:
                # Each spacer end point is shifted by a random offset in
                # [-0.5, 0.5) applied to all coordinates.
                r1 = np.random.rand(1)[0] - 0.5
                r2 = np.random.rand(1)[0] - 0.5
                ss = copy(
                    mrdna.SingleStrandedSegment(
                        "strand%s" % (ssDNA_index, ),
                        start_position=(first.end_position if c1_positive else
                                        first.start_position) + r1,
                        end_position=(second.start_position if c2_positive
                                      else second.end_position) + r2,
                        num_nt=SPACERS))
                join_first(ss)
                join_second(ss)
                single_stranded_dna.append(ss)
            else:
                join_first(second.start5 if c2_positive else second.end5,
                           type_=TERMINAL)

    # Faces that carry an overhang are never nicked.
    no_nick_faces = []
    if overhangs is not None:
        no_nick_faces = list(overhangs['face'])
    if nicks:
        print("adding nicks!")
        for f in face_data:
            nick = f.nick
            if nick in segs and f.index not in no_nick_faces:
                segs[nick].add_nick(10, on_fwd_strand=True)
            else:
                print('failure')

    # Every spacer gets a poly-T sequence.
    for s in single_stranded_dna:
        s.sequence = s.num_nt * 'T'

    return (list(segs.values()) + single_stranded_dna +
            list(overhang_segs.values()) + list(ss_overhang_segs.values()))
def create_helix(self):
    """Create a double-stranded segment for this edge in ``self.helix``.

    The segment is named with a random float and spans ``self.nStart_c`` to
    ``self.nStop_c`` with ``self.nBp`` base pairs.
    """
    helix_name = str(np.random.rand())
    self.helix = mrdna.DoubleStrandedSegment(helix_name,
                                             self.nBp,
                                             start_position=self.nStart_c,
                                             end_position=self.nStop_c)
def get_segments(FNAME, LENGTH_OF_SMALLEST, SPACERS):
    """Build the double-stranded edges and the spacers that link them.

    The mesh in ``FNAME`` is read with ``read_ply``. Every edge becomes one
    double-stranded segment, and each connection listed on a face gets a
    single-stranded spacer of ``SPACERS`` nucleotides joining the two
    neighbouring edge segments.

    Returns:
        list: the edge segments followed by the spacer segments.
    """
    locs, faces = read_ply(FNAME)
    face_data = [face(i) for i in faces]

    # The first face to see an edge owns it (+1); later sightings of the same
    # edge, in either direction, are marked -1.
    mentioned_edges = set()
    for fc in face_data:
        for e in fc.e:
            if e in mentioned_edges or e[::-1] in mentioned_edges:
                fc.ori.append(-1)
            else:
                fc.ori.append(1)
                mentioned_edges.add(e)

    edges = [edge(i, locs[i[0]], locs[i[1]]) for i in mentioned_edges]
    min_length = np.min([x.rawlen for x in edges])
    for e in edges:
        e.normalize(min_length, LENGTH_OF_SMALLEST)

    segs = {}
    for index, e in enumerate(edges):
        segs[tuple(e.index)] = mrdna.DoubleStrandedSegment(
            name='helix%s' % (index, ),
            num_bp=e.nBp,
            start_position=e.nStart_c,
            end_position=e.nStop_c)

    single_stranded_dna = []
    ssDNA_index = 0
    for fc in face_data:
        for c1, c2 in fc.connections:
            ssDNA_index += 1

            # An edge absent from segs is stored under the reversed key and
            # is traversed backwards, so the opposite end is used.
            if c1 in segs:
                upstream = segs[c1]
                spacer_start = upstream.end_position
                attach_upstream = upstream.connect_end3
            else:
                upstream = segs[c1[::-1]]
                spacer_start = upstream.start_position
                attach_upstream = upstream.connect_start3

            if c2 in segs:
                downstream = segs[c2]
                spacer_end = downstream.start_position
                attach_downstream = downstream.connect_start5
            else:
                downstream = segs[c2[::-1]]
                spacer_end = downstream.end_position
                attach_downstream = downstream.connect_end5

            ss = copy(
                mrdna.SingleStrandedSegment("strand%s" % (ssDNA_index, ),
                                            start_position=spacer_start,
                                            end_position=spacer_end,
                                            num_nt=SPACERS))
            attach_upstream(ss)
            attach_downstream(ss)
            single_stranded_dna.append(ss)

    # TODO: add terminal crossovers to allow 0 free ssDNA.
    return list(segs.values()) + single_stranded_dna
def get_segments(FNAME, LENGTH_OF_SMALLEST, SPACERS, overhangs=None):
    """Build every mrdna segment of the structure stored in ``FNAME``.

    The mesh is read with ``read_ply``; nicks and strand orientations are
    assigned per face; each oriented edge becomes a double-stranded segment;
    neighbouring segments of every face are then joined directly (terminal
    crossover), through a single-stranded spacer, or through an overhang
    placed at the shared vertex.

    Args:
        FNAME: path handed to ``read_ply``.
        LENGTH_OF_SMALLEST: value handed to ``edge.normalize`` together with
            the shortest raw edge length; also used to scale overhang lengths.
        SPACERS: number of nucleotides in each spacer between neighbouring
            edges. ``0`` joins them with a terminal crossover instead.
        overhangs: optional table (indexed with ``.iloc`` and by column) with
            the columns ``face``, ``overhang``, ``ds_length``, ``ss_length``
            and ``out_side``.

    Returns:
        list: edge segments, then single-stranded spacers, then the double-
        and single-stranded overhang segments (the last two only when
        ``overhangs`` is given).

    Raises:
        ValueError: if an overhang's ``out_side`` is neither 5 nor 3.
    """
    TERMINAL = "terminal_crossover"

    locs, faces = read_ply(FNAME)
    face_data = [FACE(i, index) for index, i in enumerate(faces)]

    # Nicked edges are registered first so that the orientation pass below
    # treats them as already claimed.
    assign_nicks(face_data)
    mentioned_edges = {f.nick for f in face_data}

    # An edge is "positive" (+1) for the first face that claims it and
    # "negative" (-1) for any face that sees it afterwards.
    for f in face_data:
        for e in f.e:
            if e in mentioned_edges or e[::-1] in mentioned_edges:
                f.ori.append(-1)
            else:
                f.ori.append(1)
                mentioned_edges.add(e)

    edges = [edge(i, locs[i[0]], locs[i[1]]) for i in mentioned_edges]
    min_length = np.min([x.rawlen for x in edges])

    # Overhang geometry: the double-stranded stem starts at the vertex and
    # points along the vertex position vector; the single-stranded tail
    # continues from the end of the stem in the same direction.
    overhang_ds_edges = []
    overhang_ss_edges = []
    if overhangs is not None:
        for i in range(len(overhangs)):
            row = overhangs.iloc[i]
            vertex = row['overhang']
            vertex_location = locs[vertex]
            direction = np.array(vertex_location)
            norm = np.sqrt(np.sum(direction**2))

            bp = row['ds_length']
            length = bp * min_length / float(LENGTH_OF_SMALLEST) / 3.4
            end = length * direction / norm + direction
            overhang_ds_edges.append(
                edge(vertex,
                     vertex_location,
                     end,
                     overhang=True,
                     bp_overhang=bp))

            bp_ss = row['ss_length']
            length_ss = bp_ss * min_length / float(LENGTH_OF_SMALLEST) / 3.4
            end_ss = end + length_ss * direction / norm
            overhang_ss_edges.append(
                edge(vertex,
                     end,
                     end_ss,
                     overhang=True,
                     bp_overhang=bp_ss,
                     out_orientation=row['out_side']))

    for item in edges + overhang_ds_edges + overhang_ss_edges:
        item.normalize(min_length, LENGTH_OF_SMALLEST)

    segs = {}
    for index, e in enumerate(edges):
        segs[tuple(e.index)] = mrdna.DoubleStrandedSegment(
            name='helix%s' % (index, ),
            num_bp=e.nBp,
            start_position=e.nStart_c,
            end_position=e.nStop_c)

    # Overhang segments are keyed by the edge's index attribute. Names are
    # offset (+100 / +200) so that they cannot clash with the edge helices;
    # the 3'-oriented tails previously missed the offset.
    overhang_segs = {}
    for index, e in enumerate(overhang_ds_edges):
        overhang_segs[e.index] = copy(
            mrdna.DoubleStrandedSegment(name='helix%s' % (index + 100, ),
                                        num_bp=int(e.nBp),
                                        start_position=list(e.nStart_c),
                                        end_position=list(e.nStop_c)))

    ss_overhang_segs = {}
    for index, e in enumerate(overhang_ss_edges):
        # A 3'-oriented tail is laid out with start and end swapped.
        if e.out_orientation == 5:
            start, end = e.nStart_c, e.nStop_c
        elif e.out_orientation == 3:
            start, end = e.nStop_c, e.nStart_c
        else:
            raise ValueError(
                'out_side must be 5 or 3, got %r for overhang %r' %
                (e.out_orientation, e.index))
        ss_overhang_segs[e.index] = copy(
            mrdna.SingleStrandedSegment(name='helix%s' % (index + 200, ),
                                        num_nt=int(e.nBp),
                                        start_position=list(start),
                                        end_position=list(end)))

    # Attach each single-stranded tail to the matching end of its stem. The
    # orientation is read from the edge itself: overhang_ss_edges is a list,
    # so it must not be indexed with the edge's index attribute.
    for ss_edge in overhang_ss_edges:
        stem = overhang_segs[ss_edge.index]
        tail = ss_overhang_segs[ss_edge.index]
        if ss_edge.out_orientation == 3:
            stem.connect_end3(tail)
        elif ss_edge.out_orientation == 5:
            stem.connect_end5(tail)

    # (face, vertex) pairs that carry an overhang, built once instead of
    # rescanning the table for every connection.
    overhang_sites = set()
    if overhangs is not None:
        overhang_sites = set(zip(overhangs['face'], overhangs['overhang']))

    single_stranded_dna = []
    ssDNA_index = 0
    for f in face_data:
        for con in f.connections:
            overhang_here = False
            if overhangs is not None:
                # The vertex shared by the two edges of this connection.
                vertex_index = [v for v in con[0] if v in con[1]][0]
                overhang_here = (f.index, vertex_index) in overhang_sites

            ssDNA_index += 1
            c1, c2 = con
            # An edge missing from segs is stored under its reversed key and
            # is therefore traversed backwards by this face.
            c1_positive = c1 in segs
            c2_positive = c2 in segs
            first = segs[c1] if c1_positive else segs[c1[::-1]]
            second = segs[c2] if c2_positive else segs[c2[::-1]]
            join_first = (first.connect_end3
                          if c1_positive else first.connect_start3)
            join_second = (second.connect_start5
                           if c2_positive else second.connect_end5)

            if overhang_here:
                print('adding overhang here!')
                stem = overhang_segs[vertex_index]
                # Connections are registered from both sides, as in the
                # original implementation.
                join_first(stem.start5, type_=TERMINAL)
                stem.connect_start5(
                    first.end3 if c1_positive else first.start3,
                    type_=TERMINAL)
                join_second(stem.start3, type_=TERMINAL)
                stem.connect_start3(
                    second.start5 if c2_positive else second.end5,
                    type_=TERMINAL)
            elif SPACERS != 0:
                ss = copy(
                    mrdna.SingleStrandedSegment(
                        "strand%s" % (ssDNA_index, ),
                        start_position=(first.end_position if c1_positive else
                                        first.start_position),
                        end_position=(second.start_position
                                      if c2_positive else second.end_position),
                        num_nt=SPACERS))
                join_first(ss)
                join_second(ss)
                single_stranded_dna.append(ss)
            else:
                join_first(second.start5 if c2_positive else second.end5,
                           type_=TERMINAL)

    # Nicking is switched off in this variant; the code is kept so that it
    # can be re-enabled by flipping the flag.
    nicks = False
    if nicks:
        for f in face_data:
            nick = f.nick
            if nick in segs:
                segs[nick].add_nick(10, on_fwd_strand=True)
            else:
                print('failure')

    return (list(segs.values()) + single_stranded_dna +
            list(overhang_segs.values()) + list(ss_overhang_segs.values()))
def get_segments(FNAME, LENGTH_OF_SMALLEST, SPACERS):
    """Build the nicked double-stranded edges and the links between them.

    The mesh in ``FNAME`` is read with ``read_ply``. Nicks are assigned to the
    faces first, and the nicked edges seed the orientation pass. Every edge
    becomes one double-stranded segment. Neighbouring edges of each face are
    joined by a spacer of ``SPACERS`` nucleotides, or by a terminal crossover
    when ``SPACERS`` is 0. Finally each face's nick segment receives a nick
    at position 5 on the forward strand.

    Returns:
        list: the edge segments followed by the spacer segments.
    """
    locs, faces = read_ply(FNAME)
    face_data = [face(i) for i in faces]

    # Work out where the nicks are so that the strand orientations can be
    # chosen accordingly: nicked edges count as already mentioned.
    assign_nicks(face_data)
    mentioned_edges = {fc.nick for fc in face_data}

    for fc in face_data:
        for e in fc.e:
            if e in mentioned_edges or e[::-1] in mentioned_edges:
                fc.ori.append(-1)
            else:
                fc.ori.append(1)
                mentioned_edges.add(e)

    edges = [edge(i, locs[i[0]], locs[i[1]]) for i in mentioned_edges]
    min_length = np.min([x.rawlen for x in edges])
    for e in edges:
        e.normalize(min_length, LENGTH_OF_SMALLEST)

    segs = {}
    for index, e in enumerate(edges):
        segs[tuple(e.index)] = mrdna.DoubleStrandedSegment(
            name='helix%s' % (index, ),
            num_bp=e.nBp,
            start_position=e.nStart_c,
            end_position=e.nStop_c)

    single_stranded_dna = []
    ssDNA_index = 0
    for fc in face_data:
        for c1, c2 in fc.connections:
            ssDNA_index += 1

            # An edge absent from segs is stored under the reversed key and
            # is traversed backwards, so the opposite end is used.
            c1_forward = c1 in segs
            c2_forward = c2 in segs
            upstream = segs[c1] if c1_forward else segs[c1[::-1]]
            downstream = segs[c2] if c2_forward else segs[c2[::-1]]
            attach_upstream = (upstream.connect_end3
                               if c1_forward else upstream.connect_start3)

            if SPACERS == 0:
                # No free ssDNA: link the two helices directly.
                target = downstream.start5 if c2_forward else downstream.end5
                attach_upstream(target, type_="terminal_crossover")
                continue

            spacer_start = (upstream.end_position
                            if c1_forward else upstream.start_position)
            spacer_end = (downstream.start_position
                          if c2_forward else downstream.end_position)
            ss = copy(
                mrdna.SingleStrandedSegment("strand%s" % (ssDNA_index, ),
                                            start_position=spacer_start,
                                            end_position=spacer_end,
                                            num_nt=SPACERS))
            attach_upstream(ss)
            if c2_forward:
                downstream.connect_start5(ss)
            else:
                downstream.connect_end5(ss)
            single_stranded_dna.append(ss)

    # Add one nick per face on its designated segment.
    for fc in face_data:
        nick = fc.nick
        if nick in segs:
            segs[nick].add_nick(5, on_fwd_strand=True)
        else:
            print('failure')

    return list(segs.values()) + single_stranded_dna