def get_overhang_segs(overhang_ds_edges, overhang_ss_edges,
                      ss_overhang_sequences):
    """Build the mrdna segments for double- and single-stranded overhangs.

    Args:
        overhang_ds_edges: mapping ``index -> edge`` for the double-stranded
            overhangs. ``nBp``, ``nStart_c``, ``nStop_c`` and ``index`` are
            read from every edge; the mapping key must support ``+ 100``
            because it is used to build the segment name.
        overhang_ss_edges: mapping ``index -> edge`` for the single-stranded
            overhangs. In addition to the attributes above,
            ``out_orientation`` is read and must be 5 or 3.
        ss_overhang_sequences: mapping with the same keys as
            ``overhang_ss_edges`` giving the sequence assigned to each
            single-stranded segment.

    Returns:
        ``(overhang_segs, ss_overhang_segs)``; both dicts are keyed by the
        edge's own ``index`` attribute (not by the input mapping key).

    Raises:
        ValueError: if an edge's ``out_orientation`` is neither 5 nor 3.
            Previously this case reused the segment built in the preceding
            iteration (or failed with an unbound local on the first one).
    """
    overhang_segs = {}
    for index in overhang_ds_edges:
        e = overhang_ds_edges[index]
        overhang_segs[e.index] = copy(
            mrdna.DoubleStrandedSegment(name='helix%s' % (index + 100, ),
                                        num_bp=int(e.nBp),
                                        start_position=list(e.nStart_c),
                                        end_position=list(e.nStop_c)))

    ss_overhang_segs = {}
    for index in overhang_ss_edges:
        e = overhang_ss_edges[index]

        # Orientation 3 builds the strand with start and stop swapped
        # relative to orientation 5.
        if e.out_orientation == 5:
            start, stop = e.nStart_c, e.nStop_c
        elif e.out_orientation == 3:
            start, stop = e.nStop_c, e.nStart_c
        else:
            raise ValueError(
                'overhang %r has out_orientation %r; expected 5 or 3' %
                (index, e.out_orientation))

        tmp = copy(
            mrdna.SingleStrandedSegment(name='helix%s' % (index + 200, ),
                                        num_nt=int(e.nBp),
                                        start_position=list(start),
                                        end_position=list(stop)))
        tmp.sequence = ss_overhang_sequences[index]
        ss_overhang_segs[e.index] = tmp

    return (overhang_segs, ss_overhang_segs)
Beispiel #2
0
 def create_helix(self):
     """Create the mrdna segment for this object and store it in ``self.dna``.

     A ``DoubleStrandedSegment`` is built when ``self.is_dsdna`` is truthy,
     otherwise a ``SingleStrandedSegment``. In both cases ``self.num_bp``
     supplies the length; for the single-stranded segment it is passed as
     ``num_nt``.
     """
     if self.is_dsdna:
         segment_cls, length_kw = mrdna.DoubleStrandedSegment, 'num_bp'
     else:
         segment_cls, length_kw = mrdna.SingleStrandedSegment, 'num_nt'

     settings = {
         'name': self.name,
         'start_position': self.start_position,
         'end_position': self.end_position,
         length_kw: self.num_bp,
     }
     self.dna = segment_cls(**settings)
def get_segs(edges):
    """Create one double-stranded mrdna segment per edge.

    Each segment is named ``helix<N>`` where N is the edge's position in
    ``edges``, and takes ``nBp``, ``nStart_c`` and ``nStop_c`` from the edge.

    Returns:
        dict mapping ``tuple(edge.index)`` to the created segment.
    """
    helices = {}
    for position, item in enumerate(edges):
        helix = mrdna.DoubleStrandedSegment(
            name='helix{}'.format(position),
            num_bp=item.nBp,
            start_position=item.nStart_c,
            end_position=item.nStop_c)
        helices[tuple(item.index)] = helix
    return helices
def get_segments(FNAME,
                 LENGTH_OF_SMALLEST,
                 SPACERS,
                 nicks=1,
                 overhangs=None,
                 turberfieldnicks=0,
                 ldmoverhangs=None):
    """Assemble the list of mrdna segments for the structure in ``FNAME``.

    Args:
        FNAME: passed to ``get_face_data`` to obtain the faces and the vertex
            locations.
        LENGTH_OF_SMALLEST: forwarded, together with the shortest raw edge
            length, to ``normalize`` and ``get_overhang_edges``.
        SPACERS: forwarded to ``join_segments`` for vertices that carry
            neither an overhang nor an LDM break.
        nicks: when truthy, every face's ``nick`` edge that is present in the
            segment table gets ``add_nick(10, on_fwd_strand=True)``, except
            for faces listed in ``overhangs['face']``.
        overhangs: optional overhang table. Only ``overhangs['face']`` is
            read here; everything else is consumed by the helper functions
            (presumably a pandas DataFrame - confirm against the caller).
        turberfieldnicks: accepted for interface compatibility; not used.
        ldmoverhangs: optional table with ``'face'`` and ``'overhang'``
            columns. A connection whose face index and shared vertex appear
            in it is not joined; the edges meeting at that vertex are split
            into two segments instead.

    Returns:
        list of segments: the edge helices, the single-stranded spacers and,
        when ``overhangs`` is given, the double- and single-stranded overhang
        segments.
    """
    face_data, locs = get_face_data(FNAME)

    assign_nicks(face_data)
    mentioned_edges = get_mentioned_edges(face_data)
    mentioned_edges = do_orientation_assignment(face_data, mentioned_edges)

    edges = [edge(i, locs[i[0]], locs[i[1]]) for i in mentioned_edges]
    min_length = np.min([x.rawlen for x in edges])

    has_overhangs = overhangs is not None

    to_normalize = list(edges)
    if has_overhangs:
        overhang_ds_edges, overhang_ss_edges, ss_overhang_sequences = get_overhang_edges(
            overhangs, locs, min_length, LENGTH_OF_SMALLEST)
        to_normalize += list(overhang_ds_edges.values()) + list(
            overhang_ss_edges.values())
    for item in to_normalize:
        item.normalize(min_length, LENGTH_OF_SMALLEST)

    segs = get_segs(edges)

    if has_overhangs:
        overhang_segs, ss_overhang_segs = get_overhang_segs(
            overhang_ds_edges, overhang_ss_edges, ss_overhang_sequences)
        connect_ssdna_to_overhangs(overhang_ss_edges, ss_overhang_segs,
                                   overhang_ds_edges, overhang_segs)

    # (face, vertex) pairs that must be broken, read once instead of
    # re-scanning the table row by row for every connection.
    ldm_sites = []
    if ldmoverhangs is not None:
        ldm_sites = list(zip(ldmoverhangs['face'], ldmoverhangs['overhang']))

    single_stranded_dna = []
    ssDNA_index = 0
    ldm_breakable_segs = []

    for f in face_data:
        for con in f.connections:
            vertex_index = intersection(con[0], con[1])[0]
            face_index = f.index

            overhang_here, ss_here = is_there_an_overhang_here(
                f, con, overhangs)

            ssDNA_index += 1

            c1, c2 = con
            c1_positive = c1 in segs
            c2_positive = c2 in segs

            ldm_break = any(face == face_index and vertex == vertex_index
                            for face, vertex in ldm_sites)

            if overhang_here:
                single_stranded_dna.extend(
                    join_overhang(ss_here, c1_positive, c2_positive,
                                  overhang_segs, segs, c1, c2, vertex_index))
            elif not ldm_break:
                single_stranded_dna.extend(
                    join_segments(c1_positive, c2_positive, segs, c1, c2,
                                  ssDNA_index, SPACERS))
            else:
                # Collect the face's edges that touch this vertex, each
                # re-oriented so that the vertex is its last element.
                segs_broken = []
                for i in f.e:
                    if vertex_index in i:
                        if i[-1] == vertex_index:
                            segs_broken.append(i)
                        else:
                            segs_broken.append(i[::-1])
                ldm_breakable_segs.append(segs_broken)

    # Split every collected edge into a long "start" part and a short
    # overhang part at the vertex end.
    overhang_length = 14  # TODO: read this from the input file.

    # NOTE(review): the replacement segments are collected here but never
    # added to the returned list, while the original segment is removed from
    # ``segs`` - confirm whether they should be part of the result.
    ldm_replacements = []

    for pair in ldm_breakable_segs:
        for seg_name in pair:
            if seg_name not in segs:
                print('Boo!')
                continue

            print('Woo')
            to_be_broken = segs.pop(seg_name)

            s = to_be_broken.start_position
            e = to_be_broken.end_position
            nts = to_be_broken.num_nt

            # Start side: everything except the last ``overhang_length``
            # base pairs, placed proportionally along the original axis.
            start_1 = s
            end_1 = s + (e - s) * (nts - overhang_length) / float(nts)
            nbp_1 = nts - overhang_length

            # Overhang side: the remaining base pairs up to the vertex.
            start_2 = end_1
            end_2 = e
            nbp_2 = overhang_length

            start_seg = mrdna.DoubleStrandedSegment(name='helix%s' %
                                                    (np.random.rand()),
                                                    num_bp=nbp_1,
                                                    start_position=start_1,
                                                    end_position=end_1)

            end_seg = mrdna.DoubleStrandedSegment(name='helix%s' %
                                                  (np.random.rand()),
                                                  num_bp=nbp_2,
                                                  start_position=start_2,
                                                  end_position=end_2)

            # Only works if the strand runs anticlockwise around the face.
            start_seg.connect_end3(end_seg.start5)

            # Re-attach the connections that pointed at the removed segment.
            # NOTE(review): this re-wiring is unfinished in the original
            # (marked TODO); connections whose B side is not the removed
            # segment are left untouched.
            print(to_be_broken)
            for connection in to_be_broken.connections:
                A = connection.A
                B = connection.B
                if B.container == to_be_broken:
                    if B.on_fwd_strand:
                        A.container.connections.append(
                            mrdna.segmentmodel.Connection(
                                A, end_seg.start5, type_=connection.type_))
                    else:
                        A.container.connections.append(
                            mrdna.segmentmodel.Connection(
                                A.end3, start_seg, type_=connection.type_))

            ldm_replacements.append(start_seg)
            ldm_replacements.append(end_seg)

    # Faces that carry an overhang are never nicked.
    no_nick_faces = []
    if has_overhangs:
        no_nick_faces = list(overhangs['face'])

    if nicks:
        print("adding nicks!")
        for f in face_data:
            nick = f.nick
            if ((nick in segs) and (f.index not in no_nick_faces)):
                segs[nick].add_nick(10, on_fwd_strand=True)
            else:
                print('failure')

    # Make sure that all the ssDNA spacers have the sequence 'TTT...'.
    for s in single_stranded_dna:
        s.sequence = s.num_nt * 'T'

    segs_list = [segs[i] for i in segs] + single_stranded_dna
    if has_overhangs:
        segs_list = segs_list + [overhang_segs[i] for i in overhang_segs
                                 ] + [ss_overhang_segs[i]
                                      for i in ss_overhang_segs]

    return segs_list
def get_segments(FNAME, LENGTH_OF_SMALLEST, SPACERS, nicks=1, overhangs=None):
    """Build the list of mrdna segments for the mesh stored in ``FNAME``.

    Args:
        FNAME: passed to ``read_ply`` to obtain vertex locations and faces.
        LENGTH_OF_SMALLEST: forwarded to ``normalize`` with the shortest raw
            edge length; also scales the overhang lengths.
        SPACERS: nucleotides in the single-stranded spacer placed between two
            edges at a vertex. With 0 the edges are connected directly by a
            ``terminal_crossover``.
        nicks: when truthy, each face's ``nick`` edge that has a segment gets
            ``add_nick(10, on_fwd_strand=True)``, except for faces listed in
            ``overhangs['face']``.
        overhangs: optional table accessed with ``len``, ``.iloc[i]`` and the
            columns ``'face'``, ``'overhang'``, ``'ds_length'``,
            ``'ss_length'``, ``'out_side'``, ``'ss_seq'`` and ``'ss_extra'``
            (presumably a pandas DataFrame - confirm against the caller).

    Returns:
        list of segments: edge helices, single-stranded spacers and, when
        ``overhangs`` is given, the double- and single-stranded overhangs.

    Raises:
        ValueError: if an overhang's ``out_side`` is neither 5 nor 3.
            Previously this printed 'BUG' and carried on with the segment of
            the previous overhang (or failed with an unbound local).
    """
    locs, faces = read_ply(FNAME)

    face_data = []
    for index, i in enumerate(faces):
        print(FACE)  # debug output kept so that stdout stays unchanged
        face_data.append(FACE(i, index))

    # Nick edges are registered first so that their orientation is the one
    # stored in the segment table.
    assign_nicks(face_data)
    mentioned_edges = set([f.nick for f in face_data])

    # An edge seen for the first time (in either direction) gets orientation
    # +1 on that face; every later sighting gets -1.
    for i in face_data:
        for e in i.e:
            if e not in mentioned_edges and e[::-1] not in mentioned_edges:
                i.ori.append(1)
                mentioned_edges.add(e)
            else:
                i.ori.append(-1)

    edges = [edge(i, locs[i[0]], locs[i[1]]) for i in mentioned_edges]
    min_length = np.min([x.rawlen for x in edges])

    has_overhangs = overhangs is not None

    to_normalize = list(edges)
    if has_overhangs:
        ss_overhang_sequences = {}
        overhang_ds_edges = {}
        overhang_ss_edges = {}

        for i in range(len(overhangs)):
            row = overhangs.iloc[i]
            vertex = row['overhang']
            vertex_location = locs[vertex]
            origin = np.array(vertex_location)
            radius = np.sqrt(np.sum(origin**2))

            # Double-stranded part: starts at the vertex and extends along
            # the direction from the coordinate origin through the vertex.
            bp = row['ds_length']
            length = bp * min_length / float(LENGTH_OF_SMALLEST) / 3.4
            end = length * origin / radius + origin
            overhang_ds_edges[vertex] = edge(vertex,
                                             vertex_location,
                                             end,
                                             overhang=True,
                                             bp_overhang=bp)

            # Single-stranded part: continues from the end of the
            # double-stranded part in the same direction.
            bp_ss = row['ss_length']
            length_ss = bp_ss * min_length / float(LENGTH_OF_SMALLEST) / 3.4
            end_ss = end + length_ss * origin / radius
            overhang_ss_edges[vertex] = edge(vertex,
                                             end,
                                             end_ss,
                                             overhang=True,
                                             bp_overhang=bp_ss,
                                             out_orientation=row['out_side'])

            ss_overhang_sequences[vertex] = row['ss_seq']

        to_normalize += list(overhang_ds_edges.values()) + list(
            overhang_ss_edges.values())

    for item in to_normalize:
        item.normalize(min_length, LENGTH_OF_SMALLEST)

    segs = {}
    for index, e in enumerate(edges):
        segs[tuple(e.index)] = mrdna.DoubleStrandedSegment(
            name='helix%s' % (index, ),
            num_bp=e.nBp,
            start_position=e.nStart_c,
            end_position=e.nStop_c)

    if has_overhangs:
        overhang_segs = {}
        for index in overhang_ds_edges:
            e = overhang_ds_edges[index]
            overhang_segs[e.index] = copy(
                mrdna.DoubleStrandedSegment(name='helix%s' % (index + 100, ),
                                            num_bp=int(e.nBp),
                                            start_position=list(e.nStart_c),
                                            end_position=list(e.nStop_c)))

        ss_overhang_segs = {}
        for index in overhang_ss_edges:
            e = overhang_ss_edges[index]
            # Orientation 3 builds the strand with start and stop swapped.
            if e.out_orientation == 5:
                start, stop = e.nStart_c, e.nStop_c
            elif e.out_orientation == 3:
                start, stop = e.nStop_c, e.nStart_c
            else:
                raise ValueError(
                    'overhang %r has out_side %r; expected 5 or 3' %
                    (index, e.out_orientation))
            tmp = copy(
                mrdna.SingleStrandedSegment(name='helix%s' % (index + 200, ),
                                            num_nt=int(e.nBp),
                                            start_position=list(start),
                                            end_position=list(stop)))
            tmp.sequence = ss_overhang_sequences[index]
            print(tmp.sequence)
            ss_overhang_segs[e.index] = tmp

        # Attach each single-stranded overhang to the double-stranded
        # overhang that shares its index.
        for index in overhang_ss_edges:
            ds_seg = overhang_segs[index]
            ss_seg = ss_overhang_segs[index]
            if overhang_ss_edges[index].out_orientation == 3:
                ds_seg.connect_end3(ss_seg)
            else:
                ds_seg.connect_end5(ss_seg)

    single_stranded_dna = []
    ssDNA_index = 0

    for f in face_data:
        for con in f.connections:
            overhang_here = False
            if has_overhangs:
                # The vertex of a connection is the first element the two
                # edges have in common.
                vertex_index = [v for v in con[0] if v in con[1]][0]
                overhang_here, ss_here = _find_overhang(
                    overhangs, f.index, vertex_index)
                if overhang_here:
                    print(f.index)

            ssDNA_index += 1
            c1, c2 = con

            if overhang_here:
                # Progress messages were only ever emitted on the
                # SPACERS != 0 path; the joining itself does not depend on
                # SPACERS.
                if SPACERS != 0:
                    if ss_here == 0:
                        print('adding overhang here without ss!')
                    else:
                        print('adding overhang here with spacers = %s!' %
                              (ss_here, ))
                        print(ss_here)
                single_stranded_dna.extend(
                    _join_via_overhang(segs, overhang_segs[vertex_index], c1,
                                       c2, ss_here))
            else:
                single_stranded_dna.extend(
                    _join_adjacent(segs, c1, c2, SPACERS, ssDNA_index))

    # Faces that carry an overhang are never nicked.
    no_nick_faces = []
    if has_overhangs:
        no_nick_faces = list(overhangs['face'])

    if nicks:
        print("adding nicks!")
        for f in face_data:
            nick = f.nick
            if ((nick in segs) and (f.index not in no_nick_faces)):
                segs[nick].add_nick(10, on_fwd_strand=True)
            else:
                print('failure')

    # Make sure that all the ssDNA spacers have the sequence 'TTT...'.
    for s in single_stranded_dna:
        s.sequence = s.num_nt * 'T'

    segs_list = [segs[i] for i in segs] + single_stranded_dna
    if has_overhangs:
        segs_list = segs_list + [overhang_segs[i] for i in overhang_segs
                                 ] + [ss_overhang_segs[i]
                                      for i in ss_overhang_segs]

    return segs_list


def _find_overhang(overhangs, face_index, vertex_index):
    """Return ``(found, ss_extra)`` for the overhang at (face, vertex).

    When several rows match, the last one wins; ``ss_extra`` is None when no
    row matches.
    """
    found, ss_extra = False, None
    for i in range(len(overhangs)):
        row = overhangs.iloc[i]
        if row['face'] == face_index and row['overhang'] == vertex_index:
            found, ss_extra = True, row['ss_extra']
    return found, ss_extra


def _leaving_end(segs, c1):
    """Return ``(segment, side)`` for the 3' end through which c1 is left.

    ``side`` is ``'end'`` when the edge is stored as given and ``'start'``
    when only its reverse is stored.
    """
    if c1 in segs:
        return segs[c1], 'end'
    return segs[c1[::-1]], 'start'


def _entering_end(segs, c2):
    """Return ``(segment, side)`` for the 5' end through which c2 is entered.

    ``side`` is ``'start'`` when the edge is stored as given and ``'end'``
    when only its reverse is stored.
    """
    if c2 in segs:
        return segs[c2], 'start'
    return segs[c2[::-1]], 'end'


def _new_strand(name, start_position, end_position, num_nt):
    """Create a (copied) single-stranded spacer segment."""
    return copy(
        mrdna.SingleStrandedSegment(name,
                                    start_position=start_position,
                                    end_position=end_position,
                                    num_nt=num_nt))


def _join_via_overhang(segs, overhang_seg, c1, c2, ss_here):
    """Connect edge c1 -> overhang -> edge c2; return the spacers created.

    With ``ss_here == 0`` the segments are joined directly by terminal
    crossovers, otherwise through randomly named spacers of ``ss_here``
    nucleotides on either side of the overhang.
    """
    crossover = "terminal_crossover"
    spacers = []

    # Edge c1 into the 5' start of the overhang.
    out_seg, out_side = _leaving_end(segs, c1)
    connect_out = getattr(out_seg, 'connect_%s3' % out_side)
    if ss_here == 0:
        connect_out(overhang_seg.start5, type_=crossover)
        overhang_seg.connect_start5(getattr(out_seg, out_side + '3'),
                                    type_=crossover)
    else:
        ss = _new_strand(
            "strand%s" % (np.random.randint(0, high=int(1e10)), ),
            getattr(out_seg, out_side + '_position'),
            overhang_seg.start_position, ss_here)
        connect_out(ss)
        overhang_seg.connect_start5(ss)
        spacers.append(ss)

    # 3' start of the overhang into edge c2.
    in_seg, in_side = _entering_end(segs, c2)
    connect_in = getattr(in_seg, 'connect_%s5' % in_side)
    if ss_here == 0:
        connect_in(overhang_seg.start3, type_=crossover)
        overhang_seg.connect_start3(getattr(in_seg, in_side + '5'),
                                    type_=crossover)
    else:
        # NOTE(review): the spacer end points are not symmetric between the
        # two orientations of c2; reproduced exactly as in the original.
        if in_side == 'start':
            start, end = overhang_seg.end_position, in_seg.start_position
        else:
            start, end = in_seg.end_position, overhang_seg.start_position
        ss = _new_strand(
            "strand%s" % (np.random.randint(0, high=int(1e10)), ), start,
            end, ss_here)
        overhang_seg.connect_start3(ss)
        connect_in(ss)
        spacers.append(ss)

    return spacers


def _join_adjacent(segs, c1, c2, SPACERS, ssDNA_index):
    """Connect edge c1 to edge c2 at a vertex without overhang.

    Returns a one-element list with the spacer, or an empty list when
    ``SPACERS == 0`` and the edges are joined by a terminal crossover.
    """
    if SPACERS != 0:
        # Small random offsets applied to the spacer's two end points.
        r1 = np.random.rand(1)[0] - 0.5
        r2 = np.random.rand(1)[0] - 0.5

    out_seg, out_side = _leaving_end(segs, c1)
    in_seg, in_side = _entering_end(segs, c2)
    connect_out = getattr(out_seg, 'connect_%s3' % out_side)

    if SPACERS != 0:
        ss = _new_strand("strand%s" % (ssDNA_index, ),
                         getattr(out_seg, out_side + '_position') + r1,
                         getattr(in_seg, in_side + '_position') + r2, SPACERS)
        connect_out(ss)
        getattr(in_seg, 'connect_%s5' % in_side)(ss)
        return [ss]

    connect_out(getattr(in_seg, in_side + '5'), type_="terminal_crossover")
    return []
 def create_helix(self):
     """Create the double-stranded mrdna segment and store it in ``self.helix``.

     The segment is named with the string form of a fresh ``np.random.rand()``
     value, has ``self.nBp`` as its second positional argument and spans
     ``self.nStart_c`` to ``self.nStop_c``.
     """
     random_name = str(np.random.rand())
     endpoints = {
         'start_position': self.nStart_c,
         'end_position': self.nStop_c,
     }
     self.helix = mrdna.DoubleStrandedSegment(random_name, self.nBp,
                                              **endpoints)
Beispiel #7
0
def get_segments(FNAME, LENGTH_OF_SMALLEST, SPACERS):
    """Build the mrdna segments (helices plus spacers) for the mesh in FNAME.

    Args:
        FNAME: passed to ``read_ply`` to obtain vertex locations and faces.
        LENGTH_OF_SMALLEST: forwarded to ``normalize`` together with the
            shortest raw edge length.
        SPACERS: number of nucleotides of the single-stranded spacer that is
            inserted at every face connection.
            TODO: SPACERS == 0 (direct terminal crossovers) is not supported
            by this function yet.

    Returns:
        list with one double-stranded segment per edge followed by one
        single-stranded spacer per face connection.
    """
    locs, faces = read_ply(FNAME)

    mentioned_edges = set()
    face_data = [face(i) for i in faces]

    # An edge seen for the first time (in either direction) gets orientation
    # +1 on that face; every later sighting gets -1.
    for i in face_data:
        for e in i.e:
            if e not in mentioned_edges and e[::-1] not in mentioned_edges:
                i.ori.append(1)
                mentioned_edges.add(e)
            else:
                i.ori.append(-1)

    edges = [edge(i, locs[i[0]], locs[i[1]]) for i in mentioned_edges]

    min_length = np.min([x.rawlen for x in edges])
    for i in edges:
        i.normalize(min_length, LENGTH_OF_SMALLEST)

    segs = {}
    for index, e in enumerate(edges):
        segs[tuple(e.index)] = mrdna.DoubleStrandedSegment(
            name='helix%s' % (index, ),
            num_bp=e.nBp,
            start_position=e.nStart_c,
            end_position=e.nStop_c)

    single_stranded_dna = []
    ssDNA_index = 0
    for f in face_data:
        for con in f.connections:
            ssDNA_index += 1
            c1, c2 = con

            # Leaving c1: through its 3' end if the edge is stored as given,
            # through its 3' start if only the reversed edge is stored.
            if c1 in segs:
                out_seg = segs[c1]
                start = out_seg.end_position
                connect_out = out_seg.connect_end3
            else:
                out_seg = segs[c1[::-1]]
                start = out_seg.start_position
                connect_out = out_seg.connect_start3

            # Entering c2: through its 5' start if stored as given, through
            # its 5' end if only the reversed edge is stored.
            if c2 in segs:
                in_seg = segs[c2]
                end = in_seg.start_position
                connect_in = in_seg.connect_start5
            else:
                in_seg = segs[c2[::-1]]
                end = in_seg.end_position
                connect_in = in_seg.connect_end5

            ss = copy(
                mrdna.SingleStrandedSegment("strand%s" % (ssDNA_index, ),
                                            start_position=start,
                                            end_position=end,
                                            num_nt=SPACERS))
            connect_out(ss)
            connect_in(ss)
            single_stranded_dna.append(ss)

    return [segs[i] for i in segs] + single_stranded_dna
Beispiel #8
0
def get_segments(FNAME, LENGTH_OF_SMALLEST, SPACERS,overhangs = None):
    """Build the mrdna segments for the mesh in ``FNAME``, optionally with overhangs.

    Every mesh edge becomes one ``mrdna.DoubleStrandedSegment``.  The helices
    are then joined following ``face.connections`` of every face: either
    through a single-stranded spacer of ``SPACERS`` nucleotides, or, when
    ``SPACERS`` is 0, through a direct ``"terminal_crossover"`` connection.
    If an overhang is declared for a (face, vertex) pair, the two helices
    meeting there are connected to the start of that overhang's
    double-stranded segment instead of to each other.

    NOTE(review): a second ``get_segments`` defined later in this file
    shadows this one at import time -- confirm which one callers expect.

    Args:
        FNAME: Path handed to ``read_ply``; it must yield vertex locations
            and faces.
        LENGTH_OF_SMALLEST: Value passed to ``edge.normalize`` together with
            the smallest raw edge length; also used to scale the geometric
            length of the overhangs.
        SPACERS: Number of nucleotides of every single-stranded spacer; 0
            selects direct terminal crossovers.
        overhangs: ``None`` or a table supporting ``len()`` and
            ``.iloc[i][column]`` (presumably a pandas DataFrame -- confirm)
            with the columns ``'face'``, ``'overhang'`` (vertex id),
            ``'ds_length'``, ``'ss_length'`` and ``'out_side'`` (3 or 5).

    Returns:
        list: the edge helices, followed by the spacers, followed (when
        ``overhangs`` is given) by the double-stranded and then the
        single-stranded overhang segments.

    Raises:
        ValueError: if an overhang's ``'out_side'`` is neither 3 nor 5.
    """
    locs, faces = read_ply(FNAME)

    face_data = [FACE(vertices, index) for index, vertices in enumerate(faces)]

    #NICKS
    #now we work out where the nicks are so we can choose the correct orientation of our strands.
    assign_nicks(face_data)
    mentioned_edges = set([f.nick for f in face_data])
    #NICKS

    #choose orientation: the first face to mention an edge (in either
    #direction) gets +1 and fixes the stored direction, any later mention -1.
    for f in face_data:
        for e in f.e:
            if e not in mentioned_edges and e[::-1] not in mentioned_edges:
                f.ori.append(1)
                mentioned_edges.add(e)
            else:
                f.ori.append(-1)

    edges = [edge(i, locs[i[0]], locs[i[1]]) for i in mentioned_edges]

    min_length = np.min([x.rawlen for x in edges])

    has_overhangs = overhangs is not None
    overhang_ds_edges = []
    overhang_ss_edges = []

    if has_overhangs:
        for i in range(len(overhangs)):
            row = overhangs.iloc[i]
            vertex = row['overhang']
            vertex_location = locs[vertex]
            vertex_vec = np.array(vertex_location)
            # Overhangs point away from the origin along the vertex vector.
            norm = np.sqrt(np.sum(vertex_vec**2))

            #ds overhang
            # NOTE(review): 3.4 is presumably the rise per base pair in
            # Angstrom -- confirm.
            bp = row['ds_length']
            length = bp * min_length / float(LENGTH_OF_SMALLEST) / 3.4
            end = length * vertex_vec / norm + vertex_vec
            overhang_ds_edges.append(
                edge(vertex, vertex_location, end, overhang=True, bp_overhang=bp))

            #ss overhang, continuing from the tip of the ds overhang
            bp_ss = row['ss_length']
            length_ss = bp_ss * min_length / float(LENGTH_OF_SMALLEST) / 3.4
            end_ss = end + length_ss * vertex_vec / norm
            overhang_ss_edges.append(
                edge(vertex, end, end_ss, overhang=True, bp_overhang=bp_ss,
                     out_orientation=row['out_side']))

    for item in edges + overhang_ds_edges + overhang_ss_edges:
        item.normalize(min_length, LENGTH_OF_SMALLEST)

    single_stranded_dna = []

    segs = {}
    for index, e in enumerate(edges):
        segs[tuple(e.index)] = mrdna.DoubleStrandedSegment(
            name='helix%s' % (index,),
            num_bp=e.nBp,
            start_position=e.nStart_c,
            end_position=e.nStop_c)

    # Overhang segments are keyed by the edge's ``index`` (the vertex id given
    # to ``edge`` above).
    # NOTE(review): two overhangs on the same vertex would overwrite each
    # other in these dicts -- confirm this cannot happen.
    overhang_segs = {}
    for index, e in enumerate(overhang_ds_edges):
        overhang_segs[e.index] = copy(mrdna.DoubleStrandedSegment(
            name='helix%s' % (index + 100,),
            num_bp=int(e.nBp),
            start_position=list(e.nStart_c),
            end_position=list(e.nStop_c)))

    ss_overhang_segs = {}
    for index, e in enumerate(overhang_ss_edges):
        if e.out_orientation == 5:
            start, stop = e.nStart_c, e.nStop_c
        elif e.out_orientation == 3:
            # A 3'-out strand is built in the opposite direction.
            start, stop = e.nStop_c, e.nStart_c
        else:
            # Previously this only printed 'BUG' and stored a stale segment.
            raise ValueError(
                "overhang out_side must be 3 or 5, got %r" % (e.out_orientation,))
        # Both orientations use the +200 offset so the names never collide
        # with the 'helix<index>' names of the mesh edges.
        ss_overhang_segs[e.index] = copy(mrdna.SingleStrandedSegment(
            name='helix%s' % (index + 200,),
            num_nt=int(e.nBp),
            start_position=list(start),
            end_position=list(stop)))

    #CONNECT SS_DNA TO OVERHANGS
    #connected if they have the same index!
    for ss_edge in overhang_ss_edges:
        index = ss_edge.index
        # Use the edge being iterated: ``overhang_ss_edges`` is a list, so it
        # must not be indexed with the vertex id.
        if ss_edge.out_orientation == 3:
            overhang_segs[index].connect_end3(ss_overhang_segs[index])
        elif ss_edge.out_orientation == 5:
            overhang_segs[index].connect_end5(ss_overhang_segs[index])

    # (face, vertex) pairs that carry an overhang, read once up front.
    overhang_sites = []
    if has_overhangs:
        overhang_sites = [
            (overhangs.iloc[i]['face'], overhangs.iloc[i]['overhang'])
            for i in range(len(overhangs))
        ]

    def _join_through_overhang(c1, c2, c1_positive, c2_positive, vertex_index):
        # Attach both helices of a connection to the start of the overhang helix.
        junction = overhang_segs[vertex_index]
        if c1_positive:
            segs[c1].connect_end3(junction.start5, type_="terminal_crossover")
            junction.connect_start5(segs[c1].end3, type_="terminal_crossover")
        else:
            segs[c1[::-1]].connect_start3(junction.start5, type_="terminal_crossover")
            junction.connect_start5(segs[c1[::-1]].start3, type_="terminal_crossover")
        if c2_positive:
            segs[c2].connect_start5(junction.start3, type_="terminal_crossover")
            junction.connect_start3(segs[c2].start5, type_="terminal_crossover")
        else:
            segs[c2[::-1]].connect_end5(junction.start3, type_="terminal_crossover")
            junction.connect_start3(segs[c2[::-1]].end5, type_="terminal_crossover")

    def _bridge_with_spacer(c1, c2, c1_positive, c2_positive, name):
        # Create a SPACERS-nt single strand between the two helices and attach it.
        if c1_positive:
            first = segs[c1]
            start, attach_first = first.end_position, first.connect_end3
        else:
            first = segs[c1[::-1]]
            start, attach_first = first.start_position, first.connect_start3
        if c2_positive:
            second = segs[c2]
            stop, attach_second = second.start_position, second.connect_start5
        else:
            second = segs[c2[::-1]]
            stop, attach_second = second.end_position, second.connect_end5
        spacer = copy(mrdna.SingleStrandedSegment(name,
                                                  start_position=start,
                                                  end_position=stop,
                                                  num_nt=SPACERS))
        attach_first(spacer)
        attach_second(spacer)
        return spacer

    def _join_directly(c1, c2, c1_positive, c2_positive):
        # Connect the two helices with a terminal crossover and no spacer.
        if c1_positive:
            connect = segs[c1].connect_end3
        else:
            connect = segs[c1[::-1]].connect_start3
        if c2_positive:
            target = segs[c2].start5
        else:
            target = segs[c2[::-1]].end5
        connect(target, type_="terminal_crossover")

    ssDNA_index = 0

    for f in face_data:
        for con in f.connections:
            ### See if we have an overhang
            overhang_here = False
            vertex_index = None
            if has_overhangs:
                # The vertex shared by the two connected edges.
                vertex_index = [v for v in con[0] if v in con[1]][0]
                overhang_here = any(
                    face_id == f.index and vertex == vertex_index
                    for face_id, vertex in overhang_sites)
                if overhang_here:
                    print (f.index)
                    #but obviously there isn't only one overhang?

            ssDNA_index += 1

            c1, c2 = con
            # An edge key present in ``segs`` is stored in this direction;
            # otherwise the reversed key is the stored helix.
            c1_positive = c1 in segs
            c2_positive = c2 in segs

            if overhang_here:
                if SPACERS != 0:
                    print ('adding overhang here!')
                _join_through_overhang(c1, c2, c1_positive, c2_positive,
                                       vertex_index)
            elif SPACERS != 0:
                single_stranded_dna.append(
                    _bridge_with_spacer(c1, c2, c1_positive, c2_positive,
                                        "strand%s" % (ssDNA_index,)))
            else:
                _join_directly(c1, c2, c1_positive, c2_positive)

    #NICKS (disabled; flip the flag to nick the assigned edge of every face)
    nicks = False
    if nicks:
        for f in face_data:
            nick = f.nick
            if nick in segs:
                segs[nick].add_nick(10,on_fwd_strand=True)
            else:
                print ('failure')

    segs_list = list(segs.values()) + single_stranded_dna
    if has_overhangs:
        segs_list = (segs_list
                     + list(overhang_segs.values())
                     + list(ss_overhang_segs.values()))

    return segs_list
def get_segments(FNAME, LENGTH_OF_SMALLEST, SPACERS):
    """Build the mrdna segments for the mesh stored in ``FNAME``.

    One ``mrdna.DoubleStrandedSegment`` is created per mesh edge.  For every
    entry of ``face.connections`` the two helices are then joined, either via
    a single-stranded spacer of ``SPACERS`` nucleotides or, when ``SPACERS``
    is 0, via a direct ``"terminal_crossover"`` connection.  Finally the nick
    edge of every face is nicked on the forward strand.

    Args:
        FNAME: Path handed to ``read_ply``.
        LENGTH_OF_SMALLEST: Passed to ``edge.normalize`` together with the
            smallest raw edge length.
        SPACERS: Number of nucleotides per spacer; 0 disables spacers.

    Returns:
        list: all edge helices followed by all spacer strands.
    """
    locs, faces = read_ply(FNAME)

    face_data = []
    for vertices in faces:
        face_data.append(face(vertices))

    #NICKS
    #the nick edges are registered first so that they fix the strand orientation.
    assign_nicks(face_data)
    nick_edges = [current.nick for current in face_data]
    mentioned_edges = set(nick_edges)
    #NICKS

    for current in face_data:
        for candidate in current.e:
            seen = (candidate in mentioned_edges
                    or candidate[::-1] in mentioned_edges)
            if seen:
                current.ori.append(-1)
            else:
                current.ori.append(1)
                mentioned_edges.add(candidate)

    edges = [edge(key, locs[key[0]], locs[key[1]]) for key in mentioned_edges]

    min_length = np.min([item.rawlen for item in edges])
    max_length = np.max([item.rawlen for item in edges])  # not used below

    for item in edges:
        item.normalize(min_length, LENGTH_OF_SMALLEST)

    single_stranded_dna = []

    segs = {
        tuple(item.index): mrdna.DoubleStrandedSegment(
            name='helix%s' % (position, ),
            num_bp=item.nBp,
            start_position=item.nStart_c,
            end_position=item.nStop_c)
        for position, item in enumerate(edges)
    }

    # Walk every connection of every face with one running counter.
    all_connections = (con for current in face_data
                       for con in current.connections)

    for ssDNA_index, con in enumerate(all_connections, start=1):
        c1, c2 = con
        # A key found in ``segs`` is stored in this direction; otherwise the
        # helix is stored under the reversed key.
        first_forward = c1 in segs
        second_forward = c2 in segs
        upstream = segs[c1] if first_forward else segs[c1[::-1]]

        if SPACERS != 0:
            if first_forward:
                spacer_start = upstream.end_position
                attach_upstream = upstream.connect_end3
            else:
                spacer_start = upstream.start_position
                attach_upstream = upstream.connect_start3

            downstream = segs[c2] if second_forward else segs[c2[::-1]]
            if second_forward:
                spacer_end = downstream.start_position
                attach_downstream = downstream.connect_start5
            else:
                spacer_end = downstream.end_position
                attach_downstream = downstream.connect_end5

            ss = copy(
                mrdna.SingleStrandedSegment("strand%s" % (ssDNA_index, ),
                                            start_position=spacer_start,
                                            end_position=spacer_end,
                                            num_nt=SPACERS))

            attach_upstream(ss)
            attach_downstream(ss)
            single_stranded_dna.append(ss)
            continue

        # No spacer: join the helix ends directly.
        if first_forward:
            connect = upstream.connect_end3
        else:
            connect = upstream.connect_start3
        downstream = segs[c2] if second_forward else segs[c2[::-1]]
        partner_end = downstream.start5 if second_forward else downstream.end5
        connect(partner_end, type_="terminal_crossover")

    #NICKS
    for current in face_data:
        nick = current.nick
        if nick not in segs:
            print('failure')
            continue
        segs[nick].add_nick(5, on_fwd_strand=True)
    #

    return list(segs.values()) + single_stranded_dna