Exemple #1
0
 def percent_gc(self):
     """Return ``gc_content(self.sequence)`` scaled by 100.

     NOTE(review): presumably gc_content yields a 0-1 fraction, making
     this a percentage -- confirm against gc_content's definition.
     """
     gc_fraction = gc_content(self.sequence)
     return 100*gc_fraction
Exemple #2
0
 def percent_gc(self):
     """Return ``gc_content(self.positive_sequence)`` scaled by 100.

     NOTE(review): presumably gc_content yields a 0-1 fraction, making
     this a percentage -- confirm against gc_content's definition.
     """
     gc_fraction = gc_content(self.positive_sequence)
     return 100*gc_fraction
Exemple #3
0
 def percent_gc(self):
     """Return ``gc_content(self.positive_amplicon)`` scaled by 100.

     NOTE(review): presumably gc_content yields a 0-1 fraction, making
     this a percentage -- confirm against gc_content's definition.
     """
     gc_fraction = gc_content(self.positive_amplicon)
     return 100*gc_fraction
Exemple #4
0
    def cut(self):
        """
        Compute restriction-site display data for one enzyme on one sequence.

        Reads from ``self.form_result``:
          * 'left_padding', 'right_padding' -- padding passed to
            pcr_sequences_snps_for_group and used to locate the amplicon
            inside the padded sequence.
          * 'enzyme' -- key passed to Session.query(Enzyme).get(); also used
            to index the result of __enzyme_cut_locations.
          * 'assay_id' -- if truthy, sequences come from that SequenceGroup.
          * 'positive_sequence' -- used when 'assay_id' is falsy; wrapped in
            a PCRSequence with an empty SNP list.

        Only the first resulting sequence (sequences[0]) is analyzed.

        Returns a dict containing:
          * one entry per key of the enzyme's cut data, holding the number
            of sites recorded under that key (stable + SNP-cancelled +
            SNP-created, both strands);
          * 'positive_cuts' / 'negative_cuts' -- lists of dicts with
            'offset', 'pos' (a percentage string) and 'class' (one of
            'stable_re_site', 'snp_re_site', 'snp_cancel_re_site');
          * 're_width_pct' -- len(enzyme.cutseq) as a percentage string of
            the sequence length;
          * 'fragment' -- only present when there is a positive-strand site
            on each side of the amplicon and none inside it; holds 'len',
            'loff', 'roff' and 'gc' for the span between the nearest
            flanking sites.

        NOTE(review): raises IndexError if no sequences are produced and
        ZeroDivisionError if the merged positive sequence is empty; neither
        case is guarded here.
        """
        left_padding = self.form_result['left_padding']
        right_padding = self.form_result['right_padding']
        enzyme = Session.query(Enzyme).get(self.form_result['enzyme'])
        cutseq = enzyme.cutseq
        
        # TODO change to single amplicon?
        if self.form_result['assay_id']:
            assay = Session.query(SequenceGroup).get(self.form_result['assay_id'])
            amplicon_tuples = pcr_sequences_snps_for_group(assay, padding_pos5=left_padding, padding_pos3=right_padding)
            # flatten the per-amplicon sequence lists into a single list
            sequences = []
            for amp, pseqs in amplicon_tuples:
                sequences.extend(pseqs)
        else:
            # no assay selected: wrap the pasted sequence so it can go through
            # the same code path, with no SNPs attached
            manual_seq = PCRSequence(SimpleGenomeSequence(0, 0, len(self.form_result['positive_sequence'])-1, '+',
                                                          full_sequence=self.form_result['positive_sequence']))
            manual_seq.snps = []
            sequences = [manual_seq]
        
        # TODO: this is arbitrary
        location_cut_data = self.__enzyme_cut_locations(sequences[0], [enzyme])
        
        # TODO support multiple sequences, somehow.
        
        pos_seq = sequences[0].merged_positive_sequence
        total_width = len(pos_seq)
        # width of the recognition sequence as a percentage of the full sequence
        re_width_pct = 100*float(len(cutseq))/total_width
        
        enzyme_cut_data = location_cut_data[self.form_result['enzyme']]
        
        positive_matches = []
        negative_matches = []
        return_dict = {}
        # SNP lookup by name; the per-SNP cut entries below refer to SNPs by name
        snp_dict = dict([(s['name'], s) for s in sequences[0].snps])
        # keys here are going to be amplicon_cuts, left_cuts and right_cuts, left_cut
        for k, v in sorted(enzyme_cut_data.items()):
            # v[0] holds the sites for the unmodified sequence; each later
            # entry is (snp_name, positive sites, negative sites) for one SNP
            blank, original_positives, original_negatives = v[0]
            snp_positives = []
            snp_negatives = []
            cancel_positives = []
            cancel_negatives = []
            for cuts in v[1:]:
                snp_name, shifted_positives, shifted_negatives = cuts
                snp = snp_dict[snp_name]
                # TODO: this is an oversimplification but will probably only result
                # in a shift in a particular restriction site
                #
                # TODO: I think this is sketchy right at the edges, needs to be tested. (> vs >=, etc)
                # TODO: the code could stand to be more compact as well.
                #
                # Only the *count* of sites is compared against the original;
                # a SNP that yields the same number of sites is ignored.
                if len(shifted_positives) > len(original_positives):
                    # more sites with the SNP: record the shifted sites that
                    # overlap the SNP's chromStart/chromEnd
                    # NOTE(review): pos_seq.start+offset appears to convert a
                    # sequence-relative offset to the SNP's coordinate system -- confirm
                    found = False
                    for start, end, strand in shifted_positives:
                        if (snp['chromEnd'] >= pos_seq.start+start-1 and snp['chromEnd'] <= pos_seq.start+end) or \
                           (snp['chromStart'] >= pos_seq.start+start-1 and snp['chromStart'] <= pos_seq.start+end):
                            snp_positives.append((start, end, strand))
                            found = True
                    if not found:
                        # deliberately ignored (the raise below is disabled)
                        pass
                        #raise Exception, "ERROR: additional positive strand restriction site not found by analyzing SNPs"
                
                elif len(shifted_positives) < len(original_positives):
                    # fewer sites with the SNP: mark the original sites that
                    # overlap the SNP, or lie entirely within it, as cancelled
                    found = False
                    for start, end, strand in original_positives:
                        if (snp['chromEnd'] >= pos_seq.start+start-1 and snp['chromEnd'] <= pos_seq.start+end) or \
                           (snp['chromStart'] >= pos_seq.start+start-1 and snp['chromStart'] <= pos_seq.start+end) or \
                           (pos_seq.start+start-1 >= snp['chromStart'] and pos_seq.start+end <= snp['chromEnd']):
                            # several SNPs may hit the same site; record it once
                            if (start, end, strand) not in cancel_positives:
                                cancel_positives.append((start, end, strand))
                            found = True
                    if not found:
                        # deliberately ignored (the raise below is disabled)
                        pass
                        #raise Exception, "ERROR: cancelled positive strand restriction site not found by analyzing SNPs"
                           
                # Same analysis for the negative strand; here offsets are
                # measured back from pos_seq.end instead of forward from
                # pos_seq.start.
                if len(shifted_negatives) > len(original_negatives):
                    # find where the new snp is
                    found = False
                    for start, end, strand in shifted_negatives:
                        if (snp['chromEnd'] >= pos_seq.end-(end+1) and snp['chromEnd'] <= pos_seq.end-start) or \
                           (snp['chromStart'] >= pos_seq.end-(end+1) and snp['chromStart'] <= pos_seq.end-start):
                            snp_negatives.append((start, end, strand))
                            found = True
                    if not found:
                        pass
                        # insertion screws you here.
                        #raise Exception, (snp['chromEnd'], snp['chromStart'], pos_seq.end, pos_seq.end-(shifted_negatives[0][1]+1), pos_seq.end-(shifted_negatives[0][0]))
                        #raise Exception, "ERROR: additional negative strand restriction site not found by analyzing SNPs"
                    
                elif len(shifted_negatives) < len(original_negatives):
                    found = False
                    for start, end, strand in original_negatives:
                        if (snp['chromEnd'] >= pos_seq.end-(end+1) and snp['chromEnd'] <= pos_seq.end-start) or \
                           (snp['chromStart'] >= pos_seq.end-(end+1) and snp['chromStart'] <= pos_seq.end-start) or \
                           (pos_seq.end-(end+1) >= snp['chromStart'] and pos_seq.end-start <= snp['chromEnd']):
                            if (start, end, strand) not in cancel_negatives:
                                cancel_negatives.append((start, end, strand))
                            found = True
                    
                    if not found:
                        # deliberately ignored (the raise below is disabled)
                        pass
                        #raise Exception, "ERROR: cancelling negative strand restriction site not found by analyzing SNPs"
            
            # Cancelled sites are pulled out of the "stable" lists so that each
            # site is reported under exactly one class. This mutates the lists
            # that came back from __enzyme_cut_locations in place.
            for tup in cancel_positives:
                original_positives.remove(tup)
            for tup in cancel_negatives:
                original_negatives.remove(tup)
            
            # total number of sites recorded under this key, across all classes
            return_dict[k] = len(original_positives) + len(cancel_positives) \
                             + len(original_negatives) + len(cancel_negatives) \
                             + len(snp_positives) + len(snp_negatives)
            
            # 'pos' is the site's start as a percentage of the sequence width.
            # Negative-strand positions are mirrored: measured from the far end
            # and shifted back by the recognition-sequence width.
            for start, end, strand in original_positives:
                positive_matches.append({'offset': start, 'pos': '%s%%' % (start*100.0/total_width), 'class': 'stable_re_site'})
            
            for start, end, strand in original_negatives:
                negative_matches.append({'offset': start, 'pos': '%s%%' % (100-(start*100.0/total_width)-re_width_pct), 'class': 'stable_re_site'})
            
            for start, end, strand in snp_positives:
                positive_matches.append({'offset': start, 'pos': '%s%%' % (start*100.0/total_width), 'class': 'snp_re_site'})
            
            for start, end, strand in snp_negatives:
                negative_matches.append({'offset': start, 'pos': '%s%%' % (100-(start*100.0/total_width)-re_width_pct), 'class': 'snp_re_site'})
            
            for start, end, strand in cancel_positives:
                positive_matches.append({'offset': start, 'pos': '%s%%' % (start*100.0/total_width), 'class': 'snp_cancel_re_site'})
            
            for start, end, strand in cancel_negatives:
                negative_matches.append({'offset': start, 'pos': '%s%%' % (100-(start*100.0/total_width)-re_width_pct), 'class': 'snp_cancel_re_site'})
                
        return_dict['positive_cuts'] = positive_matches
        return_dict['negative_cuts'] = negative_matches
        return_dict['re_width_pct'] = "%s%%" % re_width_pct

        # figure out amplicon position
        # (the amplicon is what remains of pos_seq once the padding is removed)
        amplicon_start = left_padding
        amplicon_end = len(pos_seq) - (right_padding+1)
        
        # Positive-strand sites whose recognition sequence ends before the
        # amplicon starts. positive_matches includes SNP-created and
        # SNP-cancelled sites, so those are considered here as well.
        # NOTE(review): a site with offset in
        # [amplicon_start - len(cutseq), amplicon_start) is counted neither
        # here nor in amplicon_cuts below -- confirm this is intended.
        left_offsets = [match['offset'] for match in positive_matches if match['offset'] < (amplicon_start - len(cutseq))]
        if left_offsets:
            # end of the flanking site closest to the amplicon on the left
            rightmost_left = max(left_offsets)+len(cutseq)
        else:
            rightmost_left = None
        
        right_offsets = [match['offset'] for match in positive_matches if match['offset'] > amplicon_end]
        if right_offsets:
            # start of the flanking site closest to the amplicon on the right
            leftmost_right = min(right_offsets)
        else:
            leftmost_right = None
        
        amplicon_cuts = [match for match in positive_matches if match['offset'] >= amplicon_start and match['offset'] <= amplicon_end]
        
        # todo: bug if the cutters are asymmetric and the negative cutsite is shorter (redmine 669)
        # Report a fragment only when the amplicon is flanked by a site on both
        # sides and has no site starting inside it.
        if rightmost_left is not None and leftmost_right is not None and len(amplicon_cuts) == 0:
            inner_len = leftmost_right - rightmost_left
            inner_seq = pos_seq.sequence[rightmost_left:leftmost_right]
            inner_gc = gc_content(inner_seq)
            # distances from the flanking sites to the amplicon boundaries
            left_offset = amplicon_start - rightmost_left
            right_offset = leftmost_right - amplicon_end

            return_dict['fragment'] = {'len': inner_len,
                                       'loff': left_offset,
                                       'roff': right_offset,
                                       'gc': "%.2f%%" % (inner_gc*100)}
            
        return return_dict