Exemple #1
0
def _top_sequence_in_round(path):
    """Return ``(seq, count, dist)`` for the minimum-distance row of ``path``.

    Rows are whitespace-separated ``sequence count distance``. Blank lines
    are skipped and the first row wins ties.

    Raises:
        ValueError: If the file holds no rows.
    """
    best = None
    with open(path, 'r') as f:
        for line in f:
            row = line.split()
            if not row:
                continue
            seq = str(row[0])
            count = int(row[1])
            dist = int(row[2])
            if best is None or dist < best[2]:
                best = (seq, count, dist)
    if best is None:
        raise ValueError("no sequences found in " + path)
    return best


def _affstructure_record(seq, count, dist):
    """Fold ``seq`` once and return ``(structure, tab-separated info line)``."""
    folded = fold(seq)
    struct, mfe = folded[0], folded[1]
    fields = [seq, struct, str(mfe), str(count), str(dist)]
    return struct, '\t'.join(fields) + '\n'


def aptamer_structs_aff(fileNames, seqLength, roundNum, rounds='final'):
    """Write structure info and an SVG plot for the lowest-distance sequence(s).

    Each round file ``<fileNames>_R<k>`` is read as rows of
    ``sequence count distance``; the row with the smallest distance is that
    round's top sequence. Its sequence, folded structure, energy value
    (second element of ``fold``'s result), count and distance are written as
    one tab-separated line, and the structure is drawn with ``svg_rna_plot``.

    Args:
        fileNames: Common path prefix of the per-round files.
        seqLength: Unused; kept for interface compatibility.
        roundNum: Number of the last round (rounds are numbered from 1).
        rounds: ``'final'`` processes only round ``roundNum`` and writes
            ``<fileNames>_R<roundNum>_affstructure_info``; ``'all'`` processes
            rounds ``1..roundNum`` and writes one line per round to
            ``<fileNames>_R<roundNum>_affstructures_info``.

    Returns:
        0 on success; ``None`` (after printing a message) when ``rounds`` is
        neither ``'final'`` nor ``'all'``.

    Raises:
        ValueError: If a round file contains no rows.
    """
    prefix = fileNames + "_R"
    if rounds == 'final':
        seq, count, dist = _top_sequence_in_round(prefix + str(roundNum))
        seq_struct, info_line = _affstructure_record(seq, count, dist)
        with open(prefix + str(roundNum) + "_affstructure_info", 'w') as f:
            f.write(info_line)
        svg_rna_plot(seq, seq_struct,
                     prefix + str(roundNum) + "_affstructure.svg")
        return 0
    if rounds == 'all':
        # Exactly one (best) entry per round, so the output has roundNum lines.
        top_seqs_info = [
            _top_sequence_in_round(prefix + str(rnd))
            for rnd in range(1, roundNum + 1)
        ]
        with open(prefix + str(roundNum) + "_affstructures_info", 'w') as f:
            for rnd, (seq, count, dist) in enumerate(top_seqs_info, 1):
                seq_struct, info_line = _affstructure_record(seq, count, dist)
                f.write(info_line)
                svg_rna_plot(seq, seq_struct,
                             prefix + str(rnd) + "_affstructure.svg")
        return 0
    print("invalid option for string varible rounds. Exiting...")
Exemple #2
0
 def stochasticLoopSelection_initial(self, alphabetSet, seqLength, aptPool,
                                     selectionThreshold, totalSeqNum,
                                     samplingSize, outputFileNames, rnd,
                                     stringency):
     """Dump a random sample of the initial library, then run loop selection.

     ``samplingSize`` indices are drawn with ``random.randint`` (presumably
     ``numpy.random``, given the ``size=`` keyword -- TODO confirm), turned
     into sequences by ``Apt.pseudoAptamerGenerator`` and written one per
     line to ``<outputFileNames>_samples_R<rnd>``. The reference ``aptPool``
     is then folded, its loop extracted with ``apt_loopFinder``, and the
     selection is delegated to ``self.selectionProcess_loop_initial``
     starting from an empty dict.

     Returns:
         Whatever ``self.selectionProcess_loop_initial`` returns.
     """
     # --- sampling ---
     print("sampling from initial library...")
     sample_indices = random.randint(0,
                                     int(totalSeqNum - 1),
                                     size=samplingSize)
     sample_path = outputFileNames + "_samples_R" + str(rnd)
     with open(sample_path, 'w') as sample_file:
         sample_file.writelines(
             Apt.pseudoAptamerGenerator(idx, alphabetSet, seqLength) + '\n'
             for idx in sample_indices)
     print("Sampling completed")

     # --- reference structure and loop ---
     reference_struct = fold(aptPool)[0]
     reference_loop = apt_loopFinder(aptPool, reference_struct, seqLength)

     # --- stochastic selection until the threshold is met ---
     print("Selection has started")
     selected = self.selectionProcess_loop_initial({}, aptPool,
                                                   reference_struct,
                                                   reference_loop,
                                                   selectionThreshold,
                                                   alphabetSet, seqLength,
                                                   totalSeqNum, stringency)
     print("sequence selection has been carried out")
     return selected
 def loop_func(self, seq1, seq1_struct, seq1_loop, seq2, seqLength):
     """Return the combined loop and base-pair distance of ``seq2``.

     ``seq2`` is folded, a loop region is cut out of it, and the result is
     ``int(lavenshtein(seq1_loop, loop) + bp_distance(seq1_struct, struct))``.
     ``seq1`` is not used.

     NOTE(review): the forward scan stops before index ``seqLength - 1``, so
     a first ')' at the last position is never seen and one at
     ``seqLength - 2`` is handled by the "not found" branch. Kept as-is;
     confirm this matches how ``seq1_loop`` is produced.
     """
     struct = fold(seq2)[0]
     scan_limit = seqLength - 1
     symbol = None
     pos = 0
     # Forward scan for the first ')'; pos ends one past the last char read.
     while pos < scan_limit:
         symbol = struct[pos]
         pos += 1
         if symbol == ')':
             break
     if pos != scan_limit:
         # ')' sits at pos - 1: back up until the char before pos is '('.
         close_at = pos - 1
         while symbol != '(':
             pos -= 1
             symbol = struct[pos - 1]
         loop = seq2[pos:close_at]
     else:
         # Scan hit the limit: walk back to the nearest '(' (or the start).
         while symbol != '(' and pos > 0:
             pos -= 1
             symbol = struct[pos]
         loop = seq2 if pos == 0 else seq2[pos:]
     loop_dist = self.lavenshtein_func(seq1_loop, loop)
     pair_dist = bp_distance(seq1_struct, struct)
     return int(loop_dist + pair_dist)
Exemple #4
0
 def loop_func(self, seq1, seq1_struct, seq1_loop, seq2, seqLength):
     """Distance of ``seq2`` to a reference: loop edit distance + bp distance.

     The structure of ``seq2`` comes from ``fold``; a loop substring is
     extracted from it and compared to ``seq1_loop`` with
     ``self.lavenshtein_func``, while the full structures are compared with
     ``bp_distance``. The sum is returned as an ``int``. ``seq1`` is unused.

     NOTE(review): index ``seqLength - 1`` is never inspected by the forward
     scan, and a ')' found at ``seqLength - 2`` falls into the "no ')'"
     branch. Behaviour preserved; verify against ``apt_loopFinder``.
     """
     # secondary structure of the candidate
     candidate_struct = fold(seq2)[0]
     stop = seqLength - 1
     char = None
     cursor = 0
     # look for a 3' paired nucleotide
     while cursor < stop:
         char = candidate_struct[cursor]
         cursor += 1
         if char == ')':
             break
     if cursor != stop:
         # a closing bracket was found: the loop ends just before it
         loop_stop = cursor - 1
         while char != '(':
             cursor -= 1
             char = candidate_struct[cursor - 1]
         # loop is everything between the opening and closing bracket
         candidate_loop = seq2[cursor:loop_stop]
     else:
         while char != '(' and cursor > 0:
             cursor -= 1
             char = candidate_struct[cursor]
         if cursor == 0:
             # no loop: use the whole sequence
             candidate_loop = seq2
         else:
             # treat the tail of the sequence as the loop (dangling end)
             candidate_loop = seq2[cursor:]
     # Levenshtein distance between the loops
     loop_distance = self.lavenshtein_func(seq1_loop, candidate_loop)
     # base-pair distance between the structures
     bp_dist = bp_distance(seq1_struct, candidate_struct)
     # total distance
     return int(loop_distance + bp_dist)
Exemple #5
0
 def loop_components_func(self, seq1, seq1_struct, seq1_loop, seq2,
                          seqLength):
     """Return ``(loop_distance, bp_distance)`` of ``seq2`` to a reference.

     Same loop extraction as the summed loop distance, but the two
     components are returned separately: the ``self.lavenshtein_func``
     distance between ``seq1_loop`` and the loop of ``seq2``, and the
     ``bp_distance`` between ``seq1_struct`` and the fold of ``seq2``.
     ``seq1`` is unused.

     NOTE(review): the forward scan never reads index ``seqLength - 1`` and
     treats a ')' at ``seqLength - 2`` as "not found". Preserved as-is.
     """
     folded = fold(seq2)[0]
     end_of_scan = seqLength - 1
     current = None
     idx = 0
     # Advance until the first ')' has been read or the scan limit is hit.
     while idx < end_of_scan:
         current = folded[idx]
         idx += 1
         if current == ')':
             break
     if idx != end_of_scan:
         # ')' is at idx - 1; rewind until the char before idx is '('.
         loop_close = idx - 1
         while current != '(':
             idx -= 1
             current = folded[idx - 1]
         loop_seq = seq2[idx:loop_close]
     else:
         # Limit reached: rewind to the closest '(' or to the start.
         while current != '(' and idx > 0:
             idx -= 1
             current = folded[idx]
         loop_seq = seq2 if idx == 0 else seq2[idx:]
     loop_component = self.lavenshtein_func(seq1_loop, loop_seq)
     bp_component = bp_distance(seq1_struct, folded)
     return loop_component, bp_component
Exemple #6
0
    def _get_reward(self, terminal):
        """
        Compute the reward after assignment of all nucleotides.

        Args:
            terminal: Bool defining if final timestep is reached yet.

        Returns:
            The reward at the terminal timestep or 0 if not at the terminal timestep.
        """
        if not terminal:
            return 0

        folded_design, _ = fold(self.design.primary)
        hamming_distance = hamming(folded_design, self.target.dot_bracket)
        if 0 < hamming_distance < self._env_config.mutation_threshold:
            hamming_distance = self._local_improvement(folded_design)

        normalized_hamming_distance = hamming_distance / len(self.target)

        # For hparam optimization
        episode_info = EpisodeInfo(
            target_id=self.target.id,
            time=time.time(),
            normalized_hamming_distance=normalized_hamming_distance,
        )
        self.episodes_info.append(episode_info)

        return (1 -
                normalized_hamming_distance)**self._env_config.reward_exponent
Exemple #7
0
    def _local_improvement(self, folded_design):
        """
        Try every nucleotide assignment at the mismatching positions.

        The positions where ``folded_design`` differs from the target's
        dot-bracket string are enumerated; for each combination of "AGCU" at
        those positions the mutated design is folded and its Hamming distance
        to the target is measured.

        Returns:
            0 as soon as a candidate matches the target exactly, otherwise the
            smallest Hamming distance seen over all candidates.
        """
        target_structure = self.target.dot_bracket
        mismatch_sites = _string_difference_indices(target_structure,
                                                    folded_design)
        best_distance = None
        for assignment in product("AGCU", repeat=len(mismatch_sites)):
            candidate = self.design.get_mutated(assignment, mismatch_sites)
            candidate_structure, _ = fold(candidate.primary)
            distance = hamming(candidate_structure, target_structure)
            if distance == 0:  # exact match: stop searching early
                return 0
            if best_distance is None or distance < best_distance:
                best_distance = distance
        return best_distance
Exemple #8
0
def distance_range(scale, ref_seq, seqLength, alphabetSet):
    """Histogram and plot distances of random sequences to a reference.

    ``scale`` random sequences are generated with
    ``apt.pseudoAptamerGenerator`` and their Hamming, base-pair and loop
    distances to ``ref_seq`` are binned into histograms with
    ``int(seqLength * 1.5)`` integer bins. The histograms are divided by
    ``scale``, smoothed with ``spline`` and saved as a PDF-format figure named
    ``SELEX_Analytics_distance_distributions``.

    Args:
        scale: Number of random sequences to draw.
        ref_seq: Reference sequence the distances are measured against.
        seqLength: Sequence length passed to the generator and distance
            functions; also determines the number of histogram bins.
        alphabetSet: Alphabet passed through to the sequence generator.

    Returns:
        The normalised Hamming-distance histogram (the other two histograms
        are only plotted).
    """
    ref_struct = fold(ref_seq)[0]
    ref_loop = apt_loopFinder(ref_seq, ref_struct)
    # One bin per integer distance; the plot axis below must use the same
    # count or the spline call fails on mismatched array lengths.
    num_bins = int(seqLength * 1.5)
    hamm_dist_array = np.zeros(num_bins)
    bp_dist_array = np.zeros(num_bins)
    loop_dist_array = np.zeros(num_bins)
    # NOTE(review): the index range is hard-coded to 4**20, i.e. it looks
    # tied to a 4-letter alphabet and seqLength == 20 -- confirm before
    # calling with other lengths.
    randIdxs = random.randint(0, 4**(20) - 1, size=scale)
    for randIdx in randIdxs:
        randSeq = apt.pseudoAptamerGenerator(randIdx, alphabetSet, seqLength)
        randHammDist = d.hamming_func(randSeq, ref_seq)
        randbpDist = d.bp_func(ref_struct, randSeq)
        randLoopDist = d.loop_func(ref_seq, ref_struct, ref_loop, randSeq,
                                   seqLength)
        hamm_dist_array[randHammDist] += 1
        bp_dist_array[randbpDist] += 1
        loop_dist_array[randLoopDist] += 1
    # Convert counts to fractional frequencies.
    hamm_dist_array /= scale
    bp_dist_array /= scale
    loop_dist_array /= scale
    fig, axis = plt.subplots(1, 1)
    # For seqLength == 20 this is the same 0..29 axis as before; for other
    # lengths it now matches the histogram size.
    distAxis = np.linspace(0, num_bins - 1, num_bins)
    distAxis_smooth = np.linspace(0, num_bins - 1, 200)
    hamm_dist_smooth = spline(distAxis, hamm_dist_array, distAxis_smooth)
    bp_dist_smooth = spline(distAxis, bp_dist_array, distAxis_smooth)
    loop_dist_smooth = spline(distAxis, loop_dist_array, distAxis_smooth)
    axis.plot(distAxis_smooth, hamm_dist_smooth, label='Hamming')
    axis.plot(distAxis_smooth, bp_dist_smooth, label='Base-Pair')
    axis.plot(distAxis_smooth, loop_dist_smooth, label='Loop')
    axis.set_xlim([0, 25])
    axis.set_ylim([0, 0.4])
    axis.legend()
    fig.text(0.5, 0.04, 'Distance', ha='center')
    fig.text(0.04,
             0.5,
             'Fractional Frequency',
             va='center',
             rotation='vertical')
    fig.text(0.5, 0.95, 'Distance Distributions', ha='center')
    fig.savefig("SELEX_Analytics_distance_distributions", format='pdf')
    return hamm_dist_array
def check_reverse_rnafold(**kwargs):
    '''
    Print every dataset sequence whose fold is not mirrored when reversed.

    RNAfold is directional aware. Therefore generated rna graphs need to
    consider both directions. For each ``seq``/``struct`` row of
    ``data/rna_dataset_<length>.csv`` the reversed sequence is folded and
    compared with the mirror image of the stored structure; mismatches are
    printed as ``seq, struct, reversed_struct``. The dataset is generated
    with ``generate_seq_dataset(size, length)`` if the file does not exist.

    :param kwargs: ``length`` (default 32) selects the dataset file;
        ``size`` (default 2e7) is only used when the dataset is generated.
    :return: None
    '''
    length = kwargs.get('length', 32)
    size = kwargs.get('size', 2e7)
    dataset_path = os.path.join(basedir, 'data',
                                'rna_dataset_%d.csv' % (length))
    if not os.path.exists(dataset_path):
        generate_seq_dataset(size, length)

    with open(dataset_path, 'r') as f:
        reader = pd.read_csv(f)

    # Reversing a dot-bracket string turns every '(' into a closing position
    # and vice versa, so the brackets must be swapped to get a valid mirrored
    # structure. A plain [::-1] never equals a fold result that has pairs.
    mirror = {'(': ')', ')': '('}
    for seq, struct in zip(reader['seq'], reader['struct']):
        reversed_struct = fold(seq[::-1])[0]
        mirrored_struct = ''.join(mirror.get(ch, ch) for ch in reversed(struct))
        if mirrored_struct != reversed_struct:
            print(seq, struct, reversed_struct)
Exemple #10
0
def _predict_rnalib(fasta_entry):
    """Return the entry's fields followed by the fields of its fold result.

    ``fasta_entry[1]`` (presumably the sequence -- confirm against the
    caller) is passed to ``RNA.fold``; the output is one flat tuple made of
    all items of ``fasta_entry`` followed by all items that ``fold`` returned.
    """
    # Imported lazily so the RNA package is only needed when this is called.
    from RNA import fold as rna_fold
    entry_fields = tuple(fasta_entry)
    prediction_fields = tuple(rna_fold(fasta_entry[1]))
    return entry_fields + prediction_fields
Exemple #11
0
 def bp_func(self, seq1_struct, seq2):
     """Return ``bp_distance`` between ``seq1_struct`` and the fold of ``seq2``.

     Only the first element of ``fold(seq2)`` (the structure) is used.
     """
     return bp_distance(seq1_struct, fold(seq2)[0])
Exemple #12
0
 def bp_func(self, seq1_struct, seq2):
     """Base-pair distance from a reference structure to a folded sequence.

     ``seq2`` is folded first; the structure part of the result is then
     compared with ``seq1_struct`` via ``bp_distance`` and that value is
     returned unchanged.
     """
     candidate_structure = fold(seq2)[0]
     distance = bp_distance(seq1_struct, candidate_structure)
     return distance