Ejemplo n.º 1
0
def DSeqVH(fwd: str,
           rev: str = None,
           overhang: int = None,
           alphabet: int = AlphaEnum.DNA) -> VirtualHelix:
    '''Build a one-duplex :class:`VirtualHelix` from plain sequence strings,
    taking the same arguments as :class:`DSeq`.

    Args:
        fwd: sequence of the forward strand
        rev: sequence of the reverse strand; when ``None`` the reverse
            strand is created from ``reverseComplement(fwd)``
        overhang: forwarded to :class:`DSeq`; the ``overhang`` attribute of
            the resulting :class:`DSeq` is what is actually used below
        alphabet: forwarded to :class:`DSeq`

    Returns:
        A :class:`VirtualHelix` holding one forward and one reverse strand
    '''
    # Let DSeq work out the effective overhang from the raw arguments
    resolved_overhang: int = DSeq(fwd, rev, overhang, alphabet).overhang

    # A positive overhang offsets the forward strand; any other value is
    # handed to the reverse strand unchanged.
    # NOTE(review): a negative overhang therefore becomes a negative reverse
    # offset -- confirm that is what VirtualHelix expects.
    if resolved_overhang > 0:
        fwd_idx_offsets, rev_idx_offsets = [resolved_overhang], [0]
    else:
        fwd_idx_offsets, rev_idx_offsets = [0], [resolved_overhang]

    forward_oligo = Oligo(fwd)
    reverse_seq = reverseComplement(fwd) if rev is None else rev
    reverse_oligo = Oligo(reverse_seq)

    return VirtualHelix([forward_oligo.strand5p], fwd_idx_offsets,
                        [reverse_oligo.strand5p], rev_idx_offsets)
Ejemplo n.º 2
0
    def find_tms(cls,
                 sequences,
                 sodium=0.05,
                 magnesium=0.0,
                 temperature=25,
                 concentration=0.00000025,
                 **kwargs):
        """Return the melting temperatures RNAplex reports in probe mode.

        RNAplex is started once; the sequences are fed to it on STDIN, one
        per line, and the table it prints is parsed.

        RNAplex in 'probe mode' only calculates the reverse-complement Tm,
        and cannot calculate the hairpin, homodimer, or heterodimer Tms.

        Args:
            sequences: iterable of sequence strings
            sodium: value for '--na-concentration'
            magnesium: value for '--mg-concentration'
            temperature: value for '--temp'
            concentration: value for '--probe-concentration'
            **kwargs: accepted for call compatibility; not used here

        Returns:
            A list of floats: the last ('CURRENT') column of every data row
            that follows the table header, in output order. The list is
            empty if the header row is never seen.
        """
        cls.load()

        # (flag, value) pairs; a value of None marks a bare switch
        options = [
            ('--paramFile', cls.parameters),
            ('--probe-mode', None),
            ('--probe-concentration', Oligo.float_to_str(concentration)),
            ('--na-concentration', Oligo.float_to_str(sodium)),
            ('--mg-concentration', Oligo.float_to_str(magnesium)),
            ('--tris-concentration', Oligo.float_to_str(0.0)),
            ('--k-concentration', Oligo.float_to_str(0.0)),
            ('--temp', temperature),
        ]
        command_list = ['RNAplex'] + [
            flag if value is None else '{}={}'.format(flag, value)
            for flag, value in options
        ]

        cls.logger.info('command: {!r}'.format(' '.join(command_list)))

        cp = subprocess.run(command_list,
                            input='\n'.join(sequences).encode('utf-8'),
                            shell=False,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.DEVNULL)

        # The output typically looks like this:
        #   Probe mode
        #   Concentration K:0.000 TNP:0.000 Mg:0.000 Na:0.050 probe:0.000
        #
        #               sequence  DDSL98  DDSL04  DRSU95  RRXI98 CURRENT
        #   AGGCTTTAGGGCTATAGGAA   51.78  50.76   50.74   62.06   52.13
        #    CGAATTTAGAGCCTATAAT   43.60  42.90   39.52   48.07   43.46
        header_pattern = r'^\s+sequence\s+DDSL98\s+DDSL04\s+DRSU95\s+RRXI98\s+CURRENT'
        row_pattern = r'^\s*(\S+)(?:\s+(\S+)){5}$'

        tm_list = []
        header_seen = False
        for line in cp.stdout.decode().splitlines():
            line = line.rstrip()
            if not header_seen:
                # Everything up to and including the header row is skipped
                header_seen = regex.search(header_pattern, line) is not None
                continue
            m = regex.match(row_pattern, line)
            if m:
                # Group 2 repeats five times; its last capture is the
                # 'CURRENT' column
                tm_list.append(float(m.captures(2)[-1]))

        return tm_list
Ejemplo n.º 3
0
    def breakStrand(self, dir_idx: int, strand: Strand,
                    idx: int) -> Tuple[Oligo, Oligo]:
        '''Break a Strand in two and create two new Oligos that take over all
        of the strands of the pre-existing Oligo

        The sequence of ``strand`` is cut at ``idx``.  Each half becomes the
        first strand of a fresh :class:`Oligo`, existing 5' / 3' neighbors are
        relinked to the matching half, and the halves replace ``strand`` in
        this helix's strand list.

        Args:
            dir_idx: is this on the forward [0] or reverse [1] direction of the
                :class:`VirtualHelix`.  Also use :enum:`VHDirEnum` to get these idxs
            strand: :class:`Strand` object to break
            idx: index to break the strand at in terms of its sequence
        Returns:
            Two new :class:`Oligo` objects of form::

                 Oligo_5p,  Oligo_3p
        Raises:
            ValueError: ``strand`` is not in the strand list for ``dir_idx``
        '''
        # presumably rejects an invalid direction index -- confirm in VHDirEnum
        VHDirEnum.check(dir_idx)
        strand_array = self.strand_arrays[dir_idx]
        vh_strands: List[Strand] = strand_array.strands
        if strand not in vh_strands:
            dir_name: str = VHDirEnum(dir_idx).name
            err: str = "Strand {} not in the {} StrandArray of the VirtualHelix"
            raise ValueError(err.format(strand, dir_name))

        idx_offsets: List[int] = strand_array.idx_offsets

        seq: str = strand.seq
        # NOTE(review): oligo_old is never read again in this method
        oligo_old: Oligo = strand.oligo

        # NOTE(review): idx is not range checked; idx == 0 or idx == len(seq)
        # passes an empty string to one of the Oligo() calls below -- confirm
        # whether Oligo accepts that.

        # 1. Do the 5' portion of the break
        oligo_break5p: Oligo = Oligo(seq[0:idx])
        strand_break5p: Strand = oligo_break5p.strand5p

        neighbor_5p: Strand = strand.strand5p
        if neighbor_5p is not None:  # update existing neighbor oligos
            # splice the new 5' piece in where the original strand was linked
            strand_break5p.strand5p = neighbor_5p
            neighbor_5p.strand3p = strand_break5p
            # hand every strand yielded by gen5p() over to the new 5' oligo
            # (presumably a walk from the neighbor toward the 5' end -- confirm)
            # NOTE(review): oligo_break5p.strand5p itself is not updated here
            for seg in neighbor_5p.gen5p():
                seg.oligo = oligo_break5p

        # 2. Do the 3' portion of the break
        oligo_break3p: Oligo = Oligo(seq[idx:])
        strand_break3p: Strand = oligo_break3p.strand5p

        neighbor_3p: Strand = strand.strand3p
        if neighbor_3p is not None:  # update existing neighbor oligos
            # mirror image of step 1 for the 3' side
            strand_break3p.strand3p = neighbor_3p
            neighbor_3p.strand5p = strand_break3p
            for seg in neighbor_3p.gen3p():
                seg.oligo = oligo_break3p

        # 3. Update the strands

        list_idx: int = vh_strands.index(strand)
        offset_5p: int = idx_offsets[list_idx]
        list_idx_plus_1: int = list_idx + 1

        # Both pieces are inserted at the same position, 3' piece first, so
        # the list reads: original, 5' piece, 3' piece
        vh_strands.insert(list_idx_plus_1, strand_break3p)
        vh_strands.insert(list_idx_plus_1, strand_break5p)

        # The original offset entry now belongs to the 5' piece; the 3' piece
        # gets a new entry placed right after it
        idx_offsets.insert(list_idx_plus_1, offset_5p + len(strand_break5p))

        vh_strands.pop(list_idx)  # pop out the original strand

        return oligo_break5p, oligo_break3p
Ejemplo n.º 4
0
def str2Oligo(x: str) -> Tuple[bool, Oligo, Strand]:
    '''Normalize ``x`` to an ``(is_new, oligo, strand)`` triple.

    Args:
        x: either a :class:`Strand` instance or a value to pass to
            :class:`Oligo`

    Returns:
        ``(False, x.oligo, x)`` when ``x`` is already a :class:`Strand`;
        otherwise ``(True, oligo, oligo.strand5p)`` for a newly created
        ``Oligo(x)``
    '''
    if not isinstance(x, Strand):
        # Anything that is not a Strand is wrapped in a brand new Oligo
        created = Oligo(x)
        return True, created, created.strand5p
    return False, x.oligo, x
Ejemplo n.º 5
0
    else:
        fwd_idx_offsets = [0]
        rev_idx_offsets = [overhang]
    oligo_fwd = Oligo(fwd)
    if rev is None:
        rev = reverseComplement(fwd)
    oligo_rev = Oligo(rev)
    return VirtualHelix([oligo_fwd.strand5p], fwd_idx_offsets,
                        [oligo_rev.strand5p], rev_idx_offsets)


# end def

if __name__ == '__main__':
    # Demo: build the same 15-mer duplex three different ways, then break
    # the forward strand of the last one.
    fwd = 'GGTCTCGAATTCAAA'
    rev = 'TTTGAATTCGAGACC'
    oligo_fwd = Oligo(fwd)
    oligo_rev = Oligo(rev)

    # 1. Straight from the strands, both offsets zero
    BsaI_vh = VirtualHelix([oligo_fwd.strand5p], [0],
                           [oligo_rev.strand5p], [0])
    print("1.\n%s" % (BsaI_vh,))

    # 2. Through the helper, both sequences given and a zero overhang
    BsaI_vh = DSeqVH(fwd, rev, overhang=0)
    print("2.\n%s" % (BsaI_vh,))
    print(BsaI_vh.fwd_strands)

    # 3. Through the helper, forward sequence only
    BsaI_vh = DSeqVH(fwd)
    print("3.\n%s" % (BsaI_vh,))
    print("Da Oligos", BsaI_vh.oligos())

    # Break the first forward strand at sequence index 4
    strand0 = BsaI_vh.fwd_strands[0]
    print(strand0.oligo)
    broken_oligos = BsaI_vh.breakStrand(0, strand0, 4)
Ejemplo n.º 6
0
    def nnn_unafold(cls,
                    folder,
                    seq1,
                    seq2=None,
                    sodium=0.05,
                    magnesium=0.0,
                    temperature=25,
                    concentration=0.00000025):
        """
        Calculate deltaG, deltaH, deltaS, Tm.
        Faster than running 'UNAFold.pl'.

        Accepts 1 or 2 input sequences. Automatically runs either:
         * Hairpin     (seq2 is None: 'hybrid-ss-min' on A.seq)
         * Homodimer   (seq2 == seq1: 'hybrid-min' on A.seq and A.seq)
         * Heterodimer (any other seq2: 'hybrid-min' on A.seq and B.seq)

        Writes 'A.seq', 'B.seq' (heterodimer only) and a '<prefix>.det'
        summary to the temp 'folder', where <prefix> is 'A', 'A-A' or 'A-B'.
        The folding programs also leave their own output files there.

        Returns four lists:
          ([deltaG, ...], [deltaH, ...], [deltaS, ...], [Tm, ...])

        If the folding program exits with an error the result is
          ([inf], [inf], [nan], [nan])
        and no '.det' file is written.  When the per-structure data is
        incomplete (IndexError), the missing deltaS/Tm entries are nan.
        """
        # Pick the run mode first so the files written below always match
        # the command that is executed.
        if seq2 is None:
            # Creates files: A.ann, A.ct, A.dG, A.plot, A.run
            prefix = 'A'
            fold_program = 'hybrid-ss-min'
            fold_inputs = ['A.seq']
        elif seq1 == seq2:
            prefix = 'A-A'
            fold_program = 'hybrid-min'
            fold_inputs = ['A.seq', 'A.seq']
        else:
            prefix = 'A-B'
            fold_program = 'hybrid-min'
            fold_inputs = ['A.seq', 'B.seq']

        # Create sequence files for input
        with open(os.path.join(folder, 'A.seq'), 'w') as flo:
            print(seq1, file=flo)
        if prefix == 'A-B':
            # Written for every heterodimer run, so a B.seq left behind by
            # an earlier call in the same folder is never picked up.
            with open(os.path.join(folder, 'B.seq'), 'w') as flo:
                print(seq2, file=flo)

        # Example command:
        #   hybrid-ss-min --NA=DNA --tmin=25 --tmax=25 --tinc=1 --sodium=0.05
        #                 --magnesium=0.0 --maxloop=30 --mfold=5,-1,100 A.seq
        parameters_1 = [
            '--NA=DNA',
            '--tmin=' + str(temperature),
            '--tmax=' + str(temperature),
            '--tinc=1',
            '--sodium=' + Oligo.float_to_str(sodium),
            '--magnesium=' + Oligo.float_to_str(magnesium),
            '--maxloop=30',
            '--mfold=5,-1,100',
        ]
        try:
            # Both output streams are discarded to prevent printing to STDOUT
            subprocess.run([fold_program] + parameters_1 + fold_inputs,
                           shell=False,
                           check=True,
                           cwd=folder,
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError:
            # Some sequences, such as 'GAGAAGGAGAAGGAGAAG' paired with
            # itself, will cause a segmentation fault
            return [math.inf], [math.inf], [math.nan], [math.nan]

        # The 'h-num.pl' and 'ss-count.pl' steps of UNAFold.pl are apparently
        # not needed for these values, so they are not run.

        ct_name = prefix + '.ct'

        def ct_energy(energy_options):
            """Run 'ct-energy' on the .ct file; return one float per line."""
            cp = subprocess.run(['ct-energy'] + energy_options + [ct_name],
                                shell=False,
                                check=True,
                                cwd=folder,
                                stdout=subprocess.PIPE,
                                stderr=None)
            return [float(line) for line in cp.stdout.decode().splitlines()]

        # This calculates delta-H
        # Command: ct-energy --suffix=DHD A.ct > A.deltaH
        deltaH_list = ct_energy(['--suffix=DHD'])

        # This gives more precise delta-G calculations
        # Command: ct-energy --NA=DNA --temperature=25 --sodium=0.05
        #                    --magnesium=0.0 A.ct > A.deltaG
        deltaG_list = ct_energy([
            '--NA=DNA',
            '--temperature=' + str(temperature),
            '--sodium=' + Oligo.float_to_str(sodium),
            '--magnesium=' + Oligo.float_to_str(magnesium),
        ])

        number_structures = len(deltaH_list)
        # One entry per structure; the code below expects True (homodimer),
        # False (heterodimer) or None (no dimer correction)
        # -- presumably what parse_ct_file returns, confirm there.
        homodimer_list = cls.parse_ct_file(os.path.join(folder, ct_name))

        R = 0.0019872  # Gas constant (value matches kcal/(mol*K))

        deltaS_list = []
        Tm_list = []
        try:
            for i in range(number_structures):
                homodimer = homodimer_list[i]
                deltaH = deltaH_list[i]
                deltaG = deltaG_list[i]

                deltaS = 1000.0 * (deltaH - deltaG) / (273.15 + temperature)
                if homodimer is None:
                    Tm = 1000.0 * deltaH / deltaS - 273.15
                else:
                    factor = 1 if homodimer else 4
                    Tm = 1000.0 * deltaH / (
                        deltaS + 1000.0 * R * math.log(concentration / factor)
                    ) - 273.15  # Natural log (base e)

                deltaS_list.append(deltaS)
                Tm_list.append(Tm)
        except IndexError:
            # Problematic sequences, such as 'TTCTCCACTTCCATCACC' will cause
            # an error, so the structures that could not be computed get
            # unreasonable results and will be discarded. Only the missing
            # tail is padded, which keeps all four lists the same length.
            missing = number_structures - len(deltaS_list)
            deltaS_list.extend([math.nan] * missing)
            Tm_list.extend([math.nan] * missing)

        # Write the det file
        with open(os.path.join(folder, prefix + '.det'), 'w') as flo:
            for i in range(number_structures):
                print(
                    'Structure {}: dG = {}  dH = {}  dS = {}  Tm = {}'.format(
                        i + 1, deltaG_list[i], deltaH_list[i], deltaS_list[i],
                        Tm_list[i]),
                    file=flo)

        # The dot plots, structure plots and 'ct2rnaml' output that
        # UNAFold.pl also produces are not generated here.

        # These are all of the float data type
        return deltaG_list, deltaH_list, deltaS_list, Tm_list
Ejemplo n.º 7
0
    def calculate_simple(cls,
                         folder_p,
                         seq1,
                         seq2=None,
                         sodium=0.05,
                         magnesium=0.0,
                         temperature=25,
                         concentration=0.00000025,
                         output_basename=None):
        """
        Run 'UNAFold.pl' once and build the result objects from its '.det' file.

        All files go into the 'oligos' sub-folder of 'folder_p'.
          * seq2 falsy:  UNAFold.pl runs on A.seq (hairpins), 'A.det' is read
          * seq2 truthy: UNAFold.pl runs on A.seq + B.seq (heterodimer),
                         'A-B.det' is read

        The program's STDOUT/STDERR are written to 'output_basename' inside
        that folder, or discarded when no name is given.

        If the hairpin run exits with an error, a placeholder 'A.det' is
        written instead. A failing heterodimer run raises
        subprocess.CalledProcessError.

        Returns whatever 'cls.make_objects' returns for the '.det' file.
        """
        # Make temporary folder
        workdir = os.path.join(folder_p, 'oligos')
        os.makedirs(workdir, exist_ok=True)

        # Create sequence files for input
        seq_files = [('A.seq', seq1)]
        if seq2:
            seq_files.append(('B.seq', seq2))
        for seq_name, sequence in seq_files:
            with open(os.path.join(workdir, seq_name), 'w') as seq_flo:
                print(sequence, file=seq_flo)

        # Default concentrations: 0.25 uM = 0.00025 mM = 0.00000025 M
        unafold_cmd = [
            'UNAFold.pl',
            '--NA=DNA',
            '--temp=' + str(temperature),
            '--sodium=' + Oligo.float_to_str(sodium),
            '--magnesium=' + Oligo.float_to_str(magnesium),
            '--Ct=' + Oligo.float_to_str(concentration),
            '--max=100',
        ]

        log_path = (os.path.join(workdir, output_basename)
                    if output_basename else os.devnull)
        run_options = dict(shell=False,
                           check=True,
                           cwd=workdir,
                           stderr=subprocess.STDOUT)

        with open(log_path, 'w+') as log_flo:
            if seq2:
                # Do heterodimer
                subprocess.run(unafold_cmd + ['A.seq', 'B.seq'],
                               stdout=log_flo,
                               **run_options)
            else:
                # Do hairpins
                try:
                    subprocess.run(unafold_cmd + ['A.seq'],
                                   stdout=log_flo,
                                   **run_options)
                except subprocess.CalledProcessError:
                    # This error happens sometimes, i.e. when A.seq contains
                    # 'TTCTCCACTTCCATCACC'. UNAFold.pl crashes, so a
                    # placeholder 'A.det' file is written instead.
                    print("UNAFold.pl CRASH")
                    with open(os.path.join(workdir, 'A.det'), 'w') as crash_flo:
                        print(
                            'Structure 1: dG = -999.0  dH = -999.0  dS = -999.0  Tm = 99.0',
                            file=crash_flo)

        # Make the objects
        if seq2:
            det_name, partners = 'A-B.det', (seq1, seq2)
        else:
            det_name, partners = 'A.det', (seq1,)
        return cls.make_objects(os.path.join(workdir, det_name), sodium,
                                magnesium, temperature, concentration,
                                *partners)
Ejemplo n.º 8
0
    def calculate_full(cls,
                       folder_p,
                       seq1,
                       seq2,
                       sodium=0.05,
                       magnesium=0.0,
                       temperature=25,
                       concentration=0.00000025,
                       output_basename=None):
        """
        Run 'UNAFold.pl' on every combination of the two sequences.

        All files go into the 'oligos' sub-folder of 'folder_p'. Seven runs
        are made, in this order:
          hairpins            A, B
          homodimers          A+A, B+B
          heterodimer         A+B
          reverse complements A+rc(A), B+rc(B)

        The programs' STDOUT/STDERR are written to 'output_basename' inside
        that folder, or discarded when no name is given. Any run that exits
        with an error raises subprocess.CalledProcessError.

        Returns a 7-tuple of 'cls.make_objects' results in the same order:
          (a, b, aa, bb, ab, ra, rb)
        """
        # Make temporary folder
        workdir = os.path.join(folder_p, 'oligos')
        os.makedirs(workdir, exist_ok=True)

        # Create sequence files for input; the last flag says whether the
        # reverse complement of the sequence is what gets written
        seq_files = (
            ('A.seq', seq1, False),
            ('B.seq', seq2, False),
            ('rcA.seq', seq1, True),
            ('rcB.seq', seq2, True),
        )
        for seq_name, sequence, reverse_complement in seq_files:
            with open(os.path.join(workdir, seq_name), 'w') as seq_flo:
                print(rc(sequence) if reverse_complement else sequence,
                      file=seq_flo)

        # Default concentrations: 0.25 uM = 0.00025 mM = 0.00000025 M
        unafold_cmd = [
            'UNAFold.pl',
            '--NA=DNA',
            '--temp=' + str(temperature),
            '--sodium=' + Oligo.float_to_str(sodium),
            '--magnesium=' + Oligo.float_to_str(magnesium),
            '--Ct=' + Oligo.float_to_str(concentration),
            '--max=100',
        ]

        log_path = (os.path.join(workdir, output_basename)
                    if output_basename else os.devnull)

        runs = (
            ['A.seq'],  # hairpins
            ['B.seq'],
            ['A.seq', 'A.seq'],  # homodimers
            ['B.seq', 'B.seq'],
            ['A.seq', 'B.seq'],  # heterodimer
            ['A.seq', 'rcA.seq'],  # reverse complements
            ['B.seq', 'rcB.seq'],
        )
        with open(log_path, 'w+') as log_flo:
            for run_inputs in runs:
                subprocess.run(unafold_cmd + run_inputs,
                               shell=False,
                               check=True,
                               cwd=workdir,
                               stdout=log_flo,
                               stderr=subprocess.STDOUT)

        # Make the objects
        def load(det_name, *partners):
            """Build the objects for one '.det' file in the work folder."""
            return cls.make_objects(os.path.join(workdir, det_name), sodium,
                                    magnesium, temperature, concentration,
                                    *partners)

        a = load('A.det', seq1)
        b = load('B.det', seq2)
        aa = load('A-A.det', seq1, seq1)
        bb = load('B-B.det', seq2, seq2)
        ab = load('A-B.det', seq1, seq2)
        ra = load('A-rcA.det', seq1, rc(seq1))
        rb = load('B-rcB.det', seq2, rc(seq2))

        return a, b, aa, bb, ab, ra, rb