Example #1
0
def find_lengths_h1lengths_tar(j_len):
    """Return all lengths corresponding to 'standard' mode"""
    # Each helix length comes with its own set of junction offsets; flattening
    # these pairs reproduces the parallel lengths/offsets lists.
    spec = [(8, [-1, 0, 1]),
            (9, [-2, -1, 0, 1]),
            (10, [-2, -1, 0, 1, 2]),
            (11, [-2, -1, 0, 1, 2])]
    lengths = []
    h1_lengths = []
    for length, offs in spec:
        for offset in offs:
            lengths.append(length)
            h1_lengths.append(convert_offset_to_h1length(length - j_len, offset))
    return lengths, h1_lengths


if __name__ == '__main__':
    args = parser.parse_args()
    helix_seqs = processing.load_file(args.helix_seqs)
    junction_seqs = processing.load_file(args.junction_seqs)

    if args.flank_to_add:
        base_before, base_after = args.flank_to_add.split('_')
        junction_seqs.loc[:,
                          'no_flank'] = junction_seqs.side1 + '_' + junction_seqs.side2
        side1 = base_before + junction_seqs.side1 + base_after
        side2 = seqfun.rc(base_after,
                          rna=True) + junction_seqs.side2 + seqfun.rc(
                              base_before, rna=True)
        junction_seqs.loc[:, 'side1'] = side1
        junction_seqs.loc[:, 'side2'] = side2
        junction_seqs.loc[:, 'flank'] = args.flank_to_add

    if args.switch_sides:
Example #2
0
##### IMPORT #####
import numpy as np
import pandas as pd
import os
import sys
import argparse
import itertools

from hjh import processing

# load sequences
# NOTE(review): assumes the loaded table has columns 'j1', 'j2', 'j3' (the
# three strands of a three-way junction) -- confirm against the .dat file.
junction_seqs = processing.load_file(
    '~/JunctionLibrary/seq_params/three_way_junctions.dat')

# cut out the 'extra'
# For each three-way junction build a two-way 'control' sequence in each of
# the two loop contexts (L1, L2): one strand is kept whole and the opposite
# side is spliced from single flanking bases of the two neighbouring strands.
junction_seq_controls = {}
for idx, row in junction_seqs.iterrows():
    for loop_context in ['L1', 'L2']:
        if loop_context == 'L1':
            # keep j1 whole; other side = first base of j2 + last base of j3
            two_way_seq = pd.Series({
                'side1': row.j1,
                'side2': row.j2[0] + row.j3[-1]
            })

        elif loop_context == 'L2':
            # keep j3 whole; other side = first base of j1 + last base of j2
            two_way_seq = pd.Series({
                'side1': row.j1[0] + row.j2[-1],
                'side2': row.j3
            })
        # carry all remaining (non-strand) columns along as metadata
        junction_seq_controls[(idx, loop_context)] = pd.concat(
            [two_way_seq, row.drop(['j1', 'j2', 'j3'])])
##### IMPORT #####
import numpy as np
import pandas as pd
import os
import sys
import argparse
import itertools

from hjh import processing


# load sequences
junction_seqs = processing.load_file('~/JunctionLibrary/seq_params/three_way_junctions.dat')

# cut out the 'extra'
# Build two-way 'control' sequences from each three-way junction row, one per
# loop context: L1 keeps strand j1 intact, L2 keeps strand j3 intact, and the
# opposite side is spliced from single bases of the neighbouring strands.
# All other columns of the row are carried along as metadata.
junction_seq_controls = {}
for idx, row in junction_seqs.iterrows():
    for loop_context in ['L1','L2']:
        if loop_context == 'L1':
            two_way_seq = pd.Series({'side1':row.j1,
                                     'side2':row.j2[0] + row.j3[-1]})

        elif loop_context == 'L2':
            two_way_seq = pd.Series({'side1':row.j1[0] + row.j2[-1],
                                     'side2':row.j3})
        junction_seq_controls[(idx, loop_context)] = pd.concat([
                two_way_seq, row.drop(['j1', 'j2', 'j3'])])

        
# reshape the dict of Series into a DataFrame: concat gives a MultiIndex of
# ((index, loop_context), field); unstack pivots fields into columns, then
# swaplevel/sort_index puts loop_context as the outer index level.
junction_seq_controls = pd.concat(junction_seq_controls, names=['index', 'loop_context']).unstack().swaplevel(0,1).sort_index()
for loop_context in ['L1', 'L2']:
Example #4
0
# Command-line inputs: the table of sequences+positions to mutate, and the
# output path. NOTE(review): `parser` itself is created earlier in the file,
# outside this span.
parser.add_argument(
    '-s',
    '--seqs',
    help='filename of things to mutate with the '
    'positions in which to mutate (side1, side2, positions), i.e. '
    '~/JunctionLibrary/seq_params/receptors_expt3_original.dat',
    required=True)
parser.add_argument('-out',
                    '--out_file',
                    help='file to save output',
                    required=True)

if __name__ == '__main__':
    args = parser.parse_args()

    receptors = processing.load_file(args.seqs)
    script = 'python ~/JunctionLibrary/mutate_seqs.py -s {in_file} -out {out_file} -p {positions}'
    working_dir = './'
    # groupby side length
    out_filenames = []
    for name, group in receptors.groupby('positions'):
        # make name machine friendly
        filename = working_dir + os.path.basename(
            os.path.splitext(args.seqs)[0] + '_' +
            name.replace(';', '.').replace(',', ''))
        in_filename, out_filename = filename + '.dat', filename + '_muts.dat',
        group.drop('positions', axis=1).to_csv(in_filename,
                                               index=False,
                                               sep='\t')
        call = script.format(in_file=in_filename,
                             out_file=out_filename,
Example #5
0
# Provides modular tools for making a helix-junction
# helix DNA library

##### IMPORT #####
import numpy as np
import pandas as pd
import os
import sys
import argparse
import itertools

from hjh import processing

#set up command line argument parser
# Script: load a table of sequences, annotate secondary structure, save TSV.
parser = argparse.ArgumentParser(description="script for making library")
parser.add_argument(
    '-s',
    '--seqs',
    help='filenames with column "seq" to test ss structure of.')
parser.add_argument('-out',
                    '--out_file',
                    help='file to save output',
                    required=True)

if __name__ == '__main__':
    args = parser.parse_args()
    seqs = processing.load_file(args.seqs)
    # annotate each row with the secondary-structure check result; the
    # semantics of the 'ss' values are defined by hjh.processing.
    seqs.loc[:, 'ss'] = processing.check_ss_structure_set(seqs)

    ## save
    seqs.to_csv(args.out_file, index=False, sep='\t')
                    'junction location already defined (h1_side1, h2_side1, h2_side2, h2_side1).'
                    ' If given, all other options (i.e. length, offsets) are ignored.')

# Remaining command-line inputs. NOTE(review): `parser` and its earlier
# arguments are defined above this span; -out is optional here (no required=).
parser.add_argument('-r', '--junction_seqs', help='side1 and side2 of the junction sequence', required=True)
parser.add_argument('-out','--out_file', help='file to save output', )

def find_length(seq):
    """Return the length of the shortest side of a junction sequence.

    `seq` is a string of sides joined by '_' (e.g. 'ACG_UU' -> 2).
    A string with no '_' is treated as a single side.
    """
    # generator expression: no need to materialize a list just to take min()
    return min(len(s) for s in seq.split('_'))




if __name__ == '__main__':
    args = parser.parse_args()
    helix_seqs = processing.load_file(args.predefined_helix_seqs)
    junction_seqs = processing.load_file(args.junction_seqs)
        
    # Build every helix x junction combination: each junction side is flanked
    # by the inner helix halves (h2 on the inside, h1 on the outside).
    all_seqs = []
    for (idx1, helix_row), (idx2, junction_row) in itertools.product(helix_seqs.iterrows(), junction_seqs.iterrows()):

        seq = pd.Series({'side1':helix_row.h1_side1 + junction_row.side1 + helix_row.h2_side1,
                         'side2':helix_row.h2_side2 + junction_row.side2 + helix_row.h1_side2})
        # carry remaining metadata plus human-readable helix/junction strings
        # ('_' separates sides, '&' separates the two helices)
        seq_data = pd.concat([helix_row.drop(['h1_side1', 'h1_side2', 'h2_side1', 'h2_side2']),
                              junction_row.drop(['side1', 'side2']),
                              pd.Series({'helix_seq':helix_row.h1_side1 + '_' + helix_row.h2_side1 + '&' + helix_row.h2_side2 + '_' + helix_row.h1_side2,
                                         'junction_seq':junction_row.side1 + '_' + junction_row.side2})])
        
        # NOTE(review): all_seqs is accumulated but not saved within this
        # span -- presumably concatenated/written further down (truncated here).
        all_seqs.append(pd.concat([seq, seq_data]))
            
            
import logging
from hjh import processing


#set up command line argument parser
# Driver script: split the input table by its 'positions' column and invoke
# mutate_seqs.py once per group via a shell call.
parser = argparse.ArgumentParser(description="script for making library")
parser.add_argument('-s', '--seqs', help='filename of things to mutate with the '
                    'positions in which to mutate (side1, side2, positions), i.e. '
                    '~/JunctionLibrary/seq_params/receptors_expt3_original.dat',
                    required=True)
parser.add_argument('-out','--out_file', help='file to save output', required=True )

if __name__=='__main__':
    args = parser.parse_args()

    receptors = processing.load_file(args.seqs)
    script = 'python ~/JunctionLibrary/mutate_seqs.py -s {in_file} -out {out_file} -p {positions}'
    working_dir = './'
    # groupby side length
    out_filenames = []
    for name, group in receptors.groupby('positions'):
        # make name machine friendly
        filename = working_dir + os.path.basename(os.path.splitext(args.seqs)[0]
                                                  + '_' + name.replace(';', '.').replace(',', ''))
        in_filename, out_filename = filename + '.dat', filename + '_muts.dat', 
        group.drop('positions', axis=1).to_csv(in_filename, index=False, sep='\t')
        # NOTE(review): shell=True with a string built from file contents is
        # injection-prone; prefer subprocess.run([...], shell=False). Also,
        # `subprocess` is not imported in the visible import block -- confirm
        # it is imported elsewhere in the file.
        call = script.format(in_file=in_filename, out_file=out_filename, positions='"%s"'%name)
        logging.info(call)
        subprocess.call(call, shell=True)
        out_filenames.append(out_filename)
    # join
from hjh import processing

#set up command line argument parser
# Inputs: a seed-sequence table and one or more adapter tables to thread on.
parser = argparse.ArgumentParser(description="script for making library")
# fix: help text typo "Must hve" -> "Must have"
parser.add_argument('-a', '--starting_seq', help='seed sequence, i.e. a loop. Must have "seq" column.',
                    default='seq_params/loop_1.dat')
parser.add_argument('-b', '--add_seqs', nargs="+", help='list of filenames of sequences to add. '
                    'All should have columns "side1" and "side2" of the adapter sequences')
parser.add_argument('-out','--out_file', help='file to save output', required=True)


if __name__ == '__main__':
    args = parser.parse_args()
    
    # load seqs
    loop_seqs = processing.load_file(args.starting_seq)
    other_seqs = [processing.load_file(filename) for filename in args.add_seqs]
    
    # starting with the loop, thread together the sequential pieces of the tectoRNA
    # Each pass combines every current sequence with every (side1, side2)
    # adapter pair, so the table grows multiplicatively per added file.
    new_seqs = loop_seqs
    new_seqs.loc[:, 'starting_seq'] = new_seqs.seq
    for add_seqs in other_seqs:
        all_seqs = []
        for (idx1, row1), (idx2, row2) in itertools.product(new_seqs.iterrows(), add_seqs.iterrows()):
            seq = processing.thread_together(row1.seq, (row2.side1, row2.side2))
            # new 'seq' column plus all metadata from both source rows
            seq_data = pd.concat([pd.Series({'seq':seq}),
                row1.drop('seq'), row2.drop(['side1', 'side2'])])
            all_seqs.append(seq_data)
        # Series-per-row -> DataFrame (concat on axis=1 then transpose)
        all_seqs = pd.concat(all_seqs, axis=1).transpose()
        # NOTE(review): new_seqs is not saved within this span -- the write to
        # args.out_file presumably follows below (truncated here).
        new_seqs = all_seqs
    '--unique',
    action="store_true",
    help='[optional] whether to ensure '
    '"seq" column is unique. If set, will take first instance of each seq. default=False.'
)
parser.add_argument('-out',
                    '--out_file',
                    help='file to save output',
                    required=True)

if __name__ == '__main__':
    args = parser.parse_args()
    # load
    # Concatenate each input file under a '<libname>_<i>' sublibrary key.
    new_seqs = pd.concat(
        {
            '%s_%d' % (args.libname, i): processing.load_file(filename)
            for i, filename in enumerate(args.add_seqs)
        },
        names=['sublibrary', 'index'])
    if not 'sublibrary' in new_seqs.columns.tolist():
        # only add 'sublibrary' if it isn't already a columns
        new_seqs.reset_index(level=0, inplace=True)

    # make unique if option given
    # groupby('seq').first() keeps the first instance of each seq; reselecting
    # old_cols restores the original column order.
    if args.unique:
        old_cols = new_seqs.columns.tolist()
        new_seqs = new_seqs.groupby('seq').first().reset_index().loc[:,
                                                                     old_cols]

    # save
    # NOTE(review): ext_out is unused within this span -- the extension-based
    # save logic presumably continues below (truncated here).
    ext_out = os.path.splitext(args.out_file)[-1]
    offsets = [0, -1, 0, -1, 0, 0]
    h1_lengths = [convert_offset_to_h1length(length-j_len, i) for i, length in zip(offsets, lengths)]
    return lengths, h1_lengths


def find_lengths_h1lengths_tar(j_len):
    """Return all lengths corresponding to 'standard' mode"""
    # map each helix length to the junction offsets sampled at that length;
    # dict order (8, 9, 10, 11) matches the original flat lists
    offsets_per_length = {
        8: [-1, 0, 1],
        9: [-2, -1, 0, 1],
        10: [-2, -1, 0, 1, 2],
        11: [-2, -1, 0, 1, 2],
    }
    lengths = [length
               for length, offs in offsets_per_length.items()
               for _ in offs]
    h1_lengths = [convert_offset_to_h1length(length - j_len, offset)
                  for length, offs in offsets_per_length.items()
                  for offset in offs]
    return lengths, h1_lengths


if __name__ == '__main__':
    args = parser.parse_args()
    helix_seqs = processing.load_file(args.helix_seqs)
    junction_seqs = processing.load_file(args.junction_seqs)
    
    # optionally wrap both junction sides in a base-pair flank; the flank is
    # given as 'before_after' (split on '_')
    if args.flank_to_add:
        base_before, base_after = args.flank_to_add.split('_')
        # remember the unflanked junction as 'side1_side2'
        junction_seqs.loc[:, 'no_flank'] = junction_seqs.side1 + '_' + junction_seqs.side2
        side1 = base_before + junction_seqs.side1 + base_after
        # side2 gets the complementary bases; seqfun.rc is presumably reverse
        # complement (rna=True). NOTE(review): `seqfun` is not imported in the
        # visible import block -- confirm it is imported elsewhere.
        side2 = seqfun.rc(base_after, rna=True) + junction_seqs.side2 + seqfun.rc(base_before, rna=True)
        junction_seqs.loc[:, 'side1'] = side1
        junction_seqs.loc[:, 'side2'] = side2
        junction_seqs.loc[:, 'flank'] = args.flank_to_add
    
    # optionally build a copy with side1/side2 exchanged; NOTE(review):
    # opposite_side is not used within this span (truncated here).
    if args.switch_sides:
        opposite_side = junction_seqs.copy()
        opposite_side.loc[:, 'side1'] = junction_seqs.side2
        opposite_side.loc[:, 'side2'] = junction_seqs.side1
from hjh import processing

#set up command line argument parser
# Script: concatenate several sequence files into one library, tagging each
# input with a sublibrary name and optionally de-duplicating on 'seq'.
parser = argparse.ArgumentParser(description="script for making library")
parser.add_argument('-a', '--add_seqs', nargs="+", help='list of filenames of sequence data to add.')
parser.add_argument('-i', '--libname', help='[optional] the name to prepend to sublibraries, '
                    'i.e. "tertcontact". default is "library"', default='library')
parser.add_argument('-u', '--unique', action="store_true", help='[optional] whether to ensure '
                    '"seq" column is unique. If set, will take first instance of each seq. default=False.')
parser.add_argument('-out','--out_file', help='file to save output', required=True)


if __name__ == '__main__':
    args = parser.parse_args()
    # load
    # each input file is keyed '<libname>_<i>' in the concatenated index
    new_seqs = pd.concat({'%s_%d'%(args.libname, i):processing.load_file(filename)
                          for i, filename in enumerate(args.add_seqs)}, names=['sublibrary', 'index'])
    if not 'sublibrary' in new_seqs.columns.tolist():
        # only add 'sublibrary' if it isn't already a columns
        new_seqs.reset_index(level=0, inplace=True)
    
    # make unique if option given
    # first() keeps the first row per seq; old_cols restores column order
    if args.unique:
        old_cols = new_seqs.columns.tolist()
        new_seqs = new_seqs.groupby('seq').first().reset_index().loc[:, old_cols]
        
    
    # save
    # NOTE(review): only the .csv branch is visible here -- the handling of
    # other extensions presumably follows below (truncated in this span).
    ext_out = os.path.splitext(args.out_file)[-1]
    if ext_out == '.csv':
        new_seqs.to_csv(args.out_file, index=False)
Example #12
0
import pandas as pd
import os
import sys
import argparse
import itertools

from hjh import processing


# load sequences
# For each three-way-helix parameter file, recast the base helix as 'h1' and
# each loop-context helix as 'h2', producing a two-helix table per context.
for filename in ['~/JunctionLibrary/seq_params/three_way_helices.dat',
                 '~/JunctionLibrary/seq_params/three_way_helices2.dat',
                 '~/JunctionLibrary/seq_params/three_way_helices_minus1.dat',
                 '~/JunctionLibrary/seq_params/three_way_helices_minus2.dat']:

    helix_seqs = processing.load_file(filename)
    
    # split into base helix, L1 helix, and L2 helix
    # rename base_side1/base_side2 -> h1_side1/h1_side2
    keys = ['base_side1', 'base_side2']
    base_helix = helix_seqs.loc[:, keys].rename(columns={key:'h1_'+key.split('_')[-1] for key in keys}).copy()
    
    for loop_context in ['L1', 'L2']:
        if loop_context == 'L1':
            keys = ['h1_side1', 'h1_side2']
        elif loop_context == 'L2':
            keys = ['h2_side1', 'h2_side2']
        else:
            # unreachable: loop_context is always 'L1' or 'L2' here
            keys=None
        # rename the chosen columns to h2_side1/h2_side2
        loop_helix = helix_seqs.loc[:, keys].rename(columns={key:'h2_'+key.split('_')[-1] for key in keys}).copy()
    
        # NOTE(review): predefined_helix is rebuilt each iteration but not
        # used/saved within this span -- presumably handled below (truncated).
        predefined_helix =pd.concat([base_helix, loop_helix], axis=1)
                    required=True)
parser.add_argument(
    '-out',
    '--out_file',
    help='file to save output',
)


def find_length(seq):
    """Return the length of the shortest '_'-separated side of `seq`."""
    side_lengths = [len(side) for side in seq.split('_')]
    shortest = min(side_lengths)
    return shortest


if __name__ == '__main__':
    args = parser.parse_args()
    helix_seqs = processing.load_file(args.predefined_helix_seqs)
    junction_seqs = processing.load_file(args.junction_seqs)

    all_seqs = []
    for (idx1, helix_row), (idx2, junction_row) in itertools.product(
            helix_seqs.iterrows(), junction_seqs.iterrows()):

        seq = pd.Series({
            'side1':
            helix_row.h1_side1 + junction_row.side1 + helix_row.h2_side1,
            'side2':
            helix_row.h2_side2 + junction_row.side2 + helix_row.h1_side2
        })
        seq_data = pd.concat([
            helix_row.drop(['h1_side1', 'h1_side2', 'h2_side1', 'h2_side2']),
            junction_row.drop(['side1', 'side2']),
#!/usr/bin/env python

# Author: Sarah Denny, Stanford University 

# Provides modular tools for making a helix-junction
# helix DNA library


##### IMPORT #####
import numpy as np
import pandas as pd
import os
import sys
import argparse
import itertools

from hjh import processing

#set up command line argument parser
# Script: load a table of sequences, annotate secondary structure, save TSV.
parser = argparse.ArgumentParser(description="script for making library")
parser.add_argument('-s', '--seqs',  help='filenames with column "seq" to test ss structure of.')
parser.add_argument('-out','--out_file', help='file to save output', required=True)


if __name__ == '__main__':
    args = parser.parse_args()
    seqs = processing.load_file(args.seqs)
    # annotate each row with the secondary-structure check result; the
    # semantics of the 'ss' values are defined by hjh.processing.
    seqs.loc[:, 'ss'] = processing.check_ss_structure_set(seqs)
    
    ## save
    seqs.to_csv(args.out_file, index=False, sep='\t')