def d5s_run():
    """Process the hard-coded 5S paired-end dataset and write its reactivities.

    Developer scratch run: every input and output location is built from a
    fixed local base directory, and ``run.skip_database`` is switched on
    before any data is processed.
    """
    from spats_shape_seq import Spats
    # Only referenced by the disabled processor override below.
    from spats_shape_seq.partial import PartialFindProcessor

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    r1_path = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq"
    r2_path = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq"

    # Disabled alternative: load the reads into a pair database first.
    #from spats_shape_seq.db import PairDB
    #pair_db = PairDB(bp + "dev_out/pairs.db")
    #if False:
    #    pair_db.add_targets_table(bp + "5s/5S.fa")
    #    pair_db.parse(r1_path, r2_path)

    runner = Spats()
    #runner.run._processor_class = PartialFindProcessor
    runner.run.skip_database = True
    #runner.run.writeback_results = True
    #runner.run.resume_processing = True
    #runner.run.result_set_name = "lookup"
    runner.addTargets(bp + "5s/5S.fa")
    #runner.process_pair_db(pair_db)
    runner.process_pair_data(r1_path, r2_path)
    runner.compute_profiles()
    runner.write_reactivities(bp + "dev_out/rx2.out")
def spats(target, r1, r2, out, show_sites = True):
    """Process one R1/R2 read pair file set against ``target``.

    Args:
        target: value handed to ``Spats.addTargets``.
        r1: first argument to ``process_pair_data``.
        r2: second argument to ``process_pair_data``.
        out: directory prefix; ``"/rx.out"`` is appended for the output file.
        show_sites: when true, ``spats_config.show_id_to_site`` is enabled
            and nothing is written after processing; when false, profiles
            are computed and reactivities are written to ``out + "/rx.out"``.
    """
    from spats_shape_seq import Spats, spats_config

    runner = Spats()
    runner.addTargets(target)
    runner.addMasks("RRRY", "YYYR")

    if show_sites:
        spats_config.show_id_to_site = True

    runner.process_pair_data(r1, r2)

    # In site-display mode the run ends once the pairs have been processed.
    if show_sites:
        return

    runner.compute_profiles()
    runner.write_reactivities(out + "/rx.out")
def spats(target, r1, r2, out, show_sites=True):
    """Process one R1/R2 read pair file set against ``target``.

    Args:
        target: value handed to ``Spats.addTargets``.
        r1: first argument to ``process_pair_data``.
        r2: second argument to ``process_pair_data``.
        out: directory prefix; ``"/rx.out"`` is appended for the output file.
        show_sites: when true, ``spats_config.show_id_to_site`` is enabled
            and nothing is written after processing; when false, profiles
            are computed and reactivities are written to ``out + "/rx.out"``.
    """
    from spats_shape_seq import Spats, spats_config

    runner = Spats()
    runner.addTargets(target)
    runner.addMasks("RRRY", "YYYR")

    if show_sites:
        spats_config.show_id_to_site = True

    runner.process_pair_data(r1, r2)

    # In site-display mode the run ends once the pairs have been processed.
    if show_sites:
        return

    runner.compute_profiles()
    runner.write_reactivities(out + "/rx.out")
def test_refactor():
    """Run ``spats_clean`` on the 5S data and diff its output against t2/rx.out.

    All paths are hard-coded beneath a local development directory.  The
    output of this run is written under ``t3/`` and compared, using the
    external ``diff`` command, with the existing ``t2/rx.out``.

    Raises:
        subprocess.CalledProcessError: if ``diff`` exits non-zero, i.e. the
            two reactivity files differ or one of them cannot be read.
    """
    import subprocess

    from spats_clean import Spats

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/5sq_dev/"
    out = bp + "t3/"
    s = Spats(bp + "5S.fa", out)
    s.setup()
    s.process_pair_data(
        bp + "data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq",
        bp + "data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq",
    )
    s.compute_profiles()
    s.write_reactivities()

    # `out` already ends in "/", so this path contains "//"; POSIX path
    # resolution treats the doubled separator as a single one.
    subprocess.check_call(["diff", bp + "t2/rx.out", out + "/rx.out"])

    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3 (the bare print statement is not).
    print("Diff OK")
def cotrans_run():
    """Process the hard-coded cotrans dataset and write its reactivities.

    Developer scratch run: the target file, both FASTQ inputs and the output
    file are all located beneath a fixed local base directory, and
    ``run.skip_database`` is switched on before any data is processed.
    """
    from spats_shape_seq import Spats

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/datasets/cotrans/"
    r1_path = bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R1.fastq"
    r2_path = bp + "data/EJS_6_F_10mM_NaF_Rep1_GCCAAT_R2.fastq"

    runner = Spats()
    #from spats_shape_seq.partial import PartialFindProcessor
    #runner.run._processor_class = PartialFindProcessor
    runner.run.skip_database = True
    #runner.run.writeback_results = True
    #runner.run.resume_processing = True
    #runner.run.result_set_name = "lookup"
    runner.addTargets(bp + "F_wt.fa")
    runner.process_pair_data(r1_path, r2_path)
    runner.compute_profiles()
    runner.write_reactivities(bp + "dev_out/rx.out")
def test_refactor():
    """Run ``spats_clean`` on the 5S data and diff its output against t2/rx.out.

    All paths are hard-coded beneath a local development directory.  The
    output of this run is written under ``t3/`` and compared, using the
    external ``diff`` command, with the existing ``t2/rx.out``.

    Raises:
        subprocess.CalledProcessError: if ``diff`` exits non-zero, i.e. the
            two reactivity files differ or one of them cannot be read.
    """
    import subprocess

    from spats_clean import Spats

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/5sq_dev/"
    out = bp + "t3/"
    s = Spats(bp + "5S.fa", out)
    s.setup()
    s.process_pair_data(
        bp + "data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq",
        bp + "data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq",
    )
    s.compute_profiles()
    s.write_reactivities()

    # `out` already ends in "/", so this path contains "//"; POSIX path
    # resolution treats the doubled separator as a single one.
    subprocess.check_call(["diff", bp + "t2/rx.out", out + "/rx.out"])

    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3 (the bare print statement is not).
    print("Diff OK")
def d5s_run():
    """Process the hard-coded 5S paired-end dataset and write its reactivities.

    Developer scratch run: every input and output location is built from a
    fixed local base directory, and ``run.skip_database`` is switched on
    before any data is processed.
    """
    from spats_shape_seq import Spats
    # Only referenced by the disabled processor override below.
    from spats_shape_seq.partial import PartialFindProcessor

    bp = "/Users/jbrink/mos/tasks/1RwIBa/tmp/"
    r1_path = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R1_001.fastq"
    r2_path = bp + "5s/data/17571-AD1AW-KEW11-5S-2p1-18x-23FEB15-GGCTAC_S10_L001_R2_001.fastq"

    # Disabled alternative: load the reads into a pair database first.
    #from spats_shape_seq.db import PairDB
    #pair_db = PairDB(bp + "dev_out/pairs.db")
    #if False:
    #    pair_db.add_targets_table(bp + "5s/5S.fa")
    #    pair_db.parse(r1_path, r2_path)

    runner = Spats()
    #runner.run._processor_class = PartialFindProcessor
    runner.run.skip_database = True
    #runner.run.writeback_results = True
    #runner.run.resume_processing = True
    #runner.run.result_set_name = "lookup"
    runner.addTargets(bp + "5s/5S.fa")
    #runner.process_pair_db(pair_db)
    runner.process_pair_data(r1_path, r2_path)
    runner.compute_profiles()
    runner.write_reactivities(bp + "dev_out/rx2.out")
def _dump_run(self):
    """Export the profiles of a completed run as CSV files.

    Loads the run file named by ``self._run_file()``, recomputes its profiles
    and writes per-site tables through ``self._write_csv``:

    * ``self.cotrans`` false: one ``<target>.csv`` per target, under
      ``self.path``.
    * ``self.cotrans`` true: only the first target is exported.  A single
      ``<target>.csv`` holds the rows of every cotrans end, and one
      ``<target>_<key>_mat.csv`` matrix is written per profile attribute,
      with one row per end, right-padded with empty cells.

    Raises:
        Exception: if the run file does not exist.
    """
    run_name = self._run_file()
    if not os.path.exists(run_name):
        raise Exception("Run must be run before attempting dump")

    spats = Spats()
    spats.load(run_name)
    profiles = spats.compute_profiles()
    mutations = spats.run.count_mutations
    indels = spats.run.handle_indels

    headers = ["L", "site", "nt", "f+", "f-"]
    if indels:
        headers += ["ins+", "ins-", "del+", "del-"]
    if mutations:
        headers += ["mut+", "mut-", "beta", "mu", "r"]
    else:
        headers += ["beta", "theta", "rho"]
    headers += ["c", "c alt"]

    def site_rows(prof, tseq, end):
        # Build one row per site 0..end, columns in the same order as
        # `headers`.  Shared by the cotrans and single-length branches.
        rows = []
        # `range` (rather than `xrange`) runs on both Python 2 and 3.
        for i in range(end + 1):
            # Site 0 gets '*' in the nucleotide column; site i > 0 maps to
            # tseq[i - 1].
            row = [end, i, tseq[i - 1] if i else '*',
                   prof.treated[i], prof.untreated[i]]
            if indels:
                row += [prof.treated_inserts[i], prof.untreated_inserts[i],
                        prof.treated_deletes[i], prof.untreated_deletes[i]]
            if mutations:
                row += [prof.treated_muts[i], prof.untreated_muts[i],
                        prof.beta[i], prof.mu[i], prof.r_mut[i]]
            else:
                row += [prof.beta[i], prof.theta[i], prof.rho[i]]
            row += [prof.c, prof.c_alt]
            rows.append(row)
        return rows

    if self.cotrans:
        tgt = spats.targets.targets[0]
        cotrans_keys = profiles.cotrans_keys()

        # The end length is the integer after the last '_' of each key.
        end_profiles = []
        for key in cotrans_keys:
            end = int(key.split('_')[-1])
            end_profiles.append(
                (end, profiles.profilesForTargetAndEnd(tgt.name, end)))

        # All ends go into one combined table.
        data = []
        for end, prof in end_profiles:
            data.extend(site_rows(prof, tgt.seq, end))
        output_path = os.path.join(self.path, '{}.csv'.format(tgt.name))
        self._write_csv(output_path, headers, data)

        empty_cell = ''
        keys = ['treated', 'untreated']
        if indels:
            keys += ['treated_inserts', 'untreated_inserts',
                     'treated_deletes', 'untreated_deletes']
        if mutations:
            # NOTE(review): the per-site rows read `treated_muts`,
            # `untreated_muts` and `r_mut`, but these keys name
            # `treated_mut`, `untreated_mut` and `r`.  Left unchanged because
            # the keys also appear in the output file names -- confirm which
            # attribute names the profile object really exposes.
            keys += ['treated_mut', 'untreated_mut', 'beta', 'mu', 'r']
        else:
            keys += ['beta', 'theta', 'rho']

        for key in keys:
            ncols = 0
            mat = []
            for _end, prof in end_profiles:
                # Copy before padding so the profile's own data is not
                # extended in place.
                vals = list(getattr(prof, key))
                if not ncols:
                    # Row width is fixed by the first end's values.
                    ncols = len(cotrans_keys) + len(vals)
                if len(vals) < ncols:
                    vals += [empty_cell] * (ncols - len(vals))
                mat.append(vals)
            # NOTE(review): unlike the table above, this path is not joined
            # with self.path -- confirm whether that is intended.
            self._write_csv('{}_{}_mat.csv'.format(tgt.name, key), None, mat)
    else:
        for tgt in spats.targets.targets:
            end = len(tgt.seq)
            prof = profiles.profilesForTarget(tgt)
            data = site_rows(prof, tgt.seq, end)
            output_path = os.path.join(self.path, '{}.csv'.format(tgt.name))
            self._write_csv(output_path, headers, data)