# NOTE(review): whitespace-collapsed fragment (newlines/indentation lost).
# It opens with the tail of a lookup that returns the Roman numerals "XV".."XX"
# for x == 15..20 and None for any other value that reaches this point; the
# enclosing function header lies outside this view.
# The remaining statements read the TAB path from --in (printing a message and
# exiting if the path does not exist), fall back to tabpath + ".gff" when
# getOptionalArg("--out") returns False or None, write a GFF3 comment header to
# the output file and start iterating over the TAB file. The loop body is not
# visible here.
if x == 15: return "XV" if x == 16: return "XVI" if x == 17: return "XVII" if x == 18: return "XVIII" if x == 19: return "XIX" if x == 20: return "XX" return None tabpath = ap.getArg("--in") if False == os.path.exists(tabpath): print "\n. I cannot find your TAB file at", tabpath exit() gffpath = ap.getOptionalArg("--out") if gffpath == False or gffpath == None: gffpath = tabpath + ".gff" fin = open(tabpath, "r") fout = open(gffpath, "w") fout.write("# gff-version 3\n") fout.write("# This file was auto-generated by the script tab2gff.py\n") fout.write("# using data from the TAB file " + tabpath + "\n") fout.write("#\n") for l in fin.xreadlines():
# NOTE(review): whitespace-collapsed fragment; it is cut off inside the final
# `if this_gene == genename:` body.
# Reads --gff (must exist on disk, otherwise an error is printed and the script
# exits), --bdg (existence is checked unless getArg returned False), --genename
# and --regsize (converted to int), then scans the GFF line by line, skipping
# lines that start with "#".
# For each remaining line the gene name is taken from whitespace-separated
# column 9 (tokens[8]): the text after "=" in its first ";"-separated field.
# When it equals --genename, chrom is set from column 1 and start from column 4.
# NOTE(review): fin.xreadlines() exists only on Python 2 file objects.
import os, sys from argParser import ArgParser ap = ArgParser(sys.argv) gffpath = ap.getArg("--gff") if False == os.path.exists(gffpath): print "\n. Error. I can't find your GFF at", gffpath exit() bdgpath = ap.getArg("--bdg") if bdgpath != False: if False == os.path.exists(bdgpath): print "\n. Error. I can't find your BDG at", bdgpath exit() genename = ap.getArg("--genename") regsize = int( ap.getArg("--regsize") ) chrom = None start = None stop = None strand = None fin = open(gffpath, "r") for l in fin.xreadlines(): if l.__len__() > 0 and False == l.startswith("#"): tokens = l.split() this_gene = tokens[8].split(";")[0].split("=")[1] # orfName if this_gene == genename: chrom = tokens[0] start = int( tokens[3] )
# NOTE(review): whitespace-collapsed fragment; get_runid_reps() is cut off right
# after it creates an empty list in runid_reps for each entry of runids.
# Visible behaviour: parses --runids (list), --runid and --pillars from the
# command line, initialises orf_names to an empty dict, and defines and
# immediately calls splash(), which prints a fixed banner to stdout.
# # Create a master table of gene-peak occurances across multiple species and replicate conditions # import os, sys, re from plot_venn import * from argParser import ArgParser ap = ArgParser(sys.argv) runids = ap.getList("--runids") # a list of runid names runid = ap.getArg("--runid") # the (output) runid for this peak comparison pillarspath = ap.getArg("--pillars") # a file containing G orf_names = {} def splash(): print "============================================" print " compare_peaks.py" print " Victor Hanson-Smith, 2014" print " [email protected]" print "============================================" splash() def get_runid_reps(): print "\n. Reading Replicates..." runid_reps = {} for t in runids: if t not in runid_reps: runid_reps[t] = []
# NOTE(review): whitespace-collapsed fragment; it is cut off inside the final
# `if this_gene == genename:` body, after chrom is assigned.
# Reads --gff (must exist on disk, otherwise an error is printed and the script
# exits), --bdg (existence is checked unless getArg returned False), --genename
# and --regsize (converted to int), then scans the GFF line by line, skipping
# lines that start with "#".
# For each remaining line the gene name is taken from whitespace-separated
# column 9 (tokens[8]): the text after "=" in its first ";"-separated field.
# When it equals --genename, chrom is set from column 1.
# NOTE(review): fin.xreadlines() exists only on Python 2 file objects.
import os, sys from argParser import ArgParser ap = ArgParser(sys.argv) gffpath = ap.getArg("--gff") if False == os.path.exists(gffpath): print "\n. Error. I can't find your GFF at", gffpath exit() bdgpath = ap.getArg("--bdg") if bdgpath != False: if False == os.path.exists(bdgpath): print "\n. Error. I can't find your BDG at", bdgpath exit() genename = ap.getArg("--genename") regsize = int(ap.getArg("--regsize")) chrom = None start = None stop = None strand = None fin = open(gffpath, "r") for l in fin.xreadlines(): if l.__len__() > 0 and False == l.startswith("#"): tokens = l.split() this_gene = tokens[8].split(";")[0].split("=")[1] # orfName if this_gene == genename: chrom = tokens[0]
# NOTE(review): whitespace-collapsed fragment; it ends on an `if` whose body is
# not visible.
# Visible behaviour: obtains a database handle from build_db(), then handles two
# toggles described in the code as dev-only (--make_testdb, --reset_files), each
# of which performs its action and exits.
# The IMPORT stage reads --pillarspath and calls import_pillars() only when
# neither --skip_pillars_import nor --skip_import is set; otherwise it prints a
# notice. GFF import runs when neither --skip_gff nor --skip_import is set.
# NOTE(review): because indentation was lost, it cannot be determined from this
# text which of the calls following import_gffs(con) (and whether
# print_pillarsstats(con)) sit inside those conditionals - confirm against the
# original file.
con = build_db(dbpath=dbpath) """--make_testdb is a dev-only option""" if True == ap.getOptionalToggle("--make_testdb"): reduce_db_for_test(con) exit() """--reset_files is a dev-only option""" if True == ap.getOptionalToggle("--reset_files"): reset_files(con) exit() # # IMPORT # pillarspath = ap.getArg("--pillarspath") if False == ap.getOptionalToggle("--skip_pillars_import") and False == ap.getOptionalToggle("--skip_import"): con = import_pillars(pillarspath, con) else: print "\n. I'm skipping pillars import. I will use the existing pillars in the database." print_pillarsstats(con) if False == ap.getOptionalToggle("--skip_gff") and False == ap.getOptionalToggle("--skip_import"): import_gffs(con) resolve_aliasids(con) import_redflagregions(con, ap) import_intergenic_regions(con) if False == ap.getOptionalToggle("--skip_import"):
#
# Draw a subsample of reads from a FASTQ file.
#
# Copies the leading reads of --in to --out, stopping after --n reads, and
# reports progress on stdout. A read is counted as four consecutive lines.
#
# Arguments (read through ArgParser):
#   --in   path of the FASTQ file to sample from
#   --out  path of the subsampled FASTQ file to write
#   --n    number of reads to keep (converted with int())
#
import sys
import os
from argParser import ArgParser

ap = ArgParser(sys.argv)
inpath = ap.getArg("--in")
outpath = ap.getArg("--out")
n = int(ap.getArg("--n"))

# The loop below treats every four consecutive lines as one read.
LINES_PER_READ = 4

count_reads = 0
lc = 0  # index (0..3) of the current line within its read

# Open the input first so a missing input does not leave an empty output file
# behind; the with-statement closes both handles even if the copy fails.
with open(inpath, "r") as fin:
    with open(outpath, "w") as fout:
        # Iterate the file object directly; xreadlines() is deprecated.
        for line in fin:
            if lc == 0:
                # First line of a new read: stop once the quota is exceeded.
                count_reads += 1
                if count_reads > n:
                    break
            fout.write(line)
            lc += 1
            if lc == LINES_PER_READ:
                lc = 0
                # Report once per completed read, every tenth read.
                if count_reads % 10 == 0:
                    sys.stdout.write("\r --> %.1f%%" % (100 * count_reads / float(n)))
                    # No newline is written, so flush to make the
                    # in-place progress indicator actually appear.
                    sys.stdout.flush()
# NOTE(review): whitespace-collapsed fragment. It starts with the trailing
# keyword arguments of a call whose beginning lies outside this view.
# The section marked "main" reads --motifpath, --readdbpath and --vizdbpath,
# opens the reads database with lite.connect(..., timeout=1), obtains the viz
# database handle from build_db() and passes it through build_motif_dbtables(),
# loads gene_motif via read_motifs(), and for every gene name queries the Motifs
# table for an existing id. What happens when no row is found is cut off.
# NOTE(review): the SQL text is assembled by concatenating genename into the
# statement; a parameterized query would avoid quoting problems - confirm the
# DB-API in use (`lite` is presumably sqlite3; verify the import).
title="", force_square=False, plot_as_rank=[], skip_identity=False, skip_zeros=False, unit_labels=[], xlab=None, ylab=None) ############################## # # main # motifpath = ap.getArg("--motifpath") readsdbpath = ap.getArg("--readdbpath") rcon = lite.connect(readsdbpath, timeout=1) vizdbpath = ap.getArg("--vizdbpath") vcon = build_db(dbpath=vizdbpath) vcon = build_motif_dbtables(vcon) """Import motifs""" vcur = vcon.cursor() gene_motif = read_motifs(motifpath) motifname_id = {} for genename in gene_motif: sql = "select id from Motifs where name='" + genename + "'" vcur.execute(sql) if vcur.fetchone() == None:
# NOTE(review): separate whitespace-collapsed fragment; it is cut off after
# import_redflagregions(con, ap).
# Visible behaviour: obtains a database handle from build_db(), then handles two
# toggles described in the code as dev-only (--make_testdb, --reset_files), each
# of which performs its action and exits.
# The IMPORT stage reads --pillarspath and calls import_pillars() only when
# neither --skip_pillars_import nor --skip_import is set; otherwise it prints a
# notice. GFF import runs when neither --skip_gff nor --skip_import is set.
# NOTE(review): because indentation was lost, it cannot be determined from this
# text which of the calls following import_gffs(con) (and whether
# print_pillarsstats(con)) sit inside those conditionals - confirm against the
# original file.
"""Regardless of which options are being executed, the DB gets built, or rebuilt, depending on its status.""" con = build_db(dbpath=dbpath) """--make_testdb is a dev-only option""" if True == ap.getOptionalToggle("--make_testdb"): reduce_db_for_test(con) exit() """--reset_files is a dev-only option""" if True == ap.getOptionalToggle("--reset_files"): reset_files(con) exit() # # IMPORT # pillarspath = ap.getArg("--pillarspath") if False == ap.getOptionalToggle( "--skip_pillars_import") and False == ap.getOptionalToggle( "--skip_import"): con = import_pillars(pillarspath, con) else: print "\n. I'm skipping pillars import. I will use the existing pillars in the database." print_pillarsstats(con) if False == ap.getOptionalToggle( "--skip_gff") and False == ap.getOptionalToggle("--skip_import"): import_gffs(con) resolve_aliasids(con) import_redflagregions(con, ap)
#
# Draw a subsample of reads from a FASTQ file.
#
# Copies the leading reads of --in to --out, stopping after --n reads, and
# reports progress on stdout. A read is counted as four consecutive lines.
#
# Arguments (read through ArgParser):
#   --in   path of the FASTQ file to sample from
#   --out  path of the subsampled FASTQ file to write
#   --n    number of reads to keep (converted with int())
#
import sys
import os
from argParser import ArgParser

ap = ArgParser(sys.argv)
inpath = ap.getArg("--in")
outpath = ap.getArg("--out")
n = int(ap.getArg("--n"))

# The loop below treats every four consecutive lines as one read.
LINES_PER_READ = 4

count_reads = 0
lc = 0  # index (0..3) of the current line within its read

# Open the input first so a missing input does not leave an empty output file
# behind; the with-statement closes both handles even if the copy fails.
with open(inpath, "r") as fin:
    with open(outpath, "w") as fout:
        # Iterate the file object directly; xreadlines() is deprecated.
        for line in fin:
            if lc == 0:
                # First line of a new read: stop once the quota is exceeded.
                count_reads += 1
                if count_reads > n:
                    break
            fout.write(line)
            lc += 1
            if lc == LINES_PER_READ:
                lc = 0
                # Report once per completed read, every tenth read.
                if count_reads % 10 == 0:
                    sys.stdout.write("\r --> %.1f%%" % (100 * count_reads / float(n)))
                    # No newline is written, so flush to make the
                    # in-place progress indicator actually appear.
                    sys.stdout.flush()
# NOTE(review): whitespace-collapsed fragment that starts inside a function whose
# header lies outside this view (the error text mentions
# write_peak_motif_table) and is cut off inside the final loop.
# Visible behaviour: closes fout, prints "ERROR 272" and exits when xvalues and
# yvalues differ in length, and, when both are non-empty, calls scatter_nxm() to
# plot them under the labels "max motif score" and "fold-enrichment".
# The section marked "main" reads --motifpath, --readdbpath and --vizdbpath,
# opens the reads database with lite.connect(..., timeout=1), obtains the viz
# database handle from build_db() and passes it through build_motif_dbtables(),
# loads gene_motif via read_motifs(), and for every gene name queries the Motifs
# table for an existing id.
# NOTE(review): the SQL text is assembled by concatenating genename into the
# statement; a parameterized query would avoid quoting problems - confirm the
# DB-API in use (`lite` is presumably sqlite3; verify the import).
fout.close() if xvalues.__len__() != yvalues.__len__(): print "ERROR 272: An error occurred while writing the write_peak_motif_table." exit() if xvalues.__len__() > 0 and yvalues.__len__() > 0: scatter_nxm(2, 2, [xvalues,yvalues], ["max motif score","fold-enrichment"], groupname + ".motifs_vs_fe." + motifid_name[ mid ], title="", force_square=False, plot_as_rank = [], skip_identity = False, skip_zeros = False, unit_labels=[], xlab=None, ylab=None) ############################## # # main # motifpath = ap.getArg("--motifpath") readsdbpath = ap.getArg("--readdbpath") rcon = lite.connect(readsdbpath, timeout=1) vizdbpath = ap.getArg("--vizdbpath") vcon = build_db(dbpath=vizdbpath) vcon = build_motif_dbtables(vcon) """Import motifs""" vcur = vcon.cursor() gene_motif = read_motifs(motifpath) motifname_id = {} for genename in gene_motif: sql = "select id from Motifs where name='" + genename + "'" vcur.execute(sql)
#
# This script is for generating toy-sized test cases
#
# Subsample a track file, such as BDGs and BEDs: every input line that
# contains the keyword is copied to the output; all other lines are dropped.
#
# Arguments (read through ArgParser):
#   --in       path of the track file to sample from
#   --out      output path of the subsampled track file
#   --keyword  text a line must contain in order to be kept
#
import sys
import os
from argParser import ArgParser

ap = ArgParser(sys.argv)
bdgpath = ap.getArg("--in")
outpath = ap.getArg("--out")  # output path of subsampled BDG

# Lines in the BDG that contain the keyword will be sampled.
# All other lines will be discarded.
keyword = ap.getArg("--keyword")

# Open the input first so a missing input does not leave an empty output file
# behind; the with-statement closes both handles even if the copy fails.
with open(bdgpath, "r") as fin:
    with open(outpath, "w") as fout:
        # Iterate the file object directly; xreadlines() is deprecated.
        for line in fin:
            if keyword in line:
                fout.write(line)
# NOTE(review): whitespace-collapsed fragment; it ends on `if x != None:` with no
# visible body.
# Visible behaviour: reads --anc1, --nick1, --msa1, --seed1 and the matching
# --anc2, --nick2, --msa2, --seed2 arguments, exits when the optional --runid
# comes back as False, initialises rsitesa to an empty list and fetches the
# optional --restrict_sites_1 list into x.
# NOTE(review): the USAGE text embedded in the line describes positional
# id/filepath pairs, while the code reads named --flags - confirm which is
# current.
################################################# # # USAGE: # # python compare_asr_dat_files.py [<id> <dat filepath> . . .] # import os import sys import re from map_anc_2_anc import * from argParser import ArgParser ap = ArgParser(sys.argv) anc1 = ap.getArg("--anc1") nick1 = ap.getArg("--nick1") msa1 = ap.getArg("--msa1") seed1 = ap.getArg("--seed1") anc2 = ap.getArg("--anc2") nick2 = ap.getArg("--nick2") msa2 = ap.getArg("--msa2") seed2 = ap.getArg("--seed2") runid = ap.getOptionalArg("--runid") if runid == False: exit() rsitesa = [] x = ap.getOptionalList("--restrict_sites_1") if x != None:
# NOTE(review): whitespace-collapsed fragment; plot_comb() is cut off after the
# trailing comma is stripped from x.
# Visible behaviour: reads every line of the --input file into `lines`, stores
# --output in `output`, and defines plot_comb(cat). That function opens
# "plot." + output + "." + str(cat) + ".cran" for writing and builds two
# comma-separated vectors: y from site_pps[site][cat] and x from the site keys,
# each wrapped as "y <- c(...)" / "x <- c(...)" (presumably R syntax - confirm).
# Only the y vector is written within the visible part.
# site_pps is not defined in this fragment; it must come from code outside this
# view.
import math import os import re import sys from argParser import ArgParser argp = ArgParser(sys.argv) spath = argp.getArg("--input") fin = open( spath, "r" ) lines = fin.readlines() fin.close() output = argp.getArg("--output") def plot_comb(cat): cranscript = "plot." + output + "." + cat.__str__() + ".cran" f = open(cranscript, "w") y = "y <- c(" x = "x <- c(" for site in site_pps: y += site_pps[site][cat].__str__() + "," x += site.__str__() + "," y = re.sub(",$", "", y) y += ")" f.write( y + "\n") x = re.sub(",$", "", x)
#
# This script is for generating toy-sized test cases
#
# Subsample a track file, such as BDGs and BEDs: every input line that
# contains the keyword is copied to the output; all other lines are dropped.
#
# Arguments (read through ArgParser):
#   --in       path of the track file to sample from
#   --out      output path of the subsampled track file
#   --keyword  text a line must contain in order to be kept
#
import sys
import os
from argParser import ArgParser

ap = ArgParser(sys.argv)
bdgpath = ap.getArg("--in")
outpath = ap.getArg("--out")  # output path of subsampled BDG

# Lines in the BDG that contain the keyword will be sampled.
# All other lines will be discarded.
keyword = ap.getArg("--keyword")

# Open the input first so a missing input does not leave an empty output file
# behind; the with-statement closes both handles even if the copy fails.
with open(bdgpath, "r") as fin:
    with open(outpath, "w") as fout:
        # Iterate the file object directly; xreadlines() is deprecated.
        for line in fin:
            if keyword in line:
                fout.write(line)