Esempio n. 1
0
    if x == 15:
        return "XV"
    if x == 16:
        return "XVI"
    if x == 17:
        return "XVII"
    if x == 18:
        return "XVIII"
    if x == 19:
        return "XIX"
    if x == 20:
        return "XX"
    return None


# The input TAB file must exist on disk; otherwise report it and stop.
tabpath = ap.getArg("--in")
if not os.path.exists(tabpath):
    print "\n. I cannot find your TAB file at", tabpath
    exit()

# Fall back to "<tabpath>.gff" when getOptionalArg returns False or None.
gffpath = ap.getOptionalArg("--out")
if gffpath in (False, None):
    gffpath = tabpath + ".gff"

fin = open(tabpath, "r")
fout = open(gffpath, "w")

# GFF preamble: the version pragma followed by provenance comment lines.
fout.writelines((
    "# gff-version 3\n",
    "# This file was auto-generated by the script tab2gff.py\n",
    "# using data from the TAB file " + tabpath + "\n",
    "#\n",
))
for l in fin.xreadlines():
Esempio n. 2
0
import os, sys
from argParser import ArgParser
ap = ArgParser(sys.argv)

# The GFF path must point at an existing file; otherwise report and stop.
gffpath = ap.getArg("--gff")
if not os.path.exists(gffpath):
    print "\n. Error. I can't find your GFF at", gffpath
    exit()

# The BDG path is only checked on disk when getArg did not return False.
bdgpath = ap.getArg("--bdg")
if bdgpath != False and not os.path.exists(bdgpath):
    print "\n. Error. I can't find your BDG at", bdgpath
    exit()

# Name of the gene to look up, and a size value parsed as an integer.
genename = ap.getArg("--genename")
regsize = int(ap.getArg("--regsize"))

# Scan the GFF for the record whose first attribute value equals genename.
# These four names stay None until a matching line has been seen.
chrom = None
start = None
stop = None
strand = None
fin = open(gffpath, "r")
# NOTE(review): file.xreadlines() only exists on Python 2 file objects.
for l in fin.xreadlines():
    # Skip lines that start with '#'.
    # NOTE(review): a blank line still contains its newline, so it passes the
    # length test; tokens[8] below would then raise IndexError. The same holds
    # for any line with fewer than nine whitespace-separated fields.
    if l.__len__() > 0 and False == l.startswith("#"):
        # Split on any run of whitespace (not on tabs only).
        tokens = l.split()
        # Value of the first "key=value" pair in the ninth field (orfName).
        this_gene = tokens[8].split(";")[0].split("=")[1] # orfName 
        if this_gene == genename:
            # First field is kept as-is; fourth field is parsed as an integer.
            chrom = tokens[0]
            start = int( tokens[3] )
Esempio n. 3
0
#
# Create a master table of gene-peak occurrences across multiple species and replicate conditions
#

import os, sys, re

from plot_venn import *

from argParser import ArgParser
ap = ArgParser(sys.argv)

# Command-line inputs for the peak comparison.
# --runids: a list of runid names
runids = ap.getList("--runids")
# --runid: the (output) runid for this peak comparison
runid = ap.getArg("--runid")
# --pillars: path to the pillars file
pillarspath = ap.getArg("--pillars")
# Starts out empty; filled in elsewhere in the script.
orf_names = dict()

def splash():
    """Print the compare_peaks.py banner to standard output.

    Each line is printed with the parenthesised, single-argument form of
    print, which produces the same output under the Python 2 print
    statement and the Python 3 print function.
    """
    print("============================================")
    print("  compare_peaks.py")
    print("  Victor Hanson-Smith, 2014")
    print("  [email protected]")
    print("============================================")
    
splash()

def get_runid_reps():
    print "\n. Reading Replicates..."
    runid_reps = {}
    for t in runids:
        if t not in runid_reps:
            runid_reps[t] = []
Esempio n. 4
0
import os, sys
from argParser import ArgParser

ap = ArgParser(sys.argv)

# --gff must name a file that exists; if not, say so and quit.
gffpath = ap.getArg("--gff")
if not os.path.exists(gffpath):
    print "\n. Error. I can't find your GFF at", gffpath
    exit()

# --bdg is checked on disk unless getArg handed back False.
bdgpath = ap.getArg("--bdg")
if bdgpath != False and not os.path.exists(bdgpath):
    print "\n. Error. I can't find your BDG at", bdgpath
    exit()

# Gene to search for, plus an integer size value.
genename = ap.getArg("--genename")
regsize = int(ap.getArg("--regsize"))

# Scan the GFF for the record whose first attribute value equals genename.
# These four names stay None until a matching line has been seen.
chrom = None
start = None
stop = None
strand = None
fin = open(gffpath, "r")
# NOTE(review): file.xreadlines() only exists on Python 2 file objects.
for l in fin.xreadlines():
    # Skip lines that start with '#'.
    # NOTE(review): a blank line still contains its newline, so it passes the
    # length test; tokens[8] below would then raise IndexError. The same holds
    # for any line with fewer than nine whitespace-separated fields.
    if l.__len__() > 0 and False == l.startswith("#"):
        # Split on any run of whitespace (not on tabs only).
        tokens = l.split()
        # Value of the first "key=value" pair in the ninth field (orfName).
        this_gene = tokens[8].split(";")[0].split("=")[1]  # orfName
        if this_gene == genename:
            # First field of the matching record.
            chrom = tokens[0]
Esempio n. 5
0
con = build_db(dbpath=dbpath)

"""--make_testdb is a dev-only option"""
if True == ap.getOptionalToggle("--make_testdb"):
    reduce_db_for_test(con)
    exit()

"""--reset_files is a dev-only option"""
if True == ap.getOptionalToggle("--reset_files"):
    reset_files(con)
    exit()

# 
# IMPORT
#
pillarspath = ap.getArg("--pillarspath")
if False == ap.getOptionalToggle("--skip_pillars_import") and False == ap.getOptionalToggle("--skip_import"):
    con = import_pillars(pillarspath, con)
else:
    print "\n. I'm skipping pillars import. I will use the existing pillars in the database."
print_pillarsstats(con)

if False == ap.getOptionalToggle("--skip_gff") and False == ap.getOptionalToggle("--skip_import"):
    import_gffs(con)
    resolve_aliasids(con)

import_redflagregions(con, ap)

import_intergenic_regions(con)

if False == ap.getOptionalToggle("--skip_import"):
Esempio n. 6
0
#
# Draw a subsample of reads from a FASTQ file.
#
import sys, os
from argParser import ArgParser
ap = ArgParser(sys.argv)

# Copy the first n reads of the input file to the output file. Reads are taken
# in file order from the top; nothing here picks them at random.
inpath = ap.getArg("--in")
outpath = ap.getArg("--out")
n = int(ap.getArg("--n"))  # number of reads to keep

fin = open(inpath, "r")
fout = open(outpath, "w")

count_reads = 0  # reads started so far (1-based once the first line is seen)
lc = 0  # line position inside the current 4-line record; cycles 0..3
# NOTE(review): file.xreadlines() only exists on Python 2 file objects.
for l in fin.xreadlines():
    # lc == 0 marks the first line of a new record.
    if lc == 0:
        count_reads += 1

    # Keep copying lines until read number n+1 begins, then stop.
    if count_reads <= n:
        fout.write(l)
    else:
        break

    # Advance within the record, wrapping after the fourth line.
    lc += 1
    if lc == 4:
        lc = 0

    # Progress as a percentage of n, redrawn in place via "\r". This test runs
    # on every line, so it fires for each line of a read whose number is a
    # multiple of 10.
    if count_reads % 10 == 0:
        sys.stdout.write("\r    --> %.1f%%" % (100 * count_reads / float(n)))
Esempio n. 7
0
                            title="",
                            force_square=False,
                            plot_as_rank=[],
                            skip_identity=False,
                            skip_zeros=False,
                            unit_labels=[],
                            xlab=None,
                            ylab=None)


##############################
#
# main
#

motifpath = ap.getArg("--motifpath")

# Open the reads database.
readsdbpath = ap.getArg("--readdbpath")
rcon = lite.connect(readsdbpath, timeout=1)

# Build (or open) the visualization database, then add the motif tables.
vizdbpath = ap.getArg("--vizdbpath")
vcon = build_db(dbpath=vizdbpath)
vcon = build_motif_dbtables(vcon)

# Import motifs
vcur = vcon.cursor()
gene_motif = read_motifs(motifpath)
motifname_id = {}
for genename in gene_motif:
    # Bind the name as a query parameter instead of splicing it into the SQL
    # text, so a name containing a quote cannot break or alter the statement.
    # NOTE(review): assumes vcon is a sqlite3 connection ("?" placeholders),
    # as suggested by the lite.connect call above -- confirm in build_db.
    sql = "select id from Motifs where name=?"
    vcur.execute(sql, (genename,))
    if vcur.fetchone() == None:
Esempio n. 8
0
"""Regardless of which options are being executed, the DB gets
built, or rebuilt, depending on its status."""
con = build_db(dbpath=dbpath)
"""--make_testdb is a dev-only option"""
if True == ap.getOptionalToggle("--make_testdb"):
    reduce_db_for_test(con)
    exit()
"""--reset_files is a dev-only option"""
if True == ap.getOptionalToggle("--reset_files"):
    reset_files(con)
    exit()

#
# IMPORT
#
pillarspath = ap.getArg("--pillarspath")
if False == ap.getOptionalToggle(
        "--skip_pillars_import") and False == ap.getOptionalToggle(
            "--skip_import"):
    con = import_pillars(pillarspath, con)
else:
    print "\n. I'm skipping pillars import. I will use the existing pillars in the database."
print_pillarsstats(con)

if False == ap.getOptionalToggle(
        "--skip_gff") and False == ap.getOptionalToggle("--skip_import"):
    import_gffs(con)
    resolve_aliasids(con)

import_redflagregions(con, ap)
Esempio n. 9
0
#
# Draw a subsample of reads from a FASTQ file.
#
import sys, os
from argParser import ArgParser
ap = ArgParser(sys.argv)

# Copy the first n reads of the input file to the output file. Reads are taken
# in file order from the top; nothing here picks them at random.
inpath = ap.getArg("--in")
outpath = ap.getArg("--out")
n = int( ap.getArg("--n") ) # number of reads to keep

fin = open(inpath, "r")
fout = open(outpath, "w")

count_reads = 0 # reads started so far (1-based once the first line is seen)
lc = 0 # line position inside the current 4-line record; cycles 0..3
# NOTE(review): file.xreadlines() only exists on Python 2 file objects.
for l in fin.xreadlines():
    # lc == 0 marks the first line of a new record.
    if lc == 0:
        count_reads += 1

    # Keep copying lines until read number n+1 begins, then stop.
    if count_reads <= n:
        fout.write( l )
    else:
        break
    
    # Advance within the record, wrapping after the fourth line.
    lc += 1
    if lc == 4:
        lc = 0
    
    # Progress as a percentage of n, redrawn in place via "\r". This test runs
    # on every line, so it fires for each line of a read whose number is a
    # multiple of 10.
    if count_reads%10 == 0:
        sys.stdout.write("\r    --> %.1f%%" % (100*count_reads/float(n)) )
Esempio n. 10
0
            fout.close()
            
            if xvalues.__len__() != yvalues.__len__():
                print "ERROR 272: An error occurred while writing the write_peak_motif_table."
                exit()
            
            if xvalues.__len__() > 0 and yvalues.__len__() > 0:
                scatter_nxm(2, 2, [xvalues,yvalues], ["max motif score","fold-enrichment"], groupname + ".motifs_vs_fe." + motifid_name[ mid ], title="", force_square=False, plot_as_rank = [], skip_identity = False, skip_zeros = False, unit_labels=[], xlab=None, ylab=None)


##############################
#
# main
#

motifpath = ap.getArg("--motifpath")

# Open the reads database.
readsdbpath = ap.getArg("--readdbpath")
rcon = lite.connect(readsdbpath, timeout=1)

# Build (or open) the visualization database, then add the motif tables.
vizdbpath = ap.getArg("--vizdbpath")
vcon = build_db(dbpath=vizdbpath)
vcon = build_motif_dbtables(vcon)

# Import motifs
vcur = vcon.cursor()
gene_motif = read_motifs(motifpath)
motifname_id = {}
for genename in gene_motif:
    # Bind the name as a query parameter instead of splicing it into the SQL
    # text, so a name containing a quote cannot break or alter the statement.
    # NOTE(review): assumes vcon is a sqlite3 connection ("?" placeholders),
    # as suggested by the lite.connect call above -- confirm in build_db.
    sql = "select id from Motifs where name=?"
    vcur.execute(sql, (genename,))
Esempio n. 11
0
#
# This script is for generating toy-sized test cases
#
# Subsample a track file, such as BDGs and BEDs
#

import sys, os
from argParser import ArgParser
ap = ArgParser(sys.argv)

bdgpath = ap.getArg("--in")
outpath = ap.getArg("--out")  # output path of subsampled BDG

# Lines in the BDG that contain the keyword will be sampled;
# all other lines will be discarded.
keyword = ap.getArg("--keyword")

# The "with" blocks close both files even if a read or write raises.
# The output is opened first, matching the original open order.
with open(outpath, "w") as fout:
    with open(bdgpath, "r") as fin:
        # Iterating the file object yields one line at a time, which is what
        # the deprecated xreadlines() did.
        for l in fin:
            if keyword in l:
                fout.write(l)
Esempio n. 12
0
#################################################
#
# USAGE:
#
# python compare_asr_dat_files.py [<id> <dat filepath> . . .]
#

import os
import sys
import re
from map_anc_2_anc import *
from argParser import ArgParser
ap = ArgParser(sys.argv)

# Arguments for the first ancestor, read in the order
# --anc1, --nick1, --msa1, --seed1.
anc1, nick1, msa1, seed1 = (
    ap.getArg("--anc1"),
    ap.getArg("--nick1"),
    ap.getArg("--msa1"),
    ap.getArg("--seed1"),
)

# The same four arguments for the second ancestor.
anc2, nick2, msa2, seed2 = (
    ap.getArg("--anc2"),
    ap.getArg("--nick2"),
    ap.getArg("--msa2"),
    ap.getArg("--seed2"),
)

# Quit without a message when getOptionalArg returns False for --runid.
runid = ap.getOptionalArg("--runid")
if runid == False:
    exit()

rsitesa = []
x = ap.getOptionalList("--restrict_sites_1")
if x != None:
Esempio n. 13
0
import math
import os
import re
import sys

from argParser import ArgParser
argp = ArgParser(sys.argv)

# Read the whole --input file into memory as a list of lines. The "with"
# block closes the file even if readlines() raises.
spath = argp.getArg("--input")
with open(spath, "r") as fin:
    lines = fin.readlines()

# Value of --output; plot_comb() uses it when building the script filename.
output = argp.getArg("--output")

def plot_comb(cat):
    cranscript = "plot." + output + "." + cat.__str__() + ".cran"
    f = open(cranscript, "w")
 
    y = "y <- c("
    x = "x <- c("
    
    for site in site_pps:
        y += site_pps[site][cat].__str__() + ","
        x += site.__str__() + ","

    y = re.sub(",$", "", y)
    y += ")"
    f.write( y + "\n")  
    
    x = re.sub(",$", "", x)
#################################################
#
# USAGE:
#
# python compare_asr_dat_files.py [<id> <dat filepath> . . .]
#

import os
import sys
import re
from map_anc_2_anc import *
from argParser import ArgParser
ap = ArgParser(sys.argv)

# Arguments for the first ancestor, read in the order
# --anc1, --nick1, --msa1, --seed1.
anc1, nick1, msa1, seed1 = (
    ap.getArg("--anc1"),
    ap.getArg("--nick1"),
    ap.getArg("--msa1"),
    ap.getArg("--seed1"),
)

# The same four arguments for the second ancestor.
anc2, nick2, msa2, seed2 = (
    ap.getArg("--anc2"),
    ap.getArg("--nick2"),
    ap.getArg("--msa2"),
    ap.getArg("--seed2"),
)

# Quit without a message when getOptionalArg returns False for --runid.
runid = ap.getOptionalArg("--runid")
if runid == False:
    exit()

rsitesa = []
x = ap.getOptionalList("--restrict_sites_1")
if x != None:
Esempio n. 15
0
#
# This script is for generating toy-sized test cases
#
# Subsample a track file, such as BDGs and BEDs
#

import sys, os
from argParser import ArgParser
ap = ArgParser(sys.argv)

bdgpath = ap.getArg("--in")
outpath = ap.getArg("--out")  # output path of subsampled BDG

# Lines in the BDG that contain the keyword will be sampled;
# all other lines will be discarded.
keyword = ap.getArg("--keyword")

# The "with" blocks close both files even if a read or write raises.
# The output is opened first, matching the original open order.
with open(outpath, "w") as fout:
    with open(bdgpath, "r") as fin:
        # Iterating the file object yields one line at a time, which is what
        # the deprecated xreadlines() did.
        for l in fin:
            if keyword in l:
                fout.write(l)