import sys

# Look for the amount of noise by checking coverage values OFFSET nt upstream
# from gene stops.
# Command line: two required positional paths, a gene file and a coverage file.
parser = argparse.ArgumentParser(description="parser for a gene and a coverage file")
for arg_name, arg_help in (
        ("GENES_FILE", "path to gene file you want to use"),
        ("POSITIVE_COVERAGE_FILE", "path to coverage file you want to use"),
):
    parser.add_argument(arg_name, help=arg_help)

# Parsed arguments exposed as a plain dict keyed by argument name.
args = vars(parser.parse_args())

# Raw gene lines plus the file type needed to parse them.
gene_lines, gene_filetype = geneTools.readORFLines(args['GENES_FILE'])

# Select up to NUM_GENES distinct random genes.
# Candidate indices are visited in shuffled order (i.e. drawn without
# replacement), so the selection only falls short of NUM_GENES when the gene
# file itself holds fewer distinct genes. An empty gene file gives an empty
# selection instead of the ValueError raised by random.randint(0, -1).
NUM_GENES = 500
gene_lines_rand = []
candidate_indices = list(range(len(gene_lines)))
random.shuffle(candidate_indices)
for index in candidate_indices:
    if len(gene_lines_rand) >= NUM_GENES:
        break
    line_data = geneTools.getLineData(gene_lines[index], gene_filetype)
    # different lines may still parse to identical gene data; keep each once
    if line_data not in gene_lines_rand:
        gene_lines_rand.append(line_data)
# Order the selection by the natural ordering of the parsed line data.
gene_lines_rand.sort()

# Load the coverage records as two lists of lines
# (presumably one per strand -- TODO confirm against geneTools).
pos_lines, neg_lines = geneTools.readCoverageLines(args['POSITIVE_COVERAGE_FILE'])

# Cursors into pos_lines / neg_lines, both starting at the first record.
ind_pos = ind_neg = 0

# Second tab-separated field of the current record, as an integer
# (presumably the genomic position -- TODO confirm against the coverage format).
pos_fields = pos_lines[ind_pos].split('\t')
x_pos = int(pos_fields[1])
neg_fields = neg_lines[ind_neg].split('\t')
x_neg = int(neg_fields[1])

# Ribosome profiling reads are performed by recording OFFSET nt upstream of the 3' end.
OFFSET = 30
# Example #2
# 0
# Raw gene lines plus the file type needed to parse them.
gene_lines, gene_filetype = geneTools.readORFLines(args['GENES_FILE'])

# Settings for the random gene selection.
NUM_GENES = 500      # number of random genes to collect
DIRECT_ONLY = True   # when True, genes on the "-" strand are skipped
SAVE_GENES = False   # not read in the visible part of the script

# Enforce that no gene on either strand starts within ZONE_LENGTH nt of the stop codon.
ZONE_LENGTH = 50

# Accepted genes, number of random draws so far, and the index of the last draw.
gene_lines_rand = []
attempts = index = 0
# Keep drawing random genes until NUM_GENES of them have been accepted.
while len(gene_lines_rand) < NUM_GENES:
    attempts += 1
    index = random.randint(0, len(gene_lines) - 1)
    # line1 is indexed as (start, end, strand) below
    line1 = geneTools.getLineData(gene_lines[index], gene_filetype)
    if line1[2] == "-":
        if DIRECT_ONLY:
            #do nothing if only looking at direct genes
            continue
        if index == 0:
            # First gene in the file has no preceding neighbour, so use a bound
            # below every coordinate. (This used the undefined name
            # `sysmaxint`, which raised NameError.)
            end2 = float("-inf")
        else:
            # end coordinate of the gene on the preceding line
            line2 = geneTools.getLineData(gene_lines[index - 1], gene_filetype)
            end2 = line2[1]
        start1 = line1[0]
        # accept only if the previous gene ends more than ZONE_LENGTH nt before this start
        if end2 < start1 - ZONE_LENGTH and line1 not in gene_lines_rand:
            gene_lines_rand.append(line1)
        elif end2 > start1 - ZONE_LENGTH:
            print("found a - gene with too-close neighbor")
    elif line1[2] == "+":
        # NOTE(review): this branch reads `seq`, `start`, `end`, `NUM`, `Seq` and
        # `unambiguous_dna`, none of which are assigned before this loop in the
        # visible code, and it never appends to gene_lines_rand. It looks like it
        # was spliced in from a different script -- confirm against the original.
        #make it a Seq object and reverse complement
        subseq = Seq(seq[start - 1:start + NUM - 1], unambiguous_dna)
        subseq = str(Seq.reverse_complement(subseq))
        print("gene: " + str(start) + "-" + str(end) +
              " COMPLEMENTARY: ..." + subseq)
############END########
sys.exit(0)

# Raw gene lines plus the file type needed to parse them.
gene_lines, gene_filetype = geneTools.readORFLines(args['GENES_FILE'])

# Collect up to NUM_GENES distinct random genes.
# Indices are visited in shuffled order (drawn without replacement), so the loop
# always terminates, even when the gene file is empty or holds fewer than
# NUM_GENES distinct genes.
NUM_GENES = 100
gene_lines_rand = []
# number of candidate genes examined, including rejected duplicates
attempts = 0
candidate_indices = list(range(len(gene_lines)))
random.shuffle(candidate_indices)
for index in candidate_indices:
    if len(gene_lines_rand) >= NUM_GENES:
        break
    attempts += 1
    gene = geneTools.getLineData(gene_lines[index], gene_filetype)
    # different lines may still parse to identical gene data; keep each once
    if gene not in gene_lines_rand:
        gene_lines_rand.append(gene)
#print "attempts: " + str(attempts)
gene_lines_rand.sort()

# presumably the number of nucleotides taken per gene -- TODO confirm where NUM is used
NUM = 6

# Read the whole sequence file, skipping its first (header) line. The `with`
# block closes the handle once the contents are in memory; the name
# SEQUENCE_FILE stays bound to the (closed) file object afterwards.
with open(args['SEQUENCE_FILE'], 'r') as SEQUENCE_FILE:
    #read off header line
    SEQUENCE_FILE.readline()
    sequence_lines = SEQUENCE_FILE.read().splitlines()
# splitlines() already drops the line terminators, so a plain join yields the
# full sequence as one string
seq = "".join(sequence_lines)

for line in gene_lines_rand:
# Example #4
# 0
    "parser for removing one set of (verified) genes from another set of genes"
)
# Register the three required positional paths on the existing parser.
for arg_name, arg_help in (
        ("VERIFIED_GENES_FILE", "path to file containing verified genes"),
        ("GENES_FILE", "path to file containing another set of genes"),
        ("OUT_FILE", "path to output file you would like to create"),
):
    parser.add_argument(arg_name, help=arg_help)

# Parsed arguments exposed as a plain dict keyed by argument name.
args = vars(parser.parse_args())

v_lines, v_filetype = geneTools.readORFLines(args["VERIFIED_GENES_FILE"])
lines, filetype = geneTools.readORFLines(args["GENES_FILE"])

# Index the verified genes by their (start, end, strand) key so that each gene
# below needs one set lookup instead of a scan over every verified line.
# NOTE(review): assumes the three fields are hashable (e.g. ints / strings).
verified_keys = set()
for v_line in v_lines:
    v_start, v_end, v_strand = geneTools.getLineData(v_line, v_filetype)
    verified_keys.add((v_start, v_end, v_strand))

# Parse every gene line exactly once; toRemove collects the keys that also
# occur among the verified genes.
parsed_lines = []
toRemove = set()
for line in lines:
    start, end, strand = geneTools.getLineData(line, filetype)
    key = (start, end, strand)
    parsed_lines.append((line, key))
    if key in verified_keys:
        toRemove.add(key)

# Write every gene line to the output file unless its key is in toRemove.
# The file is only opened after all parsing has succeeded.
with open(args["OUT_FILE"], 'w') as of:
    for line, key in parsed_lines:
        if key not in toRemove:
            of.write(line + '\n')
# Example #5
# 0
# Command line: two required positional paths, a gene file and a coverage file.
parser = argparse.ArgumentParser(description="parser for a gene and a coverage file")
positional_arguments = [
    ("GENES_FILE", "path to gene file you want to use"),
    ("POSITIVE_COVERAGE_FILE", "path to coverage file you want to use"),
]
for arg_name, arg_help in positional_arguments:
    parser.add_argument(arg_name, help=arg_help)

# Parsed arguments exposed as a plain dict keyed by argument name.
parsed_namespace = parser.parse_args()
args = vars(parsed_namespace)

# Raw gene lines plus the file type needed to parse them.
gene_lines, gene_filetype = geneTools.readORFLines(args['GENES_FILE'])

# Select up to NUM_GENES distinct random genes.
# Candidate indices are visited in shuffled order (i.e. drawn without
# replacement), so the selection only falls short of NUM_GENES when the gene
# file itself holds fewer distinct genes. An empty gene file gives an empty
# selection instead of the ValueError raised by random.randint(0, -1).
NUM_GENES = 500
gene_lines_rand = []
candidate_indices = list(range(len(gene_lines)))
random.shuffle(candidate_indices)
for index in candidate_indices:
    if len(gene_lines_rand) >= NUM_GENES:
        break
    line_data = geneTools.getLineData(gene_lines[index], gene_filetype)
    # different lines may still parse to identical gene data; keep each once
    if line_data not in gene_lines_rand:
        gene_lines_rand.append(line_data)
# Order the selection by the natural ordering of the parsed line data.
gene_lines_rand.sort()

# Load the coverage records as two lists of lines
# (presumably one per strand -- TODO confirm against geneTools).
coverage_path = args['POSITIVE_COVERAGE_FILE']
pos_lines, neg_lines = geneTools.readCoverageLines(coverage_path)

# Cursors into pos_lines / neg_lines, both starting at the first record.
ind_pos, ind_neg = 0, 0

# Second tab-separated field of the current record, as an integer
# (presumably the genomic position -- TODO confirm against the coverage format).
pos_fields = pos_lines[ind_pos].split('\t')
x_pos = int(pos_fields[1])
neg_fields = neg_lines[ind_neg].split('\t')
x_neg = int(neg_fields[1])

#ribosome profiling reads are performed by recording OFFSET nt upstream of the 3' end
import argparse
import geneTools

# Command line: verified-genes file, genes file to filter, and output path.
parser = argparse.ArgumentParser(
    description="parser for removing one set of (verified) genes from another set of genes")
for arg_name, arg_help in (
        ("VERIFIED_GENES_FILE", "path to file containing verified genes"),
        ("GENES_FILE", "path to file containing another set of genes"),
        ("OUT_FILE", "path to output file you would like to create"),
):
    parser.add_argument(arg_name, help=arg_help)

# Parsed arguments exposed as a plain dict keyed by argument name.
args = vars(parser.parse_args())

v_lines, v_filetype = geneTools.readORFLines(args["VERIFIED_GENES_FILE"])
lines, filetype = geneTools.readORFLines(args["GENES_FILE"])

# Index the verified genes by their (start, end, strand) key so that each gene
# below needs one set lookup instead of a scan over every verified line.
# NOTE(review): assumes the three fields are hashable (e.g. ints / strings).
verified_keys = set()
for v_line in v_lines:
    v_start, v_end, v_strand = geneTools.getLineData(v_line, v_filetype)
    verified_keys.add((v_start, v_end, v_strand))

# Parse every gene line exactly once; toRemove collects the keys that also
# occur among the verified genes.
parsed_lines = []
toRemove = set()
for line in lines:
    start, end, strand = geneTools.getLineData(line, filetype)
    key = (start, end, strand)
    parsed_lines.append((line, key))
    if key in verified_keys:
        toRemove.add(key)

# Write every gene line to the output file unless its key is in toRemove.
# The file is only opened after all parsing has succeeded.
with open(args["OUT_FILE"], 'w') as of:
    for line, key in parsed_lines:
        if key not in toRemove:
            of.write(line + '\n')