import sys

# Look for the amount of noise by checking coverage values at OFFSET nt
# upstream from gene stops.
parser = argparse.ArgumentParser(
    description="parser for a gene and a coverage file")
parser.add_argument("GENES_FILE", help="path to gene file you want to use")
parser.add_argument("POSITIVE_COVERAGE_FILE",
                    help="path to coverage file you want to use")
args = vars(parser.parse_args())

gene_lines, gene_filetype = geneTools.readORFLines(args['GENES_FILE'])

# Select NUM_GENES random genes (or every gene, if the file has fewer).
# Indices are sampled WITHOUT replacement: drawing with replacement and then
# discarding repeats would silently return fewer than NUM_GENES genes, and
# random.randint(0, -1) would raise on an empty gene list.
NUM_GENES = 500
gene_lines_rand = []
sample_size = min(NUM_GENES, len(gene_lines))
for gene_index in random.sample(range(len(gene_lines)), sample_size):
    line_data = geneTools.getLineData(gene_lines[gene_index], gene_filetype)
    # Distinct lines could still parse to identical gene data; keep one copy.
    if line_data not in gene_lines_rand:
        gene_lines_rand.append(line_data)
gene_lines_rand.sort()

pos_lines, neg_lines = geneTools.readCoverageLines(
    args['POSITIVE_COVERAGE_FILE'])

# Cursors into the two coverage line lists, plus the integer parsed from the
# second tab-separated field of the line each cursor currently points at.
# NOTE(review): that field is presumably the genome position -- confirm
# against geneTools.readCoverageLines.
ind_pos = 0
ind_neg = 0
x_pos = int(pos_lines[ind_pos].split('\t')[1])
x_neg = int(neg_lines[ind_neg].split('\t')[1])

# Ribosome profiling reads are performed by recording OFFSET nt upstream of
# the 3' end.
OFFSET = 30
gene_lines, gene_filetype = geneTools.readORFLines(args['GENES_FILE']) #select a certain number of random genes NUM_GENES = 500 DIRECT_ONLY = True SAVE_GENES = False #enforce that no gene on either strand starts within ZONE_LENGTH nt of the stop codon gene_lines_rand = [] ZONE_LENGTH = 50 attempts = 0 index = 0 while len(gene_lines_rand) < NUM_GENES: attempts += 1 index = random.randint(0, len(gene_lines) - 1) line1 = geneTools.getLineData(gene_lines[index], gene_filetype) if line1[2] == "-": if DIRECT_ONLY: #do nothing if only looking at direct genes continue if index == 0: end2 = -1 * sysmaxint else: line2 = geneTools.getLineData(gene_lines[index - 1], gene_filetype) end2 = line2[1] start1 = line1[0] if end2 < start1 - ZONE_LENGTH and line1 not in gene_lines_rand: gene_lines_rand.append(line1) elif end2 > start1 - ZONE_LENGTH: print "found a - gene with too-close neighbor" elif line1[2] == "+":
#make it a Seq object and reverse complement subseq = Seq(seq[start - 1:start + NUM - 1], unambiguous_dna) subseq = str(Seq.reverse_complement(subseq)) print "gene: " + str(start) + "-" + str( end) + " COMPLEMENTARY: ..." + subseq ############END######## sys.exit(0) gene_lines, gene_filetype = geneTools.readORFLines(args['GENES_FILE']) NUM_GENES = 100 gene_lines_rand = [] attempts = 0 while len(gene_lines_rand) < NUM_GENES: index = random.randint(0, len(gene_lines) - 1) gene = geneTools.getLineData(gene_lines[index], gene_filetype) if gene not in gene_lines_rand: gene_lines_rand.append(gene) attempts += 1 #print "attempts: " + str(attempts) gene_lines_rand.sort() NUM = 6 SEQUENCE_FILE = open(args['SEQUENCE_FILE'], 'r') #read off header line SEQUENCE_FILE.readline() sequence_lines = SEQUENCE_FILE.read().splitlines() seq = "".join(sequence_lines).replace('\n', '') for line in gene_lines_rand:
"parser for removing one set of (verified) genes from another set of genes" ) parser.add_argument("VERIFIED_GENES_FILE", help="path to file containing verified genes") parser.add_argument("GENES_FILE", help="path to file containing another set of genes") parser.add_argument("OUT_FILE", help="path to output file you would like to create") args = vars(parser.parse_args()) v_lines, v_filetype = geneTools.readORFLines(args["VERIFIED_GENES_FILE"]) lines, filetype = geneTools.readORFLines(args["GENES_FILE"]) toRemove = [] for v_line in v_lines: v_start, v_end, v_strand = geneTools.getLineData(v_line, v_filetype) for line in lines: start, end, strand = geneTools.getLineData(line, filetype) if v_start == start and v_end == end and v_strand == strand: toRemove.append((start, end, strand)) #have list of (start, end, strand) tuples to remove #now go through genes file and add all lines to ouput file unless it is in toRemove with open(args["OUT_FILE"], 'w') as of: for line in lines: start, end, strand = geneTools.getLineData(line, filetype) if (start, end, strand) not in toRemove: of.write(line + '\n')
parser = argparse.ArgumentParser(
    description="parser for a gene and a coverage file")
parser.add_argument("GENES_FILE", help="path to gene file you want to use")
parser.add_argument("POSITIVE_COVERAGE_FILE",
                    help="path to coverage file you want to use")
args = vars(parser.parse_args())

gene_lines, gene_filetype = geneTools.readORFLines(args['GENES_FILE'])

# Select NUM_GENES random genes (or every gene, if the file has fewer).
# Indices are sampled WITHOUT replacement: drawing with replacement and then
# discarding repeats would silently return fewer than NUM_GENES genes, and
# random.randint(0, -1) would raise on an empty gene list.
NUM_GENES = 500
gene_lines_rand = []
sample_size = min(NUM_GENES, len(gene_lines))
for gene_index in random.sample(range(len(gene_lines)), sample_size):
    line_data = geneTools.getLineData(gene_lines[gene_index], gene_filetype)
    # Distinct lines could still parse to identical gene data; keep one copy.
    if line_data not in gene_lines_rand:
        gene_lines_rand.append(line_data)
gene_lines_rand.sort()

pos_lines, neg_lines = geneTools.readCoverageLines(
    args['POSITIVE_COVERAGE_FILE'])

# Cursors into the two coverage line lists, plus the integer parsed from the
# second tab-separated field of the line each cursor currently points at.
# NOTE(review): that field is presumably the genome position -- confirm
# against geneTools.readCoverageLines.
ind_pos = 0
ind_neg = 0
x_pos = int(pos_lines[ind_pos].split('\t')[1])
x_neg = int(neg_lines[ind_neg].split('\t')[1])

# Ribosome profiling reads are performed by recording OFFSET nt upstream of
# the 3' end.
import argparse

import geneTools

# Command line: a file of verified genes, a second gene file to filter, and
# the path of the filtered output file to create.
parser = argparse.ArgumentParser(
    description="parser for removing one set of (verified) genes from another set of genes")
parser.add_argument("VERIFIED_GENES_FILE",
                    help="path to file containing verified genes")
parser.add_argument("GENES_FILE",
                    help="path to file containing another set of genes")
parser.add_argument("OUT_FILE",
                    help="path to output file you would like to create")
args = vars(parser.parse_args())

v_lines, v_filetype = geneTools.readORFLines(args["VERIFIED_GENES_FILE"])
lines, filetype = geneTools.readORFLines(args["GENES_FILE"])

# Build the set of (start, end, strand) keys of every verified gene in a
# single pass.  A set gives constant-time membership tests below, instead of
# re-parsing every line of GENES_FILE once per verified gene and then
# scanning a list for each output line.
toRemove = set()
for v_line in v_lines:
    v_start, v_end, v_strand = geneTools.getLineData(v_line, v_filetype)
    toRemove.add((v_start, v_end, v_strand))

# Go through the genes file and copy every line to the output file unless
# its (start, end, strand) key matches a verified gene.
with open(args["OUT_FILE"], 'w') as of:
    for line in lines:
        start, end, strand = geneTools.getLineData(line, filetype)
        if (start, end, strand) not in toRemove:
            of.write(line + '\n')