def read_gff(self, line, prefix): # Takes prefix b/c reader returns comments, invalids, ignored # and this method writes them to output files # That's kind of messy gffreader = GFFReader() reader = open(line, 'rb') genes, comments, invalids, ignored = gffreader.read_file(reader) for gene in genes: self.add_gene(gene) # Write comments, invalid lines and ignored features with open(prefix + "/genome.comments.gff", 'w') as comments_file: for comment in comments: comments_file.write(comment) with open(prefix + "/genome.invalid.gff", 'w') as invalid_file: for invalid in invalids: invalid_file.write(invalid) with open(prefix + "/genome.ignored.gff", 'w') as ignored_file: for item in ignored: ignored_file.write(item)
def read_gff(self, line): gffreader = GFFReader() reader = open(line, 'rb') genes = gffreader.read_file(reader) for gene in genes: self.add_gene(gene)
def main(): print("Yarr! I be Cap'n Primer.\n") # Check dependencies print("Ahoy, let's check these scurvy dependencies...") check_dependencies() # Verify files print("Yarr! Now to verify the input files...") target_fasta_path = 'targets.fasta' genome_fasta_path = 'genome.fasta' gff_path = 'genome.gff' excluded_primers_path = 'primers_to_exclude.boulder-io' options_path = 'primer3_options' for path in [target_fasta_path, genome_fasta_path, excluded_primers_path, options_path]: found_it = verify_path(path) if not found_it: exit() print("Shiver me timbers, the input files be present. Now I'll be reading them. Savvy?") # Read files print("Reading the scurvy " + target_fasta_path + " file...") fasta_reader = FastaReader() with open(target_fasta_path, 'rb') as target_file: target_seqs = fasta_reader.read(target_file) if not target_seqs: print("Yarr! Error reading target fasta. Walk the plank. " + random_insult() + "\n") sys.exit() cds_segment_lengths = None if verify_path(gff_path): gff_reader = GFFReader() print("Reading the scurvy " + gff_path + " file...") with open(gff_path, 'rb') as gff_file: cds_segment_lengths = gff_reader.read(gff_file) if not cds_segment_lengths: print("Yarr! Error reading GFF! Walk the plank. " + random_insult() + "\n") sys.exit() boulder_io_reader = BoulderIOReader() print("Reading the scurvy " + excluded_primers_path + " file...") with open(excluded_primers_path, 'rb') as exclude_file: primers_to_exclude = boulder_io_reader.read_primer3_output(exclude_file) if not primers_to_exclude: print("Yarr! Error reading excluded primers file. Walk the plank. " + random_insult() + "\n") primer3_options = "" print("Reading the scurvy " + options_path + " file...") with open(options_path, 'rb') as options_file: for line in options_file: primer3_options += line print("") # Prepare input for primer3_core print("Yarr! Preparing input for primer3_core...") boulder_formatter = BoulderIOFormatter() if cds_segment_lengths: boulder_formatter.segment_lengths = cds_segment_lengths print("Yarr! Writing input file for primer3_core...") with open("target_seqs.boulder-io", "wb") as boulderfile: # write config data first boulderfile.write(primer3_options) exclude_entries = [] for seq in target_seqs: for primer in primers_to_exclude: if primer.target_sequence.header == seq.header: exclude_entries.append(primer.to_excluded_region_entry()) boulderfile.write(boulder_formatter.format_seq(seq, exclude_entries)) exclude_entries[:] = [] # Run primer3_core print("Yarr! Running primer3_core!") os.system("primer3_core < target_seqs.boulder-io > primers.boulder-io") print("Yarr! Ran primer3_core!") # Verify if file_is_empty("primers.boulder-io"): print("Yarr! No output from primer3_core! Walk the plank. " + random_insult() + "\n") sys.exit() # Read primers from file print("Yarr! Reading output from primer3_core!") with open("primers.boulder-io", "rb") as primersfile: primers = boulder_io_reader.read_primer3_output(primersfile) # Verify again, just for fun if not primers: print("Yarr! No primers in the scurvy primers.boulder-io file! Walk the plank. " + random_insult() + "\n") sys.exit() print("Yarr! We got " + str(len(primers)) + " scurvy primers!") # Convert primers.boulder-io file to left and right primer seqs, then write to fasta print("Yarr! Making scurvy fasta files of the left and right primers from the" + " primer3_core output. Shiver me timbers!") with open("left_right_primers.fasta", "wb") as primersfasta: for primer in primers: primersfasta.write(primer.left_primer_to_fasta()) primersfasta.write(primer.right_primer_to_fasta()) # Verify that file was written if file_is_empty("left_right_primers.fasta"): print("Yarr! Left and right primers failed to write! Walk the plank. " + random_insult() + "\n") sys.exit() # BLAST PRIMER SEQUENCES AGAINST GENOME TO MAKE SURE THEY ONLY AMPLIFY ONE REGION print("Yarr! Preparing blast database!") # makeblastdb -in Bdor.Trinity.reallyfiltered.fasta -dbtype nucl os.system("makeblastdb -in genome.fasta -dbtype nucl > /dev/null") # Verify that the database was created if file_is_empty("genome.fasta.nhr"): print("Yarr! Database wasn't created. Walk the plank. " + random_insult() + "\n") sys.exit() print("Yarr! Running blast on those scurvy left and right primers...") os.system('blastall -p blastn -d genome.fasta -i left_right_primers.fasta '+ '-r 1 -q 1 -G 1 -E 2 -W 9 -F "m D" -U -m 9 -b 4 > left_right_primers.blastout') # Verify that blast produced results if file_is_empty("left_right_primers.blastout"): print("Yarr! No output from blast! Walk the plank. " + random_insult() + "\n") sys.exit() print("Yarr! Blast finished running.") # Read in blast output blast_parser = BlastOutputParser() with open("left_right_primers.blastout", "rb") as blastout: blast_results = blast_parser.parse(blastout) if not blast_results or blast_results.number_of_hits == 0: print("Yarr! Scurvy error reading blast results. Walk the plank. " + random_insult() + "\n") sys.exit() print("Yarr! We got " + str(blast_results.number_of_hits()) + " hits!") print("Yarr! Time to filter these scurvy blast hits.") # filter hits for each using e-value and alignment length cutoffs MAX_E_VALUE = 1.0 MIN_ALIGNMENT_LENGTH = 12 blast_results.filter_results(MAX_E_VALUE, MIN_ALIGNMENT_LENGTH) print("Yarr! Now we got " + str(blast_results.number_of_hits()) + " hits!") # Verify that for a given primer, the left and right each map to exactly one common region in the genome one_match_primers = [] for primer in primers: left_primer_name = primer.target_sequence.header left_primer_name += "_" + primer.primer_name + "_left" right_primer_name = primer.target_sequence.header right_primer_name += "_" + primer.primer_name + "_right" if blast_results.number_of_common_hits(left_primer_name, right_primer_name) == 1: one_match_primers.append(primer) print("Yarr! There be " + str(len(one_match_primers)) + " one-match primers. Yarr!") # TODO check that it's the right region???? # Write those primers to fasta with open("verified_primers.fasta", "wb") as primfasta: for primer in one_match_primers: primfasta.write(primer.to_fasta()) # Verify that fasta if file_is_empty("verified_primers.fasta"): print("Yarr! Failed to write verified primers to fasta! Walk the plank. " + random_insult() + "\n") sys.exit() print("Yarr! Wrote verified primers to fasta.") # Write those primers to a tsv write_primer_table("verified_primers.tsv", one_match_primers) # Verify that tsv if file_is_empty("verified_primers.tsv"): print("Yarr! Failed to write verified primers to tsv! Walk the plank. " + random_insult() + "\n") sys.exit() print("Yarr! Wrote verified primers to tsv.")