Example #1
0
    def __init__(self, subject, query, blastn_opt="", miranda_opt=""):
        """
        Common entry point used both when imported and from the command line.

        Checks that the subject and query files are readable, creates a
        temporary working directory, extracts gzipped inputs into it and
        stores the blastn / miranda option strings as given.
        """
        print("\nInitialize TargetPredict")

        # Both input files must be readable before any working file is created
        for path in (subject, query):
            assert is_readable_file(path), "{} is not readable".format(path)

        # Temporary folder receiving the working files
        self.temp_dir = mkdtemp()

        # Keep the paths exactly as supplied by the caller
        # (the "orignal_query" spelling is the attribute name read by __call__)
        self.original_subject = subject
        self.orignal_query = query

        # Work on an extracted copy when the input is gzipped, else on the input itself
        if is_gziped(subject):
            self.subject = gunzip(subject, self.temp_dir)
        else:
            self.subject = subject

        if is_gziped(query):
            self.query = gunzip(query, self.temp_dir)
        else:
            self.query = query

        # Extra command line options forwarded to the external programs
        self.blastn_opt = blastn_opt
        self.miranda_opt = miranda_opt

        # Prefix shared by the output file names, built from the supplied paths
        subject_name = file_basename(subject)
        query_name = file_basename(query)
        self.basename = "{}_{}".format(subject_name, query_name)
Example #2
0
    def __call__(self):
        """
        Run the BLASTN and miRanda predictions and write their reports.

        Steps, in order:
          1. Build a nucleotide blast database from self.subject inside
             self.temp_dir and save the makeblastdb output in a
             "*_makeblastdb.log" file.
          2. Run blastn with self.query against that database, keep only the
             hits whose strand is "-", then the highest scoring hit per subject
             id, sorted by decreasing score.
          3. Run miranda with self.query against self.subject and apply the
             same best-hit-per-subject selection and sorting.
          4. Write each hit list with self._write_report and a plain text
             summary in "<self.basename>.report.txt".

        Output file names carry no directory part of their own, so they are
        presumably created in the current working directory.

        Raises:
            AssertionError: if a blastn output line does not split into 13
                fields or a miranda hit line does not split into 11 fields.
        """
        ##### BLAST prediction #####

        print("\nFinding hits with BLASTN")

        blast_hits = []

        # Create blastn database
        print("  Create a blast database")
        db_path = "{}/{}".format(self.temp_dir, file_basename(self.subject))
        makeblastdb_cmd = "makeblastdb -dbtype nucl -input_type fasta -in {} -out {}".format(self.subject, db_path)
        print("\t"+makeblastdb_cmd)
        makeblastdb_out = self._yield_cmd(makeblastdb_cmd)

        # Keep the makeblastdb output as a log file
        with open(file_basename(self.subject)+"_makeblastdb.log", "w") as fout:
            for line in makeblastdb_out:
                fout.write(line)

        # Perform blast
        print("  Run blastn")
        blastn_cmd = "blastn {} -num_threads {} -outfmt \"6 std qseq\" -dust no -query {} -db {}".format(self.blastn_opt, cpu_count(), self.query, db_path)
        print("\t"+blastn_cmd)

        # One hit per output line: the requested format must give 13 fields
        blastn_out = self._yield_cmd(blastn_cmd)
        for line in blastn_out:
            hit_split = line.strip().split()
            assert len(hit_split) == 13, "Invalid blast line: {}".format(line)
            blast_hits.append(BlastHit(*hit_split))

        # Suppress hits on positive strand, keep best hit per subject only and sort by score
        print("  Process hits")
        blast_hit_dict = {}
        for hit in blast_hits:
            if hit.strand != "-":
                continue
            best = blast_hit_dict.get(hit.s_id)
            # Strict comparison: on equal scores the first hit seen is kept
            if best is None or hit.score > best.score:
                blast_hit_dict[hit.s_id] = hit

        # sorted() returns a list on both Python 2 and 3, whereas the view
        # returned by dict.values() on Python 3 has no sort() method
        blast_hits = sorted(blast_hit_dict.values(), key=lambda x: x.score, reverse=True)

        # Write a complete blast report
        print("  Write a blast report")
        self._write_report(blast_hits, "{}_raw_blast_results.csv".format(self.basename))

        ##### MIRANDA prediction#####

        print("\nFinding hits with MIRANDA")

        miranda_hits = []

        # Run miranda and parse output
        print("  Run miranda")
        miranda_cmd = "miranda {} {} -quiet {}".format(self.query, self.subject, self.miranda_opt)
        print("\t"+miranda_cmd)

        miranda_output = self._yield_cmd(miranda_cmd)

        for line in miranda_output:
            # Hit lines start with a single ">"; lines starting with ">>" are
            # skipped. startswith() also copes with empty or one character lines.
            if line.startswith(">") and not line.startswith(">>"):
                hit_split = line[1:].strip().split()
                assert len(hit_split) == 11, "Invalid miranda line: {}".format(line)
                miranda_hits.append(MirandaHit(*hit_split))

        # Keep best hit per subject only and sort hits by score
        print("  Process hits")
        miranda_hit_dict = {}
        for hit in miranda_hits:
            best = miranda_hit_dict.get(hit.s_id)
            # Strict comparison: on equal scores the first hit seen is kept
            if best is None or hit.score > best.score:
                miranda_hit_dict[hit.s_id] = hit

        miranda_hits = sorted(miranda_hit_dict.values(), key=lambda x: x.score, reverse=True)

        # Write a complete miranda report
        print("  Write a Miranda report")
        self._write_report(miranda_hits, "{}_raw_miranda_results.csv".format(self.basename))

        # Write a report
        report_out = self.basename+".report.txt"
        print("\nGenerate a summary report")
        with open(report_out, "w") as fout:
            fout.write("Program {}\tDate {}\n".format(self.VERSION,str(datetime.today())))
            fout.write("\n### OPTIONS ###\n")
            fout.write("Subject fasta file\t{}\n".format(self.original_subject))
            fout.write("Query fasta file\t{}\n".format(self.orignal_query))
            fout.write("Makeblastdb command\t{}\n".format(makeblastdb_cmd))
            fout.write("Blastn command\t{}\n".format(blastn_cmd))
            fout.write("Miranda command\t{}\n".format(miranda_cmd))
            fout.write("\n### COUNTS ###\n")
            fout.write("Blast Hits found\t{}\n".format(len(blast_hits)))
            fout.write("Miranda Hits found\t{}\n".format(len(miranda_hits)))