Example #1
0
def predict(input_sequence):
    """Compute (or load from cache) the prediction data for *input_sequence*.

    The result file lives in ``settings["RESULTS_DIR"]`` under the name
    returned by ``get_sequence_hash``.  A ``FileLock`` on ``<result>.lock``
    is held for the whole call; if the result file already exists it is
    parsed and returned without running anything.

    Otherwise the following steps run inside a temporary directory that is
    always removed on exit:

    1. run ``NETSURF_EXE`` on the input sequence;
    2. read the ``.blastout`` file expected in the temporary directory,
       take the alignment titles of its last PSI-BLAST round and fetch
       those sequences with ``FASTACMD_EXE``;
    3. dispatch ``netsurf_hit`` subtasks for every fetched sequence and
       concatenate their results;
    4. append the input sequence to the hits and align with ``MUSCLE_EXE``;
    5. write per-position entropies of the alignment;
    6. run ``DYNAMINE_EXE`` once per sequence;
    7. run ``RF_SCRIPT`` through ``RSCRIPT_EXE`` and copy its ``input.out``
       to the result file.

    Finally ``yasara_scene`` is dispatched with the parsed data.

    Args:
        input_sequence: the sequence text written to the input FASTA file.

    Returns:
        Whatever ``parse_serendip_results`` returns for the result file.

    Raises:
        Exception: when the PSI-BLAST record has no rounds, when the
            fastacmd call exits non-zero, or when the prediction script
            produced no output file.
    """
    # Avoid growing sys.path by one entry on every call.
    if settings["SERENDIP_DIR"] not in sys.path:
        sys.path.append(settings["SERENDIP_DIR"])
    from sequence.entropy.lib.seq_lib import FastaParser
    from Bio.Blast import NCBIStandalone

    sequence_hash = get_sequence_hash(input_sequence)
    results_path = os.path.join(settings["RESULTS_DIR"], sequence_hash)
    lock_path = results_path + ".lock"

    with FileLock(lock_path):

        if os.path.isfile(results_path):
            with open(results_path, 'r') as f:
                return parse_serendip_results(f.read())

        input_id = 'input'
        out_dir = tempfile.mkdtemp()

        try:
            out_file = os.path.join(out_dir, 'output.myrsa')
            input_fasta_path = os.path.join(out_dir, input_id + '.fa')

            # Netsurf
            # The file must be closed (flushed) before the tool reads it.
            with open(input_fasta_path, 'w') as f:
                f.write(">%s\n%s" % (input_id, input_sequence))
            _call_logged([settings["NETSURF_EXE"],
                          "-i", input_fasta_path,
                          "-d", settings["NR70_DB"], "-a", "-k",
                          "-T", out_dir, "-o", out_file])

            blast_parser = NCBIStandalone.PSIBlastParser()
            blastout_path = os.path.join(out_dir, input_id + '.blastout')
            with open(blastout_path, 'r') as f:
                blast_record = blast_parser.parse(f)
            # ``rounds`` is indexed below, so test it for emptiness; the
            # previous ``rounds <= 0`` comparison could never be true.
            if not blast_record.rounds:
                raise Exception("no netsurf hits")
            hit_titles = [alignment.title[1:]
                          for alignment in blast_record.rounds[-1].alignments]

            id_path = os.path.join(out_dir, input_id + '.blastout_id')
            with open(id_path, 'w') as f:
                f.writelines(hit_title + '\n' for hit_title in hit_titles)

            blast_hits_path = os.path.join(out_dir, 'output_seqs.fa')
            fastacmd_status = _call_logged([settings["FASTACMD_EXE"],
                                            "-d", settings["NR70_DB"],
                                            '-i', id_path,
                                            '-o', blast_hits_path])
            if fastacmd_status != 0:
                raise Exception("No blast hits for input sequence")

            # Netsurf on hits: start all subtasks first, then collect.
            netsurf_append_path = os.path.join(out_dir, 'output_other.myrsa')
            with open(blast_hits_path, 'r') as hits_file:
                subtasks = [netsurf_hit.delay(str(seq))
                            for seq in FastaParser(hits_file)]

            with open(netsurf_append_path, 'w') as f:
                for subtask in subtasks:
                    f.write(subtask.get())

            # Append input sequence to blast hits for alignment as input
            # for entropy and DynaMine
            with open(blast_hits_path, 'a') as f:
                f.write('>input\n' + input_sequence + '\n')

            # Make alignment using muscle
            alignment_path = os.path.join(out_dir, "output.ali")
            _call_logged([settings["MUSCLE_EXE"],
                          "-in", blast_hits_path,
                          "-out", alignment_path])

            # Alignment position entropies
            entropy_path = os.path.join(out_dir, "output.entropy")
            # The alignment file stays open for the whole section because
            # it is not visible here whether FastaParser reads lazily.
            with open(alignment_path, 'r') as alignment_file:
                hit_sequences = FastaParser(alignment_file)
                # NOTE(review): the return value of normalize() is dropped;
                # presumably it acts in place on state reused by the next
                # frequencies() call -- confirm against seq_lib.
                hit_sequences.frequencies().normalize()
                hit_entropies = hit_sequences.frequencies().entropies()
                with open(entropy_path, 'w') as f:
                    for position, entropy in enumerate(hit_entropies, 1):
                        f.write(str(position) + ' ' + str(entropy) + '\n')

            # Run dynamine on each sequence
            dynamine_fasta_path = os.path.join(out_dir, "output_seq.fasta")
            dynamine_env = dict(
                os.environ,
                PYTHONPATH="/usr/local/lib/python2.7/site-packages/")
            with open(blast_hits_path, 'r') as hits_file:
                for seq in FastaParser(hits_file):
                    # We use this file name, to avoid confusing the rest of
                    # the script:
                    with open(dynamine_fasta_path, 'w') as f:
                        f.write(str(seq))
                    _call_logged([settings["DYNAMINE_EXE"],
                                  "-a", dynamine_fasta_path],
                                 env=dynamine_env)

            # Run prediction script
            result_testing_path = os.path.join(
                settings["SERENDIP_DIR"], "sequence", "Result_Testing")
            combined_path = os.path.join(
                settings["SERENDIP_DIR"], "sequence", "five_models_combined")
            dynamine_path = os.path.splitext(dynamine_fasta_path)[0]
            cmd = [settings["RSCRIPT_EXE"], settings["RF_SCRIPT"], input_id,
                   alignment_path, entropy_path, netsurf_append_path,
                   dynamine_path, out_file, result_testing_path,
                   combined_path]
            # Give the child out_dir as its working directory rather than
            # calling os.chdir(): that would move the whole process into a
            # directory which the ``finally`` clause below deletes.
            _call_logged(cmd, cwd=out_dir)

            output_result_path = os.path.join(out_dir, input_id + '.out')
            if not os.path.isfile(output_result_path):
                raise Exception("No ouput generated")

            shutil.copyfile(output_result_path, results_path)

            with open(results_path, 'r') as f:
                data = parse_serendip_results(f.read())

            # Start making the scene:
            yasara_scene.delay(data)

            return data

        finally:
            if os.path.isdir(out_dir):
                shutil.rmtree(out_dir)


def _call_logged(cmd, **kwargs):
    """Log *cmd*, run it via ``subprocess.call`` and return its exit status.

    A non-zero status is logged as a warning but never raised, so callers
    decide for themselves whether a failure is fatal.
    """
    _log.info(cmd)
    status = subprocess.call(cmd, **kwargs)
    if status != 0:
        _log.warning("command exited with status %s: %s", status, cmd)
    return status
Example #2
0
# Feed every file in all_tests to the scanner with an AbstractConsumer,
# then every file in detailed_tests with a TaggingConsumer.  Each file is
# opened in a ``with`` block so the handle is closed once the scanner is
# done with it instead of being leaked.
for test in all_tests:
    # Single-argument form: prints the same line on Python 2 and 3.
    print("%s TESTING %s" % ("*" * 50, test))
    datafile = os.path.join("Blast", test)
    with open(datafile) as handle:
        scanner.feed(handle, ParserSupport.AbstractConsumer())

for test in detailed_tests:
    print("%s TESTING %s" % ("*" * 50, test))
    datafile = os.path.join("Blast", test)
    with open(datafile) as handle:
        scanner.feed(handle, ParserSupport.TaggingConsumer())

### BlastParser

# Parse every file in all_tests with BlastParser, falling back to
# PSIBlastParser when the first parser rejects the input as PSI-BLAST data.
print("Running tests on BlastParser")

parser = NCBIStandalone.BlastParser()
pb_parser = NCBIStandalone.PSIBlastParser()
for test in all_tests:
    print("%s TESTING %s" % ("*" * 50, test))
    datafile = os.path.join("Blast", test)
    try:
        # First, try parsing it with the normal parser.
        with open(datafile) as handle:
            rec = parser.parse(handle)
    except ValueError as x:
        # Any other ValueError is a genuine failure: let it propagate.
        if 'PSI-BLAST data' not in str(x):
            raise
        # It complained that the input is psiblast data, so parse it with
        # the psiblast parser, on a fresh handle so reading starts at the
        # top of the file.
        with open(datafile) as handle:
            rec = pb_parser.parse(handle)

### Blast Record
Example #3
0
    def ReadBlast(self, file, OUT, iszipped=0, is_psiblast=None):
        """Parse a BLAST report and write the hits passing the filters to OUT.

        Records are read one by one through ``NCBIStandalone.Iterator``.
        For each alignment only the first HSP is looked at.  A hit is kept
        when its e-value is at most ``self.HSP_max_evalue``, its similarity
        (``positives[0] / positives[1]``, in percent) is at least
        ``self.HSP_minimal_positives`` and its coverage
        (``positives[1] / query length``, in percent) is at least
        ``self.HSP_minimal_coverage``.  Kept hits are written as one line of
        tab-terminated fields: query, hit id, similarity, coverage, score,
        e-value.  When ``self.cB`` is true the hit is also passed to
        ``self.createblastDict``.

        Resets ``self.selfhits`` and ``self.blastDict`` and sets
        ``self.parser``, ``self.iter``, ``self.query`` and ``self.length``.

        Args:
            file: path of the BLAST report; opened with ``gzip`` when the
                name ends in ``.gz`` or *iszipped* is true.
            OUT: path of the output file (opened for writing, so truncated).
            iszipped: force gzip reading regardless of the file name.
            is_psiblast: use ``PSIBlastParser`` and only the last round of
                each record.

        Returns:
            ``None`` on success, or the string ``'Error parsing <file>'``
            when fetching the next record raised an exception.
        """
        output = open(OUT, "w")
        handle = None
        # try/finally guarantees both files are closed on every exit path,
        # including the early error return below.
        try:
            self.selfhits = []
            if is_psiblast:
                sys.stderr.write('Parsing PSI-Blast\n')
                self.parser = NCBIStandalone.PSIBlastParser()
            else:
                self.parser = NCBIStandalone.BlastParser()
            if file.endswith('.gz') or iszipped:
                handle = gzip.open(file)
            else:
                handle = open(file)

            self.iter = NCBIStandalone.Iterator(handle=handle,
                                                parser=self.parser)
            self.blastDict = {}

            while True:
                try:
                    rec = self.iter.next()
                except Exception:
                    # Not a bare ``except``: Ctrl-C and SystemExit must
                    # still be able to stop the program.
                    sys.stderr.write(
                        'Can\'t iterate on blast records anymore. Abort.\n')
                    import traceback
                    traceback.print_exc()
                    return 'Error parsing %s' % file
                if not rec:
                    break

                self.query = rec.query.split(" ")[0]
                self.length = rec.query_letters

                if self.length < self.min_size:
                    self.printer("Does not meet the minimum length " +
                                 str(self.min_size))
                    # NOTE(review): this ends processing of ALL remaining
                    # records, not only this query -- confirm intended.
                    break

                if is_psiblast:
                    rec = rec.rounds[-1]

                # each alignment is one potential hit
                for alignment in rec.alignments:
                    hsp = alignment.hsps[0]  # no multiple hsps
                    title = alignment.title

                    m = re.search(r"sp\|(.+?)\|(.+?) (.+)?", title)
                    if m:  # pyphynr blast result
                        hit_sp_id = m.group(2)
                    elif title.startswith('>'):
                        # result from additional blast databases
                        hit_sp_id = title[1:].split()[0]
                    else:
                        hit_sp_id = None

                    self.printer(hit_sp_id)
                    similarity = (hsp.positives[0] /
                                  float(hsp.positives[1]) * 100)
                    if not (float(hsp.expect) <= float(self.HSP_max_evalue)):
                        continue
                    if not (similarity >= int(self.HSP_minimal_positives)):
                        continue
                    coverage = hsp.positives[1] / float(self.length) * 100
                    if not (coverage >= int(self.HSP_minimal_coverage)):
                        continue

                    hitlist = [hit_sp_id, similarity, coverage,
                               hsp.score, hsp.expect]
                    if self.cB:
                        # Was ``query``, a name that is not defined here.
                        self.createblastDict(self.query, hitlist)
                    # Every field, including the last, is followed by a tab.
                    fields = [self.query] + hitlist
                    output.write(
                        "".join("%s\t" % (field,) for field in fields))
                    output.write("\n")
        finally:
            output.close()
            if handle is not None:
                handle.close()
        return None