Beispiel #1
0
    def parse_seq(self, web_page):
        """Build a parsed ``GoSequence`` from one result page.

        The sequence id is looked up with ``self._find_seq_id(web_page)``.
        When that lookup yields ``None`` nothing is built and ``None`` is
        returned.  Otherwise a ``GoSequence`` is created from the id and the
        page, its ``extract_ID`` and ``parse_go_term`` steps are run (the
        latter with ``self.e_threshold`` and ``self.debug``) and the sequence
        is returned.

        A warning is issued -- the sequence is still returned -- when the id
        and ``self.key_list`` are both truthy and the id is not in the list.
        """
        found_id = self._find_seq_id(web_page)
        if found_id is None:
            return None

        parsed = GoSequence(found_id, web_page)
        parsed.extract_ID()
        parsed.parse_go_term(self.e_threshold, self.debug)

        # An id missing from the expected keys is only reported, never fatal.
        if found_id and self.key_list:
            if found_id not in self.key_list:
                warnings.warn("Seq_ID %s doesn't exist in the list %s" % (found_id, self.key_list))

        return parsed
Beispiel #2
0
    def test_parse_seq(self):
        # Feed the stored fixture page BLAST/AmiGOBLASTResults_Gene_Local.html
        # to a GoSequence named "gene5" and check the combined GO terms it
        # extracts against the recorded set.
        infile = self.data_dir + "BLAST/AmiGOBLASTResults_Gene_Local.html"

        # Read the whole fixture at once; the context manager guarantees the
        # handle is closed even when reading or a later step fails.
        with open(infile, "r") as webpage:
            data = webpage.read()

        seq = GoSequence("gene5", data)
        seq.extract_ID()
        seq.parse_go_term(self.e_threshold, self.debug)

        expected = set([
            'GO:0005125', 'GO:0016311', 'GO:0046360', 'GO:0003674',
            'GO:0030170', 'GO:0004795', 'GO:0005737', 'GO:0006566',
            'GO:0005615', 'GO:0005634', 'GO:0006520', 'GO:0005524',
            'GO:0008150', 'GO:0070905', 'GO:0008152', 'GO:0009071',
            'GO:0008652', 'GO:0006897', 'GO:0005829', 'GO:0005575',
            'GO:0009088', 'GO:0004765', 'GO:0016829',
        ])

        # Sorted lists in the message make a failing diff readable.
        self.assertEqual(expected, seq.combined_terms,
                         "Error!! \nExpected: %s\nActual: %s\n" % (sorted(expected), sorted(seq.combined_terms)))
Beispiel #3
0
    def run_single(self, debug=0):
        """BLAST every record of the input one sequence at a time (deprecated).

        Emits a ``DeprecationWarning`` (after switching the warning filter to
        ``'always'``), then for each key in ``self.record_index`` -- built with
        ``SeqIO.index(self.infile, "fasta")`` when it is still ``None`` --
        creates a ``GoSequence``, runs ``blast_AmiGO``, ``extract_ID`` and
        ``parse_go_term(self.e_threshold)`` on it, stores it in
        ``self.results[key]`` and writes a ``key \\t terms`` line to
        ``<self.outfile>_temp``.

        Afterwards ``self.counter`` is set from ``self.create_counter`` over
        all collected terms and the result is written with ``output_csv``.

        Args:
            debug: Unused by this method.
        """
        warnings.simplefilter('always')
        warnings.warn("Deprecated method: run_BLAST.run_single\nBLAST single sequence, slow!! ", DeprecationWarning)

        print("Running AmiGO:BLAST")

        all_orfs = dict()

        # The context manager closes (and therefore flushes) the temp file
        # even when a BLAST or parsing step raises part-way through.
        with open(self.outfile + "_temp", "w") as temp_output:
            if self.record_index is None:
                self.record_index = SeqIO.index(self.infile, "fasta")

            for key in self.record_index:
                # Single-argument print() prints the same text on Python 2 and 3.
                print(key)
                this_seq = GoSequence(key, self.record_index[key].seq)  # Bio.SeqRecord.SeqRecord
                this_seq.blast_AmiGO()
                this_seq.extract_ID()
                this_seq.parse_go_term(self.e_threshold)

                self.results[key] = this_seq
                all_orfs[key] = this_seq.combined_terms
                temp_output.write("%s \t %s\n" % (key, this_seq.combined_terms))

        self.counter = self.create_counter(all_orfs)

        output_csv(self.outfile, self.header, self.counter)
Beispiel #4
0
    def test_GoConnector_long(self):
        # Run the "lcl|AE014075.1_gene_3" record through
        # GoSequence.blast_AmiGO and compare the combined GO terms with the
        # recorded set.
        query = self.record_index["lcl|AE014075.1_gene_3"].seq
        go_seq = GoSequence("G3", None)
        go_seq.blast_AmiGO(query)
        go_seq.extract_ID()
        go_seq.parse_go_term(self.e_threshold)

        expected_terms = set([
            'GO:0071470', 'GO:0016310', 'GO:0005886', 'GO:0009067',
            'GO:0000023', 'GO:0016597', 'GO:0043085', 'GO:0016491',
            'GO:0005737', 'GO:0050661', 'GO:0040007', 'GO:0005618',
            'GO:0009570', 'GO:0005634', 'GO:0006520', 'GO:0019877',
            'GO:0000166', 'GO:0016740', 'GO:0009097', 'GO:0009090',
            'GO:0019252', 'GO:0019761', 'GO:0016301', 'GO:0008152',
            'GO:0009088', 'GO:0055114', 'GO:0009507', 'GO:0008652',
            'GO:0005829', 'GO:0006555', 'GO:0004412', 'GO:0005575',
            'GO:0009089', 'GO:0005524', 'GO:0006164', 'GO:0006531',
            'GO:0009086', 'GO:0004072', 'GO:0009082',
        ])
        self.assertEqual(expected_terms, go_seq.combined_terms)
Beispiel #5
0
    def test_GoConnector_short(self):
        # Run the "lcl|AE014075.1_gene_2" record through
        # GoSequence.blast_AmiGO; exactly two combined GO terms are expected.
        query = self.record_index["lcl|AE014075.1_gene_2"].seq
        go_seq = GoSequence("G2", None)
        go_seq.blast_AmiGO(query)
        go_seq.extract_ID()
        go_seq.parse_go_term(self.e_threshold)

        expected_terms = set([
            'GO:0004803',
            'GO:0006313',
        ])
        self.assertEqual(expected_terms, go_seq.combined_terms)
Beispiel #6
0
    def amigo_batch_resume(self):
        """Resume an interrupted batch run from the temp files on disk.

        Reloads ``self.web_session_list`` from the pickled file
        ``<self.tempfile>object`` and scans ``self.tempfile`` line by line to
        rebuild:

        * ``self.stored_web_session_info`` -- ``(index, session_id)`` tuples
          stored at position ``int(index)``; slots never seen stay ``0``;
        * ``self.stored_session_id_result`` -- session ids taken from the
          result-marker lines;
        * ``self.all_seqs`` -- one ``GoSequence`` appended per stored sequence
          line found between a result marker and the end-of-result marker.

        When no end-of-session-ids marker was found, the work is delegated to
        ``self.amigo_batch_mode_resume_partial()``.  Otherwise
        ``self.retrieving_all_session_results`` is called with one boolean per
        session (result already stored or not) and the still-open temp file.

        Returns:
            The number of sessions without a stored result, or whatever
            ``amigo_batch_mode_resume_partial`` returns on the partial path.
        """
        # Single-argument print() prints the same text on Python 2 and 3.
        print("RESUME!!! Tempfile exist: %s!" % self.tempfile)

        # The context manager closes the temp file on every exit path,
        # including exceptions raised while parsing.
        with open(self.tempfile, "r+") as tempout:
            t2File = self.tempfile + "object"
            with open(t2File, 'r') as f:
                self.web_session_list = pickle.load(f)

            total_BLAST = len(self.web_session_list)

            self.stored_session_id_result = []
            self.stored_web_session_info = [0] * total_BLAST

            is_parse_result = False
            is_saving_completed = False
            # The checks below are deliberately independent ``if`` statements
            # evaluated in this order: the end-of-result marker switches
            # sequence parsing off before the sequence check, and a result
            # marker switches it on only for the lines that follow.
            for line in tempout.readlines():
                line = line.strip()
                if line.startswith(STORE_SESSION_ID_STRING):
                    fields = line.split(self.DELIM)
                    sid = fields[2]
                    self.stored_web_session_info[int(fields[1])] = (fields[1], sid)

                if line.startswith(END_SESSION_ID_STRING):
                    is_saving_completed = True
                if line.startswith(END_STORE_RESULT_STRING):
                    is_parse_result = False

                if is_parse_result and line.startswith(SEQ_ID_STRING):
                    # Fields are split on self.DELIM (original note: "$")
                    # because GO ids such as GO:000251 already contain ':'.
                    fields = line.split(self.DELIM)
                    seqid = fields[1]
                    seqSet = fields[2]
                    seq = GoSequence(seqid, seqSet)
                    # SECURITY: eval() executes whatever text is stored in the
                    # temp file; this is only safe while that file is written
                    # exclusively by this program.
                    seq.combined_terms = eval(seqSet)
                    self.all_seqs.append(seq)

                if line.startswith(STORE_RESULT_STRING):
                    fields = line.split(self.DELIM)
                    sid = fields[1]
                    self.stored_session_id_result.append(sid)
                    is_parse_result = True

            if self.debug:  # # These might have to go
                print("==DEBUG: Full saved session_list: %s" % (self.stored_web_session_info,))
                print("==DEBUG: Stored sessios_results: %s" % (self.stored_session_id_result,))

            if not is_saving_completed:
                print("===Warning!! Not all session_ids are stored, recreate using partial batch mode")
                # Release the handle before handing over to the partial
                # resume; closing again on leaving the ``with`` is a no-op.
                tempout.close()
                return self.amigo_batch_mode_resume_partial()

            stored_session_id_only = self.rebuild_web_session_list_from_tempobject()
            # One flag per session: True when its result is already stored.
            complete_index_boolean = [x in self.stored_session_id_result for x in stored_session_id_only]
            missing_length = total_BLAST - sum(complete_index_boolean)
            print("Missing %d/%d session(s)!" % (missing_length, total_BLAST))
            if self.debug:
                missing_session_index = [i for i, is_comp in enumerate(complete_index_boolean) if not is_comp]
                print("==DEBUG: Missing %d/%d session(s): Index: %s" % (len(missing_session_index), total_BLAST, missing_session_index))
                print(stored_session_id_only)
                print(self.stored_session_id_result)
                print(complete_index_boolean)

            self.retrieving_all_session_results(complete_index_boolean, tempout)

        print("End amigo_batch_resume, number of missed session: %d" % missing_length)
        return missing_length