def check_raw(self, filename, id, raw, **kwargs):
        """Index filename using keyword arguments, check get_raw(id)==raw.

        The comparison is made against the index built by ``SearchIO.index``
        and, when ``sqlite3`` is truthy, against an in-memory index built by
        ``SearchIO.index_db``. CRLF line endings are replaced by LF on both
        sides before comparing. If ``filename + ".bgz"`` exists, the same
        checks are repeated on that file.

        Arguments:
         - filename - path of the search output file to index.
         - id - key passed to ``get_raw`` on each index.
         - raw - expected raw record; converted with ``_as_bytes``.
         - kwargs - extra keyword arguments passed to the SearchIO calls.

        """
        raw = _as_bytes(raw)
        # Anticipate cases where the raw string and/or file uses different
        # newline characters ~ we set everything to \n.
        expected = raw.replace(b'\r\n', b'\n')

        idx = SearchIO.index(filename, self.fmt, **kwargs)
        try:
            new = idx.get_raw(id)
            self.assertIsInstance(
                new, bytes, "Didn't get bytes from %s get_raw" % self.fmt)
            self.assertEqual(expected, new.replace(b'\r\n', b'\n'))
        finally:
            # Close the index even when an assertion above fails.
            idx.close()

        # Now again, but using SQLite backend
        if sqlite3:
            idx = SearchIO.index_db(":memory:", filename, self.fmt, **kwargs)
            try:
                new = idx.get_raw(id)
                self.assertIsInstance(
                    new, bytes, "Didn't get bytes from %s get_raw" % self.fmt)
                self.assertEqual(expected, new.replace(b'\r\n', b'\n'))
            finally:
                idx.close()

        if os.path.isfile(filename + ".bgz"):
            # Do the tests again with the BGZF compressed file
            print("[BONUS %s.bgz]" % filename)
            self.check_raw(filename + ".bgz", id, raw, **kwargs)
Beispiel #2
0
def handle_input(filename):
    """Report the first hit of every query in ``filename`` if it is included.

    The file is indexed with ``SearchIO.index_db`` into a temporary index
    file named ``<prefix>.tmp.idx``, which is deleted again before returning.
    For each query whose first hit has ``is_included`` set, one tab-separated
    line (query, hit id, e-value, bit score) is written either to stdout or to
    ``<args.top_hits_dir><prefix>.top_hits``, depending on ``args.output``.
    The header line is only written when the output goes to a file.

    Returns a ``(not_significant_ids, not_found_ids)`` tuple of ``IdList``
    objects: the queries whose first hit is not included, and the queries
    that have no hits at all.
    """
    sys.stdout.write("Handling %s\n" % filename)
    not_significant_ids = IdList()
    not_found_ids = IdList()

    prefix = FileRoutines.split_filename(filename)[1]
    index_file = "%s.tmp.idx" % prefix
    to_stdout = args.output == "stdout"

    hmm_dict = SearchIO.index_db(index_file, filename, args.format)
    try:
        if to_stdout:
            out_fd = sys.stdout
        else:
            out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w")
        try:
            if not to_stdout:
                out_fd.write("#query\thit\tevalue\tbitscore\n")

            for query in hmm_dict:
                # Look the record up once; every hmm_dict[query] access goes
                # back to the index.
                query_result = hmm_dict[query]
                if not query_result.hits:
                    not_found_ids.append(query)
                    continue
                top_hit = query_result[0]
                if top_hit.is_included:
                    out_fd.write(
                        "%s\t%s\t%s\t%s\n" %
                        (query, top_hit.id, top_hit.evalue, top_hit.bitscore))
                else:
                    not_significant_ids.append(query)
        finally:
            # Never close sys.stdout, only a file this function opened.
            if not to_stdout:
                out_fd.close()
    finally:
        # Close the index before deleting its file, and delete the file even
        # on failure so no stale temporary index is left behind.
        try:
            hmm_dict.close()
        finally:
            os.remove(index_file)

    return not_significant_ids, not_found_ids
    def check_raw(self, filename, id, raw, **kwargs):
        """Index filename using keyword arguments, check get_raw(id)==raw.

        The record is fetched from a ``SearchIO.index`` index and, when
        ``sqlite3`` is truthy, from an in-memory ``SearchIO.index_db`` index.
        Both the expected and the fetched bytes have CRLF replaced by LF
        before they are compared. If ``filename + ".bgz"`` exists, the checks
        are run again on that file.

        Arguments:
         - filename - path of the search output file to index.
         - id - key passed to ``get_raw`` on each index.
         - raw - expected raw record; converted with ``_as_bytes``.
         - kwargs - extra keyword arguments passed to the SearchIO calls.

        """
        raw = _as_bytes(raw)
        # Anticipate cases where the raw string and/or file uses different
        # newline characters ~ we set everything to \n.
        expected = raw.replace(b'\r\n', b'\n')

        def verify(index):
            """Compare index.get_raw(id) with the expected bytes, then close."""
            try:
                fetched = index.get_raw(id)
                self.assertIsInstance(
                    fetched, bytes,
                    "Didn't get bytes from %s get_raw" % self.fmt)
                self.assertEqual(expected, fetched.replace(b'\r\n', b'\n'))
            finally:
                # Close the index even when an assertion above fails.
                index.close()

        verify(SearchIO.index(filename, self.fmt, **kwargs))

        # Now again, but using SQLite backend
        if sqlite3:
            verify(SearchIO.index_db(":memory:", filename, self.fmt, **kwargs))

        if os.path.isfile(filename + ".bgz"):
            # Do the tests again with the BGZF compressed file
            print("[BONUS %s.bgz]" % filename)
            self.check_raw(filename + ".bgz", id, raw, **kwargs)
Beispiel #4
0
    def extract_top_hits(hmmer_hits, top_hits_file, top_hits_ids_file=None,
                         not_significant_ids_file=None, not_found_ids_file=None):
        """Write the first hit of every query in ``hmmer_hits`` if included.

        ``hmmer_hits`` is indexed as "hmmer3-text" with ``SearchIO.index_db``
        using the temporary index file ``hmmer_hits.tmp.idx``, which is
        deleted afterwards. ``top_hits_file`` receives a header line followed
        by one tab-separated line (query, hit id, e-value, bit score) for
        every query whose first hit has ``is_included`` set.

        Queries are also sorted into three ``IdList`` objects, each written
        out only if the matching file argument is given:

         - top_hits_ids_file - queries whose first hit is included.
         - not_significant_ids_file - queries whose first hit is not included.
         - not_found_ids_file - queries without any hits.
        """
        top_hits_ids = IdList()
        not_significant_ids = IdList()
        not_found_ids = IdList()

        index_file = "hmmer_hits.tmp.idx"
        hmm_dict = SearchIO.index_db(index_file, hmmer_hits, "hmmer3-text")
        try:
            # The context manager guarantees the output is flushed and closed.
            with open(top_hits_file, "w") as out_fd:
                out_fd.write("#query\thit\tevalue\tbitscore\n")

                for query in hmm_dict:
                    # Look the record up once; every hmm_dict[query] access
                    # goes back to the index.
                    query_result = hmm_dict[query]
                    if not query_result.hits:
                        not_found_ids.append(query)
                        continue
                    top_hit = query_result[0]
                    if top_hit.is_included:
                        out_fd.write("%s\t%s\t%s\t%s\n" % (query, top_hit.id, top_hit.evalue,
                                                           top_hit.bitscore))
                        top_hits_ids.append(query)
                    else:
                        not_significant_ids.append(query)
        finally:
            # Close the index before deleting its file, and delete the file
            # even on failure so no stale temporary index is left behind.
            try:
                hmm_dict.close()
            finally:
                os.remove(index_file)

        if not_significant_ids_file:
            not_significant_ids.write(not_significant_ids_file)

        if not_found_ids_file:
            not_found_ids.write(not_found_ids_file)

        if top_hits_ids_file:
            top_hits_ids.write(top_hits_ids_file)
    def check_index(self, filename, format, **kwargs):
        """Check SearchIO.index and index_db against SearchIO.parse.

        Every query result parsed from ``filename`` is looked up by id in the
        ``SearchIO.index`` result and, if ``sqlite3`` can be imported, in an
        in-memory ``SearchIO.index_db`` result. Each looked-up object must be
        a different object from the parsed one and must satisfy
        ``compare_search_obj``. The record counts must match as well.

        ``format`` and ``kwargs`` are passed through to the SearchIO calls.
        """
        # check if Python3 installation has sqlite3
        try:
            import sqlite3
        except ImportError:
            sqlite3 = None

        parsed = list(SearchIO.parse(filename, format, **kwargs))
        # compare values by index
        indexed = SearchIO.index(filename, format, **kwargs)
        db_indexed = None
        try:
            self.assertEqual(len(parsed), len(indexed))
            # compare values by index_db, only if sqlite3 is present
            if sqlite3 is not None:
                db_indexed = SearchIO.index_db(':memory:', [filename], format, **kwargs)
                self.assertEqual(len(parsed), len(db_indexed))

            for qres in parsed:
                idx_qres = indexed[qres.id]
                # parsed and indexed qresult are different objects!
                self.assertIsNot(qres, idx_qres)
                # but they should have the same attribute values
                self.assertTrue(compare_search_obj(qres, idx_qres))
                # sqlite3 comparison, only if it's present
                if db_indexed is not None:
                    dbidx_qres = db_indexed[qres.id]
                    self.assertIsNot(qres, dbidx_qres)
                    self.assertTrue(compare_search_obj(qres, dbidx_qres))
        finally:
            # Close through the public API, even when an assertion fails.
            indexed.close()
            if db_indexed is not None:
                db_indexed.close()
                db_indexed._con.close()
Beispiel #6
0
    def parse_search_file(input_file, mode, format="hmmer3-text", index_file=None):
        """Load search results from ``input_file`` as a dict-like object.

        Arguments:
         - input_file - a single path (``str``) or a sequence of paths.
         - mode - "index_db", "index" or "parse". "index_db" is also used,
           whatever ``mode`` says, when more than one path is given.
         - format - format name passed to the SearchIO calls.
         - index_file - index file for "index_db"; "tmp.idx" if not given.

        Returns the object from ``SearchIO.index_db`` or ``SearchIO.index``,
        or, for "parse", an ``OrderedDict`` mapping each record id to its
        record. With a sequence of paths, "index" and "parse" only read the
        first one.

        Raises ``ValueError`` for any other ``mode``.
        """
        single_path = isinstance(input_file, str)

        if mode == "index_db" or (not single_path and len(input_file) > 1):
            index = index_file if index_file else "tmp.idx"
            files = [input_file] if single_path else input_file
            return SearchIO.index_db(index, files, format=format)

        if mode == "index":
            first_file = input_file if single_path else input_file[0]
            return SearchIO.index(first_file, format=format)

        if mode == "parse":
            first_file = input_file if single_path else input_file[0]
            return OrderedDict(
                (record.id, record)
                for record in SearchIO.parse(first_file, format=format))

        # Previously this fell through to an unbound local variable.
        raise ValueError(
            "Unknown mode %r, expected 'index_db', 'index' or 'parse'" % (mode,))
    def check_index(self, filename, format, **kwargs):
        """Check SearchIO.index and index_db against SearchIO.parse.

        The reference records come from ``SearchIO.parse``; a filename ending
        in ".bgz" is opened with ``gzip.open`` first. Every parsed query
        result is looked up by id in the ``SearchIO.index`` result and, if
        ``sqlite3`` can be imported, in an in-memory ``SearchIO.index_db``
        result. Each looked-up object must be a different object from the
        parsed one and must satisfy ``compare_search_obj``. The record counts
        must match as well. If ``filename + ".bgz"`` exists, the same checks
        are repeated on that file.

        ``format`` and ``kwargs`` are passed through to the SearchIO calls.
        """
        # check if Python3 installation has sqlite3
        try:
            import sqlite3
        except ImportError:
            sqlite3 = None

        if filename.endswith(".bgz"):
            # The context manager closes the handle even if parsing fails.
            with gzip.open(filename) as handle:
                parsed = list(SearchIO.parse(handle, format, **kwargs))
        else:
            parsed = list(SearchIO.parse(filename, format, **kwargs))

        # compare values by index
        indexed = SearchIO.index(filename, format, **kwargs)
        db_indexed = None
        try:
            self.assertEqual(
                len(parsed), len(indexed),
                "Should be %i records in %s, index says %i" %
                (len(parsed), filename, len(indexed)))
            # compare values by index_db, only if sqlite3 is present
            if sqlite3 is not None:
                db_indexed = SearchIO.index_db(':memory:', [filename], format,
                                               **kwargs)
                self.assertEqual(
                    len(parsed), len(db_indexed),
                    "Should be %i records in %s, index_db says %i" %
                    (len(parsed), filename, len(db_indexed)))

            for qres in parsed:
                idx_qres = indexed[qres.id]
                # parsed and indexed qresult are different objects!
                self.assertIsNot(qres, idx_qres)
                # but they should have the same attribute values
                self.assertTrue(compare_search_obj(qres, idx_qres))
                # sqlite3 comparison, only if it's present
                if db_indexed is not None:
                    dbidx_qres = db_indexed[qres.id]
                    self.assertIsNot(qres, dbidx_qres)
                    self.assertTrue(compare_search_obj(qres, dbidx_qres))
        finally:
            # Close both indexes even when an assertion above fails.
            indexed.close()
            if db_indexed is not None:
                db_indexed.close()
                db_indexed._con.close()

        if os.path.isfile(filename + ".bgz"):
            # Do the tests again with the BGZF compressed file
            print("[BONUS %s.bgz]" % filename)
            self.check_index(filename + ".bgz", format, **kwargs)
    def check_index(self, filename, format, **kwargs):
        """Compare indexed query results with the parsed ones.

        Records parsed by ``SearchIO.parse`` (through ``gzip.open`` when the
        filename ends in ".bgz") are the reference. The number of records and
        every record, looked up by id, are checked against ``SearchIO.index``
        and, if ``sqlite3`` can be imported, against an in-memory
        ``SearchIO.index_db``. Looked-up objects must be distinct from the
        parsed ones and satisfy ``compare_search_obj``. If
        ``filename + ".bgz"`` exists, the checks are run again on that file.

        ``format`` and ``kwargs`` are passed through to the SearchIO calls.
        """
        # check if Python3 installation has sqlite3
        try:
            import sqlite3
        except ImportError:
            sqlite3 = None

        if filename.endswith(".bgz"):
            # The context manager closes the handle even if parsing fails.
            with gzip.open(filename) as handle:
                parsed = list(SearchIO.parse(handle, format, **kwargs))
        else:
            parsed = list(SearchIO.parse(filename, format, **kwargs))
        expected_count = len(parsed)

        # Collect every index that gets opened so all of them can be closed,
        # even when an assertion fails part-way through.
        indexed = SearchIO.index(filename, format, **kwargs)
        db_indexed = None
        try:
            # compare values by index
            self.assertEqual(expected_count, len(indexed),
                             "Should be %i records in %s, index says %i"
                             % (expected_count, filename, len(indexed)))
            # compare values by index_db, only if sqlite3 is present
            if sqlite3 is not None:
                db_indexed = SearchIO.index_db(':memory:', [filename], format, **kwargs)
                self.assertEqual(expected_count, len(db_indexed),
                                 "Should be %i records in %s, index_db says %i"
                                 % (expected_count, filename, len(db_indexed)))

            for qres in parsed:
                idx_qres = indexed[qres.id]
                # parsed and indexed qresult are different objects!
                self.assertIsNot(qres, idx_qres)
                # but they should have the same attribute values
                self.assertTrue(compare_search_obj(qres, idx_qres))
                # sqlite3 comparison, only if it's present
                if db_indexed is not None:
                    dbidx_qres = db_indexed[qres.id]
                    self.assertIsNot(qres, dbidx_qres)
                    self.assertTrue(compare_search_obj(qres, dbidx_qres))
        finally:
            indexed.close()
            if db_indexed is not None:
                db_indexed.close()
                db_indexed._con.close()

        if os.path.isfile(filename + ".bgz"):
            # Do the tests again with the BGZF compressed file
            print("[BONUS %s.bgz]" % filename)
            self.check_index(filename + ".bgz", format, **kwargs)
    def check_raw(self, filename, id, raw, **kwargs):
        """Index filename using **kwargs, check get_raw(id)==raw.

        The comparison is made against the index built by ``SearchIO.index``
        and, when ``sqlite3`` is truthy, against an in-memory index built by
        ``SearchIO.index_db``. ``raw`` is converted with ``_as_bytes`` and
        must equal the fetched record exactly. If ``filename + ".bgz"``
        exists, the same checks are repeated on that file.
        """
        raw = _as_bytes(raw)

        idx = SearchIO.index(filename, self.fmt, **kwargs)
        try:
            self.assertEqual(raw, idx.get_raw(id))
        finally:
            # Close the index even when the assertion above fails.
            idx.close()

        # Now again, but using SQLite backend
        if sqlite3:
            idx = SearchIO.index_db(":memory:", filename, self.fmt, **kwargs)
            try:
                self.assertEqual(raw, idx.get_raw(id))
            finally:
                idx.close()

        if os.path.isfile(filename + ".bgz"):
            # Do the tests again with the BGZF compressed file
            # Function-call form of print: valid on Python 2 and Python 3.
            print("[BONUS %s.bgz]" % filename)
            self.check_raw(filename + ".bgz", id, raw, **kwargs)
Beispiel #10
0
    def check_index(self, filename, format, **kwargs):
        """Check SearchIO.index and index_db against SearchIO.parse.

        The reference records come from ``SearchIO.parse``; a filename ending
        in ".bgz" is opened with ``gzip.open`` first. The record count and
        every record, looked up by id, are checked against ``SearchIO.index``
        and, when ``sqlite3`` is not None, against an in-memory
        ``SearchIO.index_db``. Looked-up objects must be distinct from the
        parsed ones and are compared with ``self.compare_search_obj``. If
        ``filename + ".bgz"`` exists, the checks are run again on that file.

        ``format`` and ``kwargs`` are passed through to the SearchIO calls.
        """
        if filename.endswith(".bgz"):
            with gzip.open(filename) as handle:
                parsed = list(SearchIO.parse(handle, format, **kwargs))
        else:
            parsed = list(SearchIO.parse(filename, format, **kwargs))

        # compare values by index
        indexed = SearchIO.index(filename, format, **kwargs)
        db_indexed = None
        try:
            self.assertEqual(
                len(parsed),
                len(indexed),
                "Should be %i records in %s, index says %i" %
                (len(parsed), filename, len(indexed)),
            )
            # compare values by index_db, only if sqlite3 is present
            if sqlite3 is not None:
                db_indexed = SearchIO.index_db(":memory:", [filename], format,
                                               **kwargs)
                self.assertEqual(
                    len(parsed),
                    len(db_indexed),
                    "Should be %i records in %s, index_db says %i" %
                    (len(parsed), filename, len(db_indexed)),
                )

            for qres in parsed:
                idx_qres = indexed[qres.id]
                # parsed and indexed qresult are different objects!
                self.assertIsNot(qres, idx_qres)
                # but they should have the same attribute values
                self.compare_search_obj(qres, idx_qres)
                # sqlite3 comparison, only if it's present
                if db_indexed is not None:
                    dbidx_qres = db_indexed[qres.id]
                    self.assertIsNot(qres, dbidx_qres)
                    self.compare_search_obj(qres, dbidx_qres)
        finally:
            # Close both indexes even when an assertion above fails.
            indexed.close()
            if db_indexed is not None:
                db_indexed.close()
                db_indexed._con.close()

        if os.path.isfile(filename + ".bgz"):
            # Do the tests again with the BGZF compressed file
            print(f"[BONUS {filename}.bgz]")
            self.check_index(filename + ".bgz", format, **kwargs)
Beispiel #11
0
def handle_input(filename):
    """Report every included hit of every query found in ``filename``.

    The file is indexed with ``SearchIO.index_db`` into a temporary index
    file named ``<prefix>.tmp.idx``, which is deleted again before returning.
    For each hit with ``is_included`` set, one tab-separated line (query, hit
    id, e-value, bit score) is written either to stdout or to
    ``<args.top_hits_dir><prefix>.top_hits``, depending on ``args.output``.
    The header line is only written when the output goes to a file.
    """
    sys.stdout.write("Handling %s\n" % filename)
    prefix = FileRoutines.split_filename(filename)[1]
    index_file = "%s.tmp.idx" % prefix
    to_stdout = args.output == "stdout"

    hmm_dict = SearchIO.index_db(index_file, filename, args.format)
    try:
        if to_stdout:
            out_fd = sys.stdout
        else:
            out_fd = open("%s%s.top_hits" % (args.top_hits_dir, prefix), "w")
        try:
            if not to_stdout:
                out_fd.write("#query\thit\tevalue\tbitscore\n")
            for family in hmm_dict:
                for hit in hmm_dict[family]:
                    if hit.is_included:
                        out_fd.write("%s\t%s\t%s\t%s\n" %
                                     (family, hit.id, hit.evalue, hit.bitscore))
        finally:
            # Never close sys.stdout, only a file this function opened.
            if not to_stdout:
                out_fd.close()
    finally:
        # Close the index before deleting its file, and delete the file even
        # on failure so no stale temporary index is left behind.
        try:
            hmm_dict.close()
        finally:
            os.remove(index_file)