Python BLASTUtilities 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: MetaBinner

클래스/타입: BLASTUtilities

hotexamples.com에서의 예제들: 8

Python BLASTUtilities - 8개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python MetaBinner.BLASTUtilities의 실제 사용 예제 중 평가가 가장 높은 예제들입니다. 예제를 평가해 주시면 예제 품질을 개선하는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

BLASTMultiProcessing(2)

BLASTMultiProcessingParser(2)

BLASTResult(2)

parse_blast(2)

do_blast(1)

예제 #1

파일 보기

 def test_multi_processing_blast(self):
     """ Test that a set of blast runs using multiprocessing run

     Queues up to ``n_seqs`` records from the test FASTA database as BLAST
     jobs, runs them, and checks that the number of produced
     (identifier, file) pairs and the number of parsed results both equal
     ``n_seqs``. The BLAST output files are removed afterwards, even when
     an assertion fails.
     """
     fn_database = os.path.join(self.datadir, "mini_nr", "nr_test2")
     identifier = "temp.{0}"
     n_seqs = 20
     blaster = BLASTUtilities.BLASTMultiProcessing()
     for i, seq_record in enumerate(SeqIO.parse(fn_database, "fasta")):
         if i == n_seqs:
             break
         # str(seq) is the supported replacement for the deprecated
         # Seq.tostring()
         blaster.add_sequence(str(seq_record.seq),
                              identifier.format(i), fn_database)
     fn_identifier_pairs = blaster.run()
     try:
         self.assertEqual(len(fn_identifier_pairs), n_seqs,
                          "Unexpected number of BLAST results")
         blast_parser = BLASTUtilities.BLASTMultiProcessingParser()
         for first, fn in fn_identifier_pairs:
             blast_parser.add_file(identifier.format(first), fn)
         parsing_results = blast_parser.run()
         n_parsed = len(parsing_results)
         self.assertEqual(
             n_parsed, n_seqs,
             "Unexpected number of  parsed results {0}".format(n_parsed))
     finally:
         # Always clean up the BLAST output files so that a failing
         # assertion does not leave them behind.
         for _, fn in fn_identifier_pairs:
             os.remove(fn)

예제 #2

파일 보기

파일: test_blast_related_functions.py 프로젝트: javang/engr230_ngs

    def test_do_blast(self):
        """ Test that a BLAST subprocess runs

        Runs do_blast on the first record of the test FASTA file against
        the "proteins" test database, checks that the reported output file
        exists, and then checks the parsed result: exactly one title, the
        expected bit score and an e-value of (almost) zero. The output file
        is removed at the end.
        """
        fn_sequence = os.path.join(self.datadir, "2061973757.fasta")
        fn_database = os.path.join(self.datadir, "mini_nr", "proteins")

        parser = SeqIO.parse(fn_sequence, "fasta")
        # next(parser) works on Python 2.6+ and 3; parser.next() is
        # Python 2 only.
        S = next(parser)
        identifier = "nothing"
        # str(seq) is the supported replacement for the deprecated
        # Seq.tostring()
        fn_output = BLASTUtilities.do_blast(str(S.seq), identifier,
                                            fn_database)
        self.assertTrue(os.path.exists(fn_output),
                        "BLAST did not produce the output file")
        try:
            results = BLASTUtilities.parse_blast(fn_output)
            self.assertEqual(len(results.titles), 1)
            self.assertAlmostEqual(947.577, results.bits[0], delta=0.001,
                                   msg="Score not correct")
            self.assertAlmostEqual(0, results.evalues[0], delta=1e-5,
                                   msg="E-value not correct")
        finally:
            # Remove the output file even when one of the checks fails.
            os.remove(fn_output)

예제 #3

파일 보기

    def test_description_parsing(self):
        """ Test the parsing of a blast description

        Builds the set of expected "genus species" names (lower-cased) from
        the first two space-separated words of each line of the check file,
        collects the organisms returned by BLASTResult.parse_organisms for
        every record description in the nr.COG1528 FASTA file, and asserts
        that both sets have the same size.
        """
        # File with all the microorganisms in nr.COG1528
        fn_check_file = os.path.join(self.datadir, "nr.COG1528.check_file")
        organisms = set()
        # Context manager so the check file is closed deterministically.
        with open(fn_check_file) as fhandle:
            for words in csv.reader(fhandle, delimiter=" "):
                if len(words) >= 2:
                    genus = words[0].lower()
                    species = words[1].lower()
                    organisms.add(genus + " " + species)
        log.debug("organisms in the check file: %s", organisms)
        # Parse all fasta descriptions
        fn_database = os.path.join(self.datadir, "nr.COG1528")
        parser = SeqIO.parse(fn_database, "fasta")
        organisms_parsed = set()
        p = BLASTUtilities.BLASTResult()
        for seq_record in parser:
            # set.update() consumes the parsed organisms eagerly; map() used
            # for its side effects is lazy on Python 3 and would add nothing.
            organisms_parsed.update(
                p.parse_organisms(seq_record.description))
        log.debug("organisms_parsed: %s", organisms_parsed)
        self.assertEqual(len(organisms), len(organisms_parsed),
                         "The number of organisms parsed is not correct")

예제 #4

파일 보기

파일: test_blast_related_functions.py 프로젝트: javang/engr230_ngs

 def test_parse_blast(self):
     """ Check the sizes of the fields parsed from a multi-entry BLAST XML

     parse_blast is called on the test XML file with 25 as its second
     argument; titles, evalues, scores and bits must each hold 25 items.
     """
     xml_path = os.path.join(self.datadir, "2061976712.xml")
     parsed = BLASTUtilities.parse_blast(xml_path, 25)
     # Attributes are checked one at a time, in the same order as before.
     for field in ("titles", "evalues", "scores", "bits"):
         self.assertEqual(len(getattr(parsed, field)), 25)

예제 #5

파일 보기

 def test_parse_blast(self):
     """ Verify that a multi-entry BLAST result parses into 25 hits

     Every per-hit list on the parsed result (titles, evalues, scores,
     bits) is expected to have the length passed to parse_blast.
     """
     expected_hits = 25
     xml_file = os.path.join(self.datadir, "2061976712.xml")
     blast_out = BLASTUtilities.parse_blast(xml_file, expected_hits)
     n_titles = len(blast_out.titles)
     self.assertEqual(n_titles, expected_hits)
     n_evalues = len(blast_out.evalues)
     self.assertEqual(n_evalues, expected_hits)
     n_scores = len(blast_out.scores)
     self.assertEqual(n_scores, expected_hits)
     n_bits = len(blast_out.bits)
     self.assertEqual(n_bits, expected_hits)

예제 #6

파일 보기

def blast(seqs):
    """
        Blast a set of sequences and parse the results. The function calls the
        MultiProcessing versions of the BLAST runner and of the parser.
        @seqs A list of tuples of (sequence, identifier for the sequence, database to use
        for the blast procedure)
        @return Returns the output of the multiprocessing parser (a list of
        BLASTResult objects)
        @raise ValueError if seqs is empty
    """
    if len(seqs) == 0:
        raise ValueError("No sequences provided")
    blaster = BLASTUtilities.BLASTMultiProcessing()
    log.debug("Running blast from %s sequences", len(seqs))
    for seq in seqs:
        blaster.add_sequence(*seq)
    fns_blast_output = blaster.run()
    try:
        parser = BLASTUtilities.BLASTMultiProcessingParser()
        for identifier, fn in fns_blast_output:
            parser.add_file(identifier, fn)
        return parser.run()
    finally:
        # clean xmls after parsing; done in a finally clause so the files are
        # also removed when parsing raises
        for _, fn in fns_blast_output:
            os.remove(fn)

예제 #7

파일 보기

    def test_do_blast(self):
        """ Test that a BLAST subprocess runs

        The first record of the test FASTA file is blasted against the
        "proteins" test database. The test checks that the output file
        reported by do_blast exists and that parsing it yields a single
        title with the expected bit score and e-value. The output file is
        deleted afterwards.
        """
        fn_sequence = os.path.join(self.datadir, "2061973757.fasta")
        fn_database = os.path.join(self.datadir, "mini_nr", "proteins")

        parser = SeqIO.parse(fn_sequence, "fasta")
        # Built-in next() instead of the Python 2-only .next() method.
        S = next(parser)
        identifier = "nothing"
        # str(seq) instead of the deprecated Seq.tostring()
        fn_output = BLASTUtilities.do_blast(str(S.seq), identifier,
                                            fn_database)
        self.assertTrue(os.path.exists(fn_output),
                        "BLAST did not produce the output file")

        try:
            results = BLASTUtilities.parse_blast(fn_output)
            self.assertEqual(len(results.titles), 1)
            self.assertAlmostEqual(947.577,
                                   results.bits[0],
                                   delta=0.001,
                                   msg="Score not correct")
            self.assertAlmostEqual(0,
                                   results.evalues[0],
                                   delta=1e-5,
                                   msg="E-value not correct")
        finally:
            # Delete the output file whether or not the checks passed.
            os.remove(fn_output)

예제 #8

파일 보기

def assign_genus_to_scaffolds(args):
    """ Assign genus to scaffolds in the database

    The function:
    1) Reads the genes in the database that belong to a given COG
    2) Reads the BLAST results for each of the genes.
    3) Recovers the best hit (genus and bit score) for the gene and
    identifies the scaffold where the gene is located
    4) Assigns the genus found in the hit to the scaffold.

    A scaffold can receive different assignments. To select one assignment,
    1) sum the bit scores for each genus assigned to a scaffold
       (presumably done by add_to_scaffold_dictionary - confirm).
    2) Choose the genus with the largest total bit score

    Finally, filter the assignments with
    BiologyBasedRules.filter_genus_assignments and store them in the
    database.

    @param args Object with the attributes fn_database (database file) and
    fn_marker_cogs (space-delimited file whose first column holds the
    marker COG ids)
    @raise ValueError if the database lacks the genes table or the BLAST
    results table, or if no marker COGs are found in the file
    """
    db = MetagenomeDatabase.MetagenomeDatabase(args.fn_database)
    try:
        names = db.get_tables_names()
        if db.GenesTable not in names:
            raise ValueError("The database does not have a table of genes")
        if db.BlastResultsTable not in names:
            raise ValueError(
                "The database does not have a table of BLAST results")
        # Read file marker cogs
        # NOTE(review): the "U" mode flag is a Python 2-era option; it must
        # be dropped if this code is ported to Python 3.11 or later.
        with open(args.fn_marker_cogs, "rU") as fhandle:
            reader = csv.reader(fhandle, delimiter=" ")
            # csv.reader yields an empty row for a blank line; skip those
            # instead of failing with IndexError on row[0].
            marker_cogs = frozenset(row[0] for row in reader if row)
        if len(marker_cogs) == 0:
            raise ValueError("No marker COGs provided")

        if db.ScaffoldsAssignmentsTable in names:
            db.drop_table(db.ScaffoldsAssignmentsTable)
        db.create_scaffold_assignments_table()

        blast_result = BLASTUtilities.BLASTResult()
        scaffolds_dict = {}
        for cog_id in marker_cogs:
            # read the genes and scaffolds for the cog
            # NOTE(review): cog_id comes from the marker file and is
            # interpolated directly into the SQL text; consider a
            # parameterized query if db.execute supports one.
            sql_command = """SELECT {0}.gene_id,{0}.scaffold, {0}.dna_length,{1}.titles,{1}.bits
                         FROM {0}
                         INNER JOIN {1}
                         WHERE {0}.cog_id="{2}" AND {0}.gene_id={1}.gene_id
                      """.format(db.GenesTable, db.BlastResultsTable, cog_id)
            cursor = db.execute(sql_command)
            r = cursor.fetchone()
            while r:
                sc = r["scaffold"]
                organism, bit_score = blast_result.get_best_hit(
                    r["titles"], r["bits"])
                # the genus is the first word of the organism name
                genus = organism.split(" ")[0]
                add_to_scaffold_dictionary(scaffolds_dict, sc, genus,
                                           float(bit_score))
                r = cursor.fetchone()

        # Assign the genus with the largest bit score
        data = []
        for scaffold, genus_scores in scaffolds_dict.items():
            # items() exists on both Python 2 and 3, unlike iteritems()
            genus, bit_score = max(genus_scores.items(),
                                   key=operator.itemgetter(1))
            data.append((scaffold, genus, bit_score))
        data = BiologyBasedRules.filter_genus_assignments(
            data,
            n_appearances=2,
            bit_score_threshold=30)
        db.store_data(db.ScaffoldsAssignmentsTable, data)
    finally:
        # Close the database on success and on every error path.
        db.close()