def check_config(dbdriver, dbtype, dbhost, dbuser, dbpasswd, testdb):
    """Store the BioSQL test settings and verify that they are usable.

    The arguments are copied into the module-level globals, the driver
    module is imported, and a trial connection is opened and closed again.

    Raises:
        MissingExternalDependencyError: if the driver module cannot be
            imported, or if the trial connection fails for any reason.
    """
    global DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB, DBSCHEMA, SQL_FILE
    DBDRIVER = dbdriver
    DBTYPE = dbtype
    DBHOST = dbhost
    DBUSER = dbuser
    DBPASSWD = dbpasswd
    TESTDB = testdb

    # Check the database driver is installed:
    try:
        __import__(DBDRIVER)
    except ImportError:
        message = "Install %s if you want to use %s with BioSQL " % (DBDRIVER, DBTYPE)
        raise MissingExternalDependencyError(message)

    try:
        if DBDRIVER in ["sqlite3"]:
            # SQLite only needs the database file name.
            server = BioSeqDatabase.open_database(driver=DBDRIVER, db=TESTDB)
        else:
            server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                                  user=DBUSER, passwd=DBPASSWD,
                                                  host=DBHOST)
        # Close the trial connection for every driver, SQLite included.
        server.close()
        del server
    except Exception as e:
        message = "Connection failed, check settings if you plan to use BioSQL: %s" % str(e)
        raise MissingExternalDependencyError(message)
예제 #2
0
def check_config(dbdriver, dbtype, dbhost, dbuser, dbpasswd, testdb):
    """Record the BioSQL test settings and confirm they can be used.

    Copies the arguments into the module-level globals, checks that the
    database driver can be imported (the JDBC driver when SYSTEM is "Java"),
    opens and closes a trial connection, and finally sets DBSCHEMA and
    SQL_FILE and checks that the schema file exists.

    Raises:
        MissingExternalDependencyError: if a setting is missing, the driver
            is not installed, the connection fails, or the schema file is
            not found.
    """
    global DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB, DBSCHEMA
    global SYSTEM, SQL_FILE
    DBDRIVER, DBTYPE, DBHOST = dbdriver, dbtype, dbhost
    DBUSER, DBPASSWD, TESTDB = dbuser, dbpasswd, testdb

    # Without these three settings nothing below can work.
    if not (DBDRIVER and DBTYPE and DBUSER):
        raise MissingExternalDependencyError("Incomplete BioSQL test settings")

    # Is the database driver installed?
    if SYSTEM == "Java":
        try:
            if DBDRIVER == "MySQLdb":
                import com.mysql.jdbc.Driver
            elif DBDRIVER in ("psycopg2", "pgdb"):
                import org.postgresql.Driver
        except ImportError:
            raise MissingExternalDependencyError(
                "Install the JDBC driver for %s to use BioSQL " % DBTYPE)
    else:
        try:
            __import__(DBDRIVER)
        except ImportError:
            if DBDRIVER == "MySQLdb":
                hint = "Install MySQLdb or mysqlclient if you want to use %s with BioSQL " % (DBTYPE)
            else:
                hint = "Install %s if you want to use %s with BioSQL " % (DBDRIVER, DBTYPE)
            raise MissingExternalDependencyError(hint)

    # Trial connection: SQLite needs only the file, the others need credentials.
    try:
        if DBDRIVER == "sqlite3":
            connection = dict(driver=DBDRIVER, db=TESTDB)
        else:
            connection = dict(driver=DBDRIVER, host=DBHOST,
                              user=DBUSER, passwd=DBPASSWD)
        server = BioSeqDatabase.open_database(**connection)
        server.close()
        del server
    except Exception as e:
        raise MissingExternalDependencyError(
            "Connection failed, check settings if you plan to use BioSQL: %s" % e)

    DBSCHEMA = "biosqldb-" + DBTYPE + ".sql"
    SQL_FILE = os.path.join(os.getcwd(), "BioSQL", DBSCHEMA)

    if not os.path.isfile(SQL_FILE):
        raise MissingExternalDependencyError(
            "Missing SQL schema file: %s" % SQL_FILE)
예제 #3
0
def _do_db_cleanup():
    """Empty TESTDB so that it can be loaded from scratch.

    For the "psycopg2" and "pgdb" drivers everything owned by DBUSER is
    dropped from the existing TESTDB. For any other driver TESTDB is dropped
    (if that succeeds) and then created again.
    """
    if DBDRIVER not in ["psycopg2", "pgdb"]:
        # Connect without naming a database, since TESTDB is about to go.
        conn = BioSeqDatabase.open_database(driver=DBDRIVER, host=DBHOST,
                                            user=DBUSER, passwd=DBPASSWD)
        # Switch on auto-commit where the adaptor offers it.
        try:
            conn.adaptor.autocommit()
        except AttributeError:
            pass
        # Drop the old database.
        try:
            drop_sql = r"DROP DATABASE " + TESTDB
            conn.adaptor.cursor.execute(drop_sql, ())
        except (conn.module.OperationalError,
                conn.module.Error,
                conn.module.DatabaseError):
            # The database doesn't exist.
            pass
        except (conn.module.IntegrityError,
                conn.module.ProgrammingError) as err:
            # Only tolerate the "does not exist" complaint.
            if ('database "%s" does not exist' % TESTDB) not in str(err):
                conn.close()
                raise
        # Create the replacement database.
        create_sql = r"CREATE DATABASE " + TESTDB
        conn.adaptor.execute(create_sql, ())
        conn.close()
        return

    # PostgreSQL: the test user has no createdb privilege, so TESTDB must
    # already exist and we connect straight to it.
    conn = BioSeqDatabase.open_database(driver=DBDRIVER, host=DBHOST,
                                        user=DBUSER, passwd=DBPASSWD,
                                        db=TESTDB)

    # pgdb has no autocommit; committing here keeps the following statement
    # outside a transaction block.
    conn.adaptor.cursor.execute("COMMIT")
    # Postgres may report the database as still in use; give earlier tests
    # a moment to let go of it.
    time.sleep(1)
    drop_sql = r"DROP OWNED BY " + DBUSER
    conn.adaptor.cursor.execute(drop_sql, ())
    conn.close()
예제 #4
0
 def loop(self, filename, format):
     """Round-trip the records of a file through BioSQL and GenBank output.

     The records parsed from `filename` are loaded into a new namespace,
     read back by name and compared, then written as GenBank to an
     in-memory handle, parsed again and compared once more.

     Args:
         filename: path of the sequence file to parse.
         format: SeqIO format name of that file.
     """
     # Give SeqIO the filename so it opens and closes the file itself; the
     # previous open(filename, "rU") leaked the handle and relies on the "U"
     # mode flag, which Python 3.11 removed.
     original_records = list(SeqIO.parse(filename, format))
     # now open a connection to load the database
     server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                           user=DBUSER, passwd=DBPASSWD,
                                           host=DBHOST, db=TESTDB)
     try:
         db_name = "test_loop_%s" % filename  # new namespace!
         db = server.new_database(db_name)
         count = db.load(original_records)
         self.assertEqual(count, len(original_records))
         server.commit()
         # Now read them back...
         biosql_records = [db.lookup(name=rec.name)
                           for rec in original_records]
         # And check they agree
         self.assertTrue(compare_records(original_records, biosql_records))
         # Now write to a handle...
         handle = StringIO()
         SeqIO.write(biosql_records, handle, "gb")
         # Now read them back...
         handle.seek(0)
         new_records = list(SeqIO.parse(handle, "gb"))
         # And check they still agree
         self.assertEqual(len(new_records), len(original_records))
         for old, new in zip(original_records, new_records):
             # TODO - remove this hack because we don't yet write these:
             for key in ["comment", "references", "db_source"]:
                 if key in old.annotations and key not in new.annotations:
                     del old.annotations[key]
             self.assertTrue(compare_record(old, new))
     finally:
         # Release the connection even when an assertion above fails.
         server.close()
예제 #5
0
def create_database():
    """Drop the existing TESTDB, if there is one.

    NOTE(review): the summary used to read "Create an empty BioSQL
    database", but the code in this function only issues DROP DATABASE;
    confirm where the database is created.

    A "database does not exist" failure is ignored; any other
    IntegrityError or ProgrammingError from the driver is re-raised.
    """
    import time

    # first open a connection to create the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER, passwd=DBPASSWD,
                                          host=DBHOST)
    try:
        # Auto-commit: postgresql cannot drop database in a transaction
        try:
            server.adaptor.autocommit()
        except AttributeError:
            pass

        # drop anything in the database
        try:
            # with Postgres, can get errors about database still being used
            # and not able to be dropped. Wait briefly to be sure previous
            # tests are done with it.
            time.sleep(1)

            sql = r"DROP DATABASE " + TESTDB
            server.adaptor.cursor.execute(sql, ())
        except server.module.OperationalError:  # the database doesn't exist
            pass
        except (server.module.IntegrityError,
                server.module.ProgrammingError) as e:  # ditto--perhaps
            if str(e).find('database "%s" does not exist' % TESTDB) == -1:
                raise
    finally:
        # The connection was previously left open on every path.
        server.close()
예제 #6
0
def _do_db_create():
    """Drop TESTDB on the server. Relevant for MySQL and PostgreSQL.

    NOTE(review): the summary used to describe this as doing the actual
    work of database creation, but the code in this function only issues
    DROP DATABASE; confirm where the database is created.

    Driver errors raised by the drop are ignored, on the assumption that
    the database did not exist.
    """
    import time

    # first open a connection to create the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                          user=DBUSER, passwd=DBPASSWD,
                                          host=DBHOST)
    try:
        if DBDRIVER == "pgdb":
            # The pgdb postgres driver does not support autocommit, so here
            # we commit the current transaction so that 'drop database' query
            # will be outside a transaction block
            server.adaptor.cursor.execute("COMMIT")
        else:
            # Auto-commit: postgresql cannot drop database in a transaction
            try:
                server.adaptor.autocommit()
            except AttributeError:
                pass

        # drop anything in the database
        try:
            # with Postgres, can get errors about database still being used
            # and not able to be dropped. Wait briefly to be sure previous
            # tests are done with it.
            time.sleep(1)

            sql = r"DROP DATABASE " + TESTDB
            server.adaptor.cursor.execute(sql, ())
        except (server.module.OperationalError,
                server.module.Error,
                server.module.DatabaseError):  # the database doesn't exist
            pass
    finally:
        # The connection was previously left open on every path.
        server.close()
예제 #7
0
def load_multi_database(gb_filename_or_handle, gb_filename_or_handle2):
    """Load two GenBank inputs into separate namespaces of a fresh database.

    The first input goes into "biosql-test" and the second into
    "biosql-test2"; both are committed together.

    Returns:
        The sum of the two counts reported by the loads.
    """
    testdb = create_database()
    # Connect to the database that was just (re)created.
    server = BioSeqDatabase.open_database(driver=DBDRIVER, user=DBUSER,
                                          passwd=DBPASSWD, host=DBHOST,
                                          db=testdb)

    sources = (
        ("biosql-test", gb_filename_or_handle),
        ("biosql-test2", gb_filename_or_handle2),
    )
    total = 0
    for namespace, source in sources:
        sub_db = server.new_database(namespace)
        # Parse the GenBank input and put it in this namespace.
        total += sub_db.load(SeqIO.parse(source, "gb"))

    server.commit()
    server.close()
    return total
예제 #8
0
def main(gbfile, length=10000):
    """Load the long records of a GenBank file into a BioSQL sub-database.

    Records longer than `length` are sorted by name and loaded into the
    sub-database named by `biodb_name`, replacing it if it already exists.
    Any failure rolls the transaction back and is re-raised.

    NOTE(review): `biodb_name` is not defined in this function; it is
    presumably a module-level name - confirm.

    Args:
        gbfile: path of the GenBank file to read.
        length: only records longer than this are kept.
    """
    driver = "MySQLdb"
    user = "******"
    passwd = ""
    host = "localhost"
    dbname = "bioseqdb"

    # print(...) with a single string behaves the same on Python 2 and 3.
    print("Parsing Genbank file sequence file....")
    with open(gbfile) as gb_handle:
        records = list(SeqIO.parse(gb_handle, "genbank"))
    print("Sorting by size and name.......")
    longrecords = [record for record in records if len(record) > length]
    longrecords.sort(key=lambda x: x.name)  # sort by name

    print("Writing to BioSQL database...")
    server = BioSeqDatabase.open_database(driver=driver, user=user,
                                          passwd=passwd, host=host, db=dbname)

    try:
        if biodb_name not in server.keys():
            server.new_database(biodb_name)
        else:
            server.remove_database(biodb_name)
            server.adaptor.commit()
            # Was misspelled `new_databse`, which raised AttributeError.
            server.new_database(biodb_name)
        db = server[biodb_name]
        db.load(longrecords)
        server.adaptor.commit()
    except BaseException:
        server.adaptor.rollback()
        # Was misspelled `raide`, which hid the real error behind a NameError.
        raise
예제 #9
0
    def setUp(self):
        """Create a fresh TESTDB with an empty "biosql-test" namespace.

        Also sets Entrez.email and prepares `self.iterator` over
        GenBank/cor6_6.gb.
        """
        global DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB, DBSCHEMA
        global SYSTEM, SQL_FILE

        Entrez.email = "*****@*****.**"
        # (Re)create the test database and remember what create_database gave back.
        TESTDB = create_database()

        namespace = "biosql-test"
        connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                          host=DBHOST, db=TESTDB)
        self.server = BioSeqDatabase.open_database(**connection)

        # Start without a left-over namespace of the same name.
        try:
            self.server[namespace]
            self.server.remove_database(namespace)
        except KeyError:
            pass

        self.db = self.server.new_database(namespace)

        # The GenBank records the tests will load.
        self.iterator = SeqIO.parse("GenBank/cor6_6.gb", "gb")
예제 #10
0
 def trans(self, filename, format):
     """Copy the records of a file through two BioSQL namespaces.

     The records parsed from `filename` are loaded into a first namespace,
     read back and compared; the records read back are then loaded into a
     second namespace, read back and compared again.

     Args:
         filename: path of the sequence file to parse.
         format: SeqIO format name of that file.
     """
     # Give SeqIO the filename so it opens and closes the file itself; the
     # previous open(filename, "rU") leaked the handle and relies on the "U"
     # mode flag, which Python 3.11 removed.
     original_records = list(SeqIO.parse(filename, format))
     # now open a connection to load the database
     server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                           user=DBUSER, passwd=DBPASSWD,
                                           host=DBHOST, db=TESTDB)
     try:
         db_name = "test_trans1_%s" % filename  # new namespace!
         db = server.new_database(db_name)
         count = db.load(original_records)
         self.assertEqual(count, len(original_records))
         server.commit()
         # Now read them back...
         biosql_records = [db.lookup(name=rec.name)
                           for rec in original_records]
         # And check they agree
         self.assertTrue(compare_records(original_records, biosql_records))
         # Now write to a second name space...
         db_name = "test_trans2_%s" % filename  # new namespace!
         db = server.new_database(db_name)
         count = db.load(biosql_records)
         self.assertEqual(count, len(original_records))
         # Now read them back again,
         biosql_records2 = [db.lookup(name=rec.name)
                            for rec in original_records]
         # And check they also agree
         self.assertTrue(compare_records(original_records, biosql_records2))
     finally:
         # Release the connection even when an assertion above fails.
         server.close()
예제 #11
0
    def test_add_from_gff_with_taxonomy(self):
        """Add sequences from a gff + fasta pair and check the taxon names."""
        test_files = os.path.join(os.path.dirname(__file__), 'test_files')
        gff = os.path.join(test_files, 'GCF_000005845.2_ASM584v2_genomic.gff')
        fasta = os.path.join(test_files, 'GCF_000005845.2_ASM584v2_genomic.fna')
        cli_args = self.common_params + ['-t', '-T', 511145, '-g', gff, '-f', fasta, '-D', 'test']
        runner = CliRunner()
        result = runner.invoke(cli.main, cli_args)
        self.assertEqual(result.exit_code, 0)

        server = BioSeqDatabase.open_database(driver=self.dbdriver,
                                              user=self.dbuser,
                                              passwd=self.dbpassword,
                                              host=self.dbhost,
                                              db=self.dbname)

        query = "SELECT name FROM taxon_name where name_class = 'scientific name'"
        found = {row[0] for row in server.adaptor.execute_and_fetchall(query)}
        # Every scientific name expected in the taxon_name table.
        expected = {
            'cellular organisms',
            'Bacteria',
            'Proteobacteria',
            'Gammaproteobacteria',
            'Enterobacterales',
            'Enterobacteriaceae',
            'Escherichia',
            'Escherichia coli',
            'Escherichia coli K-12',
            'Escherichia coli str. K-12 substr. MG1655',
        }
        self.assertCountEqual(found, expected)
        server.close()
def main(args):
    """Set (or clear) the taxon of the named bioentries.

    Names come from the first of args.fasta, args.genbank or args.input
    that is not None. With args.remove the taxon_id is set to NULL;
    otherwise it is the value returned by add_new_taxonomy.
    """
    server = BioSeqDatabase.open_database(driver=args.driver,
                                          db=args.database,
                                          user=args.user,
                                          host=args.host,
                                          passwd=args.password)
    if args.database_name not in server.keys():
        server.new_database(args.database_name)

    db = server[args.database_name]

    # Collect the display names of the entries to update.
    if args.fasta is not None:
        names = [rec.name for rec in SeqIO.parse(args.fasta, 'fasta')]
    elif args.genbank is not None:
        names = [rec.name for rec in SeqIO.parse(args.genbank, 'genbank')]
    elif args.input is not None:
        with open(args.input) as fp:
            names = [line.rstrip() for line in fp]
    else:
        names = []

    taxon_id = None if args.remove else add_new_taxonomy(server, args.new_taxons, args.taxid)

    update_sql = 'update bioentry set taxon_id = %s where bioentry_id = %s'
    for name in names:
        bioentry_id = db.adaptor.fetch_seqid_by_display_id(db.dbid, name)
        server.adaptor.execute(update_sql, (taxon_id, bioentry_id))
    server.commit()
def main(args):
    """Load sequences into a BioSQL sub-database, optionally retagging taxa.

    A gff + fasta pair or a GenBank file is loaded into the sub-database
    named args.database_name (created if missing). A failed load rolls the
    transaction back and is re-raised. When args.new_taxons is set, the
    entries named in the fasta or GenBank input are pointed at the taxon
    returned by add_new_taxonomy.
    """
    server = BioSeqDatabase.open_database(driver=args.driver, db=args.database, user=args.user, host=args.host, passwd=args.password)
    if args.database_name not in server.keys():
        server.new_database(args.database_name)

    db = server[args.database_name]

    try:
        if args.gff is not None and args.fasta is not None:
            load_gff(db, args.gff, args.fasta, args.tax_lookup, args.taxid)
        elif args.genbank is not None:
            load_genbank(db, args.genbank, args.tax_lookup, args.taxid)
    except BaseException:
        server.adaptor.rollback()
        raise

    if args.new_taxons:
        taxon_id = add_new_taxonomy(server, args.new_taxons, args.taxid)

        if args.fasta is not None:
            gen = SeqIO.parse(args.fasta, 'fasta')
        elif args.genbank is not None:
            gen = SeqIO.parse(args.genbank, 'genbank')
        else:
            # No sequence input: nothing to retag. `gen` used to be left
            # unbound here, so the loop below raised UnboundLocalError.
            gen = ()

        for rec in gen:
            server.adaptor.execute('update bioentry set taxon_id = %s where bioentry_id = %s',(taxon_id, db.adaptor.fetch_seqid_by_display_id(db.dbid, rec.name)))

    server.commit()
예제 #14
0
def create_database():
    """Delete any existing BioSQL test database, then (re)create an empty BioSQL database.

    For sqlite3 the old TESTDB file is removed (best effort) and the global
    TESTDB is pointed at a new temporary filename; for other drivers
    _do_db_create() is called. The schema in SQL_FILE is then loaded.

    Returns:
        The current value of the global TESTDB, so callers written as
        ``TESTDB = create_database()`` get the database to connect to.
    """
    global TESTDB
    if DBDRIVER in ["sqlite3"]:
        if os.path.exists(TESTDB):
            try:
                os.remove(TESTDB)
            except OSError:
                # Give a lingering handle a moment, then try once more.
                time.sleep(1)
                try:
                    os.remove(TESTDB)
                except OSError:
                    # Seen this with PyPy 2.1 (and older) on Windows -
                    # which suggests an open handle still exists?
                    print("Could not remove %r" % TESTDB)
        # Now pick a new filename - just in case there is a stale handle
        # (which might be happening under Windows...)
        TESTDB = temp_db_filename()
    else:
        _do_db_create()

    # now open a connection to load the database
    server = BioSeqDatabase.open_database(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD, host=DBHOST, db=TESTDB)
    try:
        server.load_database_sql(SQL_FILE)
        server.commit()
    finally:
        # Whether or not loading worked, the handle must be closed.
        server.close()
    return TESTDB
예제 #15
0
def get_database():
    """Connect to the database server and return its "embl_rod" sub-database.

    XXX The connection details are hard coded here; they should instead be
    specified on the commandline.
    """
    connection = {
        "host": "192.168.0.192",
        "user": "******",
        "passwd": "",
        "db": "biosql_new",
    }
    return BioSeqDatabase.open_database(**connection)["embl_rod"]
def main(args):
    """Pass the ids listed in args.input, one per line, to print_feature_qv_csv."""
    server = BioSeqDatabase.open_database(driver=args.driver,
                                          db=args.database,
                                          user=args.user,
                                          host=args.host,
                                          passwd=args.password)
    # One id per line; trailing whitespace (including the newline) is dropped.
    with open(args.input) as handle:
        sfids = [line.rstrip() for line in handle]

    print_feature_qv_csv(server, sfids)
예제 #17
0
    def setUp(self):
        """Call load_database on GenBank/cor6_6.gb and open "biosql-test"."""
        load_database(os.path.join(os.getcwd(), "GenBank", "cor6_6.gb"))

        # Read the settings only now: load_database has already run.
        connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                          host=DBHOST, db=TESTDB)
        self.server = BioSeqDatabase.open_database(**connection)
        self.db = self.server["biosql-test"]
예제 #18
0
    def gbk_upload(self):
        """
        Load the GenBank files below self.path into BioSQL databases.

        Each directory entry of self.path other than 'Databases' is treated
        as a tier: a fresh copy of the template SQLite database is placed at
        Databases/<tier>.db, and every file in each gene directory of the
        tier is loaded into a sub-database named after the gene. On any
        failure the transaction is rolled back, the gene's sub-database is
        deleted, and the error is re-raised.
        """
        loaded_total = 0
        os.chdir(self.path)
        print(os.getcwd())
        if not os.path.isdir(self.path + '/Databases'):
            os.mkdir('Databases')
        for tier in os.listdir(os.getcwd()):
            if tier == 'Databases':
                continue
            db_name = str(tier) + '.db'
            db_path = self.path + '/Databases/' + db_name
            # Always start from a fresh copy of the template.
            if os.path.isfile(db_path):
                os.remove(db_path)
            print('Copying Template BioSQL Database...  '
                  'This may take a few minutes...')
            shutil.copy2(where.Templates + '/Template_BioSQL_DB.db', db_path)

            server = BioSeqDatabase.open_database(driver='sqlite3',
                                                  db=db_path)
            os.chdir(tier)
            for gene in os.listdir(os.getcwd()):
                os.chdir(gene)
                for gbk_name in os.listdir(os.getcwd()):
                    try:
                        if gene not in server.keys():
                            server.new_database(gene)
                        db = server[gene]
                        count = db.load(SeqIO.parse(gbk_name, 'genbank'))
                        server.commit()
                        print('Server Commited %s' % gene)
                        print('%s database loaded with %s.' % (db.dbid, gbk_name))
                        print("That file contains %s genbank records." %
                              str(count))
                        loaded_total += count
                        print(
                            'The total number of files loaded so far is %i.' %
                            loaded_total)
                    except BaseException:
                        # Undo the partial load and remove the sub-database
                        # before letting the error propagate.
                        server.rollback()
                        del server[gene]
                        server.commit()
                        raise
                # Back up to the tier directory.
                os.chdir('..')
            # Back up to self.path.
            os.chdir('..')
예제 #19
0
    def setUp(self):
        """Call load_database on GenBank/cor6_6.gb, then open "biosql-test"."""
        load_database("GenBank/cor6_6.gb")

        # Read the settings only now: load_database has already run.
        connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                          host=DBHOST, db=TESTDB)
        self.server = BioSeqDatabase.open_database(**connection)

        self.db = self.server["biosql-test"]
예제 #20
0
    def setUp(self):
        """Load the test data and look up the record with accession X62281."""
        load_database("GenBank/cor6_6.gb")

        # Read the settings only now: load_database has already run.
        connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                          host=DBHOST, db=TESTDB)
        self.server = BioSeqDatabase.open_database(**connection)
        self.db = self.server["biosql-test"]
        self.item = self.db.lookup(accession="X62281")
예제 #21
0
 def setUp(self):
     """Call create_database and add an empty "biosql-test" namespace."""
     # Replace any old database with a new one.
     create_database()
     # Connect to it, reading the settings after create_database has run.
     connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                       host=DBHOST, db=TESTDB)
     self.server = BioSeqDatabase.open_database(**connection)
     # New namespace inside the new, empty database.
     self.db = self.server.new_database("biosql-test")
def main(args):
    """Extract the features matching a qualifier/value pair.

    With output format 'feat-prot', CDS features are extracted with
    translate=True; with 'feat-nucl', features are extracted with the
    helper's defaults. Any other format extracts nothing.
    """
    server = BioSeqDatabase.open_database(driver=args.driver,
                                          db=args.database,
                                          user=args.user,
                                          host=args.host,
                                          passwd=args.password)

    seqfeature_ids = get_seqfeature_ids_from_qv(
        server, args.qualifier, args.value, args.database_name)

    output_format = args.output_format
    if output_format == 'feat-nucl':
        extract_feature_sql(server, seqfeature_ids)
    elif output_format == 'feat-prot':
        extract_feature_sql(server, seqfeature_ids, type=['CDS'], translate=True)
예제 #23
0
 def setUp(self):
     """Create a new test database and add an empty "biosql-test" namespace."""
     # Replace any old database; connect to whatever create_database returns.
     fresh_db = create_database()
     connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                       host=DBHOST, db=fresh_db)
     self.server = BioSeqDatabase.open_database(**connection)
     # New namespace inside the new, empty database.
     self.db = self.server.new_database("biosql-test")
예제 #24
0
 def setUp(self):
     """Connect to TESTDB and make sure "biosql-test-seqio" exists."""
     namespace = "biosql-test-seqio"
     connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                       host=DBHOST, db=TESTDB)
     self.server = server = BioSeqDatabase.open_database(**connection)
     if namespace not in server:
         # First use: create the namespace and make it permanent.
         self.db = server.new_database(namespace)
         server.commit()
     self.db = self.server[namespace]
예제 #25
0
    def setUp(self):
        """Load the test data and look up the record with accession X62281."""
        load_database("GenBank/cor6_6.gb")

        # Read the settings only now: load_database has already run.
        connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                          host=DBHOST, db=TESTDB)
        self.server = BioSeqDatabase.open_database(**connection)
        self.db = self.server["biosql-test"]
        self.item = self.db.lookup(accession="X62281")
예제 #26
0
    def test_003_loader2(self):
        """Type a known HLA-A sequence against two database versions.

        Builds a pyGFE from pickled feature tables in the working directory
        and checks that the first sequence of known_A.fasta is typed as
        HLA-A*01:01:01:01 with status "documented" for both "3.20.0" and
        "3.31.0". The elapsed time is printed.
        """
        started = time.time()
        graph = Graph(neo4jurl, user=neo4juser, password=neo4jpass,
                      bolt=False)
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb)
        seqann = BioSeqAnn(server=server, verbose=False)

        def unpickle(path):
            # Read one pickled table from the working directory.
            with open(path, 'rb') as handle:
                return pickle.load(handle)

        gfe_feats = unpickle("gfe2feat.pickle")
        feats = unpickle("unique_db-feats.pickle")
        cached_feats = unpickle("feature-service.pickle")
        gfe2hla = unpickle("gfe2hla.pickle")
        seq2hla = unpickle("seq2hla.pickle")

        pygfe = pyGFE(graph=graph,
                      seqann=seqann,
                      load_features=False,
                      verbose=False,
                      features=feats,
                      seq2hla=seq2hla,
                      gfe2hla=gfe2hla,
                      gfe_feats=gfe_feats,
                      cached_features=cached_feats,
                      loci=["HLA-A"])
        self.assertIsInstance(pygfe, pyGFE)
        seqs = list(SeqIO.parse(self.data_dir + "/known_A.fasta", "fasta"))
        typing1 = pygfe.type_from_seq("HLA-A", str(seqs[0].seq), "3.20.0")
        typing2 = pygfe.type_from_seq("HLA-A", str(seqs[0].seq), "3.31.0")
        time_taken = time.time() - started
        print("TIME TAKEN: " + str(time_taken))
        # The newer database version is checked first, then the older one.
        for typing in (typing2, typing1):
            self.assertEqual(typing.hla, 'HLA-A*01:01:01:01')
            self.assertEqual(typing.status, "documented")
            self.assertIsInstance(typing, Typing)
 def setUp(self):
     """Connect using connection_parameters(create=True) and build the taxonomy."""
     # The helper is asked to create the database and returns the settings.
     params = connection_parameters(create=True)
     testdb, dbdriver, dbuser, dbpassword, dbhost = params
     connection = dict(driver=dbdriver, user=dbuser, passwd=dbpassword,
                       host=dbhost, db=testdb)
     self.server = BioSeqDatabase.open_database(**connection)
     self._create_taxonomy()
     self.taxon_tree = TaxonTree(self.server.adaptor)
     self.testdb = testdb
예제 #28
0
    def setUp(self):
        """Call load_database on GenBank/cor6_6.gb and open "biosql-test"."""
        load_database(os.path.join(os.getcwd(), "GenBank", "cor6_6.gb"))

        # Read the settings only now: load_database has already run.
        connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                          host=DBHOST, db=TESTDB)
        self.server = BioSeqDatabase.open_database(**connection)
        self.db = self.server["biosql-test"]
예제 #29
0
    def setUp(self):
        """Call load_database on GenBank/cor6_6.gb, then open "biosql-test"."""
        load_database("GenBank/cor6_6.gb")

        # Read the settings only now: load_database has already run.
        connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                          host=DBHOST, db=TESTDB)
        self.server = BioSeqDatabase.open_database(**connection)

        self.db = self.server["biosql-test"]
예제 #30
0
def check_config(dbdriver, dbtype, dbhost, dbuser, dbpasswd, testdb):
    """Store the BioSQL test settings and verify that they are usable.

    The arguments are copied into the module-level globals, the database
    driver is imported (the JDBC driver when SYSTEM is "Java"), and a trial
    connection is opened and closed again.

    Raises:
        MissingExternalDependencyError: if the driver cannot be imported,
            or if the trial connection fails for any reason.
    """
    global DBDRIVER, DBTYPE, DBHOST, DBUSER, DBPASSWD, TESTDB, DBSCHEMA
    global SYSTEM, SQL_FILE
    DBDRIVER = dbdriver
    DBTYPE = dbtype
    DBHOST = dbhost
    DBUSER = dbuser
    DBPASSWD = dbpasswd
    TESTDB = testdb

    # Check the database driver is installed:
    if SYSTEM == "Java":
        try:
            if DBDRIVER in ["MySQLdb"]:
                import com.mysql.jdbc.Driver
            elif DBDRIVER in ["psycopg2"]:
                import org.postgresql.Driver
        except ImportError:
            message = "Install the JDBC driver for %s to use BioSQL " % DBTYPE
            raise MissingExternalDependencyError(message)
    else:
        try:
            __import__(DBDRIVER)
        except ImportError:
            message = "Install %s if you want to use %s with BioSQL " % (
                DBDRIVER, DBTYPE)
            raise MissingExternalDependencyError(message)

    try:
        if DBDRIVER in ["sqlite3"]:
            # SQLite only needs the database file name.
            server = BioSeqDatabase.open_database(driver=DBDRIVER, db=TESTDB)
        else:
            server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                                  user=DBUSER,
                                                  passwd=DBPASSWD,
                                                  host=DBHOST)
        server.close()
        del server
    except Exception as e:
        # "except X as e" is accepted by Python 2.6+ and Python 3 alike.
        message = "Connection failed, check settings if you plan to use BioSQL: %s" % str(
            e)
        raise MissingExternalDependencyError(message)
예제 #31
0
    def load_db(params):
        """Open the BioSQL database described by `params` and wrap it in DB.

        The password is read from the SQLPSW environment variable. The
        "sqlite3" driver is used when params["chlamdb.db_type"] is "sqlite",
        otherwise "MySQLdb" with utf8/unicode options.
        """
        sqlpsw = os.environ['SQLPSW']
        db_type = params["chlamdb.db_type"]
        db_name = params["chlamdb.db_name"]

        if db_type == "sqlite":
            connection = dict(driver="sqlite3",
                              user="******",
                              passwd=sqlpsw,
                              host="127.0.0.1",
                              db=f"{db_name}")
        else:
            connection = dict(driver="MySQLdb",
                              user="******",
                              passwd=sqlpsw,
                              host="127.0.0.1",
                              db=db_name,
                              charset='utf8',
                              use_unicode=True)
        return DB(BioSeqDatabase.open_database(**connection), db_name)
예제 #32
0
    def get_gbk_file(self, accession, gene, organism, server_flag=None):
        """Search a GenBank database for a target accession number.

        This function searches through the given NCBI databases (created by
        uploading NCBI refseq .gbff files to a BioPython BioSQL database) and
        creates single GenBank files.  This function can be used after a
        blast or on its own.  If used on its own then the NCBI .db files must
        be manually moved to the proper directories.

        :param accession: Accession number of interest without the version.
        :param gene: Target gene of the accession number parameter.
        :param organism: Target organism of the accession number parameter.
        :param server_flag: Pass True to skip the search entirely; it is set
            to True internally once a record has been written.
            (Default value = None)
        :return: None
        :raises FileNotFoundError: if no database contains the accession.
        """

        gene_path = self.raw_data / Path(gene) / Path('GENBANK')
        Path.mkdir(gene_path, parents=True, exist_ok=True)

        # Parse each database to find the proper GenBank record
        for FILE in self.db_files_list:
            db_file_path = self.ncbi_db_repo / Path(FILE)
            # Stop searching if the GenBank record has been created.
            if server_flag is True:
                break
            server = BioSeqDatabase.open_database(driver='sqlite3',
                                                  db=str(db_file_path))
            # Parse the sub-databases
            for SUB_DB_NAME in server.keys():
                db = server[SUB_DB_NAME]
                try:
                    record = db.lookup(accession=accession)
                    gbk_file = '%s_%s.gbk' % (gene, organism)
                    gbk_file_path = gene_path / Path(gbk_file)
                    with open(gbk_file_path, 'w') as GB_file:
                        GB_file.write(record.format('genbank'))
                        # The file name is a format argument; the call used to
                        # pass 'created' as an argument to a message with no
                        # placeholder, which fails when the record is formatted.
                        self.genbanklog.info('%s created', GB_file.name)
                    # Make sure we have the correct GenBank file.
                    self.gbk_quality_control(gbk_file_path, gene, organism)
                    # Stop searching if the GenBank record has been created.
                    server_flag = True
                    break
                except IndexError:
                    # The lookup found nothing in this sub-database.
                    self.genbanklog.critical(
                        'Index Error in %s.  Moving to the next database...' %
                        SUB_DB_NAME)
                    continue

        # If the file has not been created after searching, then raise an error
        if server_flag is not True:
            message = ("The GenBank file was not created for %s (%s, %s)." %
                       (accession, gene, organism))
            self.genbanklog.critical(message)
            raise FileNotFoundError(message)
예제 #33
0
 def setUp(self):
     """Connect to TESTDB and make sure "biosql-test-seqio" exists."""
     namespace = "biosql-test-seqio"
     connection = dict(driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD,
                       host=DBHOST, db=TESTDB)
     self.server = server = BioSeqDatabase.open_database(**connection)
     if namespace not in server:
         # First use: create the namespace and make it permanent.
         self.db = server.new_database(namespace)
         server.commit()
     self.db = self.server[namespace]
예제 #34
0
    def open(self, driver="pgdb", **kwargs):
        """
        Open a connection to a relational database.

        Stores the driver name and keyword arguments, the BioSQL server, and
        its adaptor's connection and cursor on the instance. `has_copy` is
        set to True only when the cursor has a `copy_from` attribute.
        """

        self.driver, self.dbargs = driver, kwargs
        self.server = BioSeqDatabase.open_database(driver=driver, **kwargs)
        adaptor = self.server.adaptor
        self.conn = adaptor.conn
        self.crs = adaptor.cursor
        if hasattr(self.crs, 'copy_from'):
            self.has_copy = True
예제 #35
0
 def test_backwards_compatibility(self):
     """Check that an old BioSQL SQLite3 database can still be read."""
     expected = list(SeqIO.parse("GenBank/cor6_6.gb", "gb"))
     # Open the stored database file and its "OLD" namespace.
     old_server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                               db="BioSQL/cor6_6.db")
     old_db = old_server["OLD"]
     self.assertEqual(len(old_db), len(expected))
     # Fetch each record by name, in the order of the GenBank file.
     fetched = []
     for rec in expected:
         fetched.append(old_db.lookup(name=rec.name))
     # The stored records must agree with the freshly parsed ones.
     self.assertTrue(compare_records(expected, fetched))
예제 #36
0
def main(args):
    """Print feature qualifier data for the seqfeature ids listed in a file.

    Opens the BioSQL server described by ``args``, reads one id per line
    from ``args.input`` (trailing whitespace stripped) and hands the server
    and the id list to ``print_feature_qv_csv``.
    """
    connection = dict(
        driver=args.driver,
        db=args.database,
        user=args.user,
        host=args.host,
        passwd=args.password,
    )
    server = BioSeqDatabase.open_database(**connection)

    with open(args.input) as handle:
        sfids = [entry.rstrip() for entry in handle]

    print_feature_qv_csv(server, sfids)
예제 #37
0
 def test_change_taxonomy(self):
     """Check that taxonomy can be properly changed.

     Runs the CLI with ``-T 112040`` on ``modify_header.txt`` and then
     verifies, straight from the database, that the bioentry with
     accession ``NC_000913`` is linked to NCBI taxon id 112040.
     """
     runner = CliRunner()
     infile = os.path.join(os.path.dirname(__file__), 'test_files', 'modify_header.txt')
     result = runner.invoke(cli.main, self.common_params + ['-i', infile, '-T', '112040', '--key', 'accession'])
     self.assertEqual(result.exit_code, 0)
     print(result.output)
     server = BioSeqDatabase.open_database(driver = self.dbdriver, user = self.dbuser,
                          passwd = self.dbpassword, host = self.dbhost, db = self.dbname)
     try:
         rows = server.adaptor.execute_and_fetchall("select ncbi_taxon_id from taxon join bioentry using(taxon_id) where bioentry.accession = 'NC_000913'")
     finally:
         # Do not leave the verification connection open after the test.
         server.close()
     taxid = rows[0][0]
     self.assertEqual(taxid, 112040)
예제 #38
0
    def test_add_from_genbank(self):
        """Load a GenBank file through the CLI and check the stored taxon name.

        The command must exit with status 0, after which the only
        scientific name in ``taxon_name`` has to be the E. coli K-12 MG1655
        entry.
        """
        test_dir = os.path.join(os.path.dirname(__file__), 'test_files')
        genbank_path = os.path.join(test_dir, 'GCF_000005845.2_ASM584v2_genomic.gbff')
        cli_args = self.common_params + ['-G', genbank_path, '-D', 'test']
        outcome = CliRunner().invoke(cli.main, cli_args)
        self.assertEqual(outcome.exit_code, 0)

        server = BioSeqDatabase.open_database(
            driver=self.dbdriver,
            user=self.dbuser,
            passwd=self.dbpassword,
            host=self.dbhost,
            db=self.dbname,
        )

        query = "SELECT name FROM taxon_name where name_class = 'scientific name'"
        names = server.adaptor.execute_and_fetchall(query)
        self.assertEqual(names, [('Escherichia coli str. K-12 substr. MG1655',)])
        server.close()
예제 #39
0
 def test_backwards_compatibility(self):
     """Check can re-use an old BioSQL SQLite3 database.

     The records of the ``OLD`` sub-database stored in
     ``BioSQL/cor6_6.db`` are looked up by name and must agree with the
     records parsed from ``GenBank/cor6_6.gb``.
     """
     original_records = list(SeqIO.parse("GenBank/cor6_6.gb", "gb"))
     # now open a connection to load the database
     server = BioSeqDatabase.open_database(driver=DBDRIVER,
                                           db="BioSQL/cor6_6.db")
     try:
         db = server["OLD"]
         self.assertEqual(len(db), len(original_records))
         # Now read them back...
         biosql_records = [db.lookup(name=rec.name)
                           for rec in original_records]
         # And check they agree
         self.assertTrue(compare_records(original_records, biosql_records))
     finally:
         # Close the connection whether or not the checks passed.
         server.close()
예제 #40
0
    def setUp(self):
        """Load a database.

        Feeds ``GenBank/cor6_6.gb`` (relative to the current working
        directory) to ``load_database``, then opens the test server and
        keeps the ``biosql-test`` sub-database in ``self.db`` and the record
        with accession X62281 in ``self.item``.
        """
        gb_file = os.path.join(os.getcwd(), "GenBank", "cor6_6.gb")
        # The context manager closes the file even if loading fails.
        with open(gb_file, "r") as gb_handle:
            load_database(gb_handle)

        self.server = BioSeqDatabase.open_database(driver = DBDRIVER,
                                                   user = DBUSER, passwd = DBPASSWD,
                                                   host = DBHOST, db = TESTDB)
        self.db = self.server["biosql-test"]
        self.item = self.db.lookup(accession = "X62281")
예제 #41
0
    def setUp(self):
        """Connect to and load up the database.

        Feeds ``GenBank/cor6_6.gb`` (relative to the current working
        directory) to ``load_database``, then opens the test server and
        keeps its ``biosql-test`` sub-database in ``self.db``.
        """
        gb_file = os.path.join(os.getcwd(), "GenBank", "cor6_6.gb")
        # Use a context manager so the handle is closed even on failure.
        with open(gb_file, "r") as gb_handle:
            load_database(gb_handle)

        self.server = BioSeqDatabase.open_database(
            driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD, host=DBHOST, db=TESTDB
        )

        self.db = self.server["biosql-test"]
def main(args):
    """Run ``load_img`` against a BioSQL sub-database, creating it if needed.

    The sub-database named ``args.database_name`` is created when the server
    does not list it.  The load and the commit happen in one guarded step:
    any failure rolls the transaction back and is re-raised unchanged.
    """
    server = BioSeqDatabase.open_database(
        driver=args.driver,
        db=args.database,
        user=args.user,
        host=args.host,
        passwd=args.password,
    )
    target = args.database_name
    if target not in server.keys():
        server.new_database(target)
    db = server[target]

    adaptor_owner = server
    try:
        load_img(db, args.directory, args.tax_lookup, args.taxid)
        adaptor_owner.adaptor.commit()
    except:
        # Undo partial work, then let the original error propagate.
        adaptor_owner.adaptor.rollback()
        raise
예제 #43
0
    def setUp(self):
        """Load a database.

        Passes ``GenBank/cor6_6.gb`` (resolved against the current working
        directory) to ``load_database``, opens the test server, and stores
        the ``biosql-test`` sub-database in ``self.db`` together with the
        record for accession X62281 in ``self.item``.
        """
        gb_file = os.path.join(os.getcwd(), "GenBank", "cor6_6.gb")
        # ``with`` guarantees the handle is released if loading raises.
        with open(gb_file, "r") as gb_handle:
            load_database(gb_handle)

        self.server = BioSeqDatabase.open_database(driver = DBDRIVER,
                                                   user = DBUSER, passwd = DBPASSWD,
                                                   host = DBHOST, db = TESTDB)
        self.db = self.server["biosql-test"]
        self.item = self.db.lookup(accession = "X62281")
예제 #44
0
 def test_005_insertionserv(self):
     """Annotate insertion sequences using a BioSQL server as reference.

     For every entry of ``self.expected['insertion']`` the record at the
     given index of ``insertion_seqs.fasta`` is annotated and compared,
     feature by feature, with the reference allele looked up on the
     server.  Features listed in ``ex['diff']`` must differ from the
     reference, by exactly ``ex['lengths'][feat]`` characters; every other
     feature must match the reference sequence.
     """
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     try:
         seqann = BioSeqAnn(server=server,
                            verbose=False,
                            verbosity=verbosity,
                            pid="004_insertion")
         input_seq = self.data_dir + '/insertion_seqs.fasta'
         for ex in self.expected['insertion']:
             i = int(ex['index'])
             locus = ex['locus']
             allele = ex['name']
             # Only the part after the dash is used (reference db name).
             _prefix, loc = locus.split("-")
             in_seq = list(SeqIO.parse(input_seq, "fasta"))[i]
             ann = seqann.annotate(in_seq, locus)
             self.assertEqual(ann.method, "nt_search")
             self.assertFalse(ann.missing)
             self.assertFalse(ann.blocks)
             self.assertIsInstance(ann, Annotation)
             self.assertTrue(ann.complete_annotation)
             self.assertGreater(len(ann.annotation.keys()), 1)
             db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
             expected = db.lookup(name=allele)
             self.assertEqual(ann.gfe, ex['gfe'])
             self.assertGreater(len(ann.structure), 1)
             for feat in ann.structure:
                 self.assertIsInstance(feat, Feature)
             n_diffs = 0
             expected_seqs = get_features(expected)
             self.assertGreater(len(expected_seqs.keys()), 1)
             for feat in expected_seqs:
                 if feat not in ann.annotation:
                     # Fails (feature names are not None): every
                     # reference feature must have been annotated.
                     self.assertEqual(feat, None)
                 else:
                     if feat in ex['diff']:
                         n_diffs += 1
                         self.assertNotEqual(str(expected_seqs[feat]),
                                             str(ann.annotation[feat].seq))
                         diff_len = len(str(ann.annotation[feat].seq)) - \
                             len(str(expected_seqs[feat]))
                         self.assertEqual(diff_len, ex['lengths'][feat])
                     else:
                         self.assertEqual(str(expected_seqs[feat]),
                                          str(ann.annotation[feat].seq))
             self.assertEqual(n_diffs, len(ex['diff']))
     finally:
         # Close the connection even when one of the assertions fails.
         server.close()
예제 #45
0
    def test_009_partialambigserv(self):
        """Annotate partial/ambiguous sequences with a BioSQL server reference.

        For every entry of ``self.expected['partial_ambig']`` the record at
        the given index of ``partial_ambig.fasta`` is annotated.  The
        annotation must be complete, use the expected method and GFE, contain
        none of ``ex['missing_feats']``, and for each feature in
        ``ex['feats']`` either differ from (when listed in ``ex['diff']``) or
        equal the reference allele's sequence.
        """
        server = BioSeqDatabase.open_database(driver="pymysql",
                                              user=biosqluser,
                                              passwd=biosqlpass,
                                              host=biosqlhost,
                                              db=biosqldb,
                                              port=biosqlport)
        try:
            seqann = BioSeqAnn(server=server,
                               verbose=False,
                               verbosity=verbosity,
                               pid="006_partialambig")
            input_seq = self.data_dir + '/partial_ambig.fasta'

            for ex in self.expected['partial_ambig']:
                i = int(ex['index'])
                locus = ex['locus']
                allele = ex['name']
                # Only the part after the dash is used (reference db name).
                _prefix, loc = locus.split("-")
                print(str(i), allele)
                in_seq = list(SeqIO.parse(input_seq, "fasta"))[i]
                ann = seqann.annotate(in_seq, locus)
                self.assertTrue(ann.complete_annotation)
                self.assertEqual(ann.method, ex['method'])
                self.assertFalse(ann.blocks)
                self.assertIsInstance(ann, Annotation)
                self.assertGreater(len(ann.annotation.keys()), 1)
                db = seqann.refdata.server[seqann.refdata.dbversion + "_" + loc]
                expected = db.lookup(name=allele)
                expected_seqs = get_features(expected)
                self.assertGreater(len(expected_seqs.keys()), 1)
                self.assertEqual(ann.gfe, ex['gfe'])

                self.assertGreater(len(ann.structure), 1)
                for feat in ann.structure:
                    self.assertIsInstance(feat, Feature)
                # Make sure only mapped feats exist
                for mf in ex['missing_feats']:
                    self.assertFalse(mf in ann.annotation)

                for feat in ex['feats']:
                    if feat in ex['diff']:
                        self.assertNotEqual(str(expected_seqs[feat]),
                                            str(ann.annotation[feat].seq))
                    else:
                        self.assertEqual(str(expected_seqs[feat]),
                                         str(ann.annotation[feat].seq))
        finally:
            # Close the connection even when one of the assertions fails.
            server.close()
예제 #46
0
def load_gb_dict_into_db(genbank_data):
    '''Load genbank_data as a dictionary into the mysql database.

    The values of ``genbank_data`` are loaded into the sub-database named
    by the module-level ``namespace`` and the transaction is committed.
    The server connection is closed afterwards, whether or not loading
    succeeded.

    Returns an empty tuple (kept as-is for backward compatibility).
    '''

    print("Loading genbank entries into the database ...")

    server = BioSeqDatabase.open_database(driver = db_driver, user = db_user, passwd = db_passwd, host = db_host, db = db_name)
    try:
        db = server[namespace]
        count = db.load(genbank_data.values())
        server.commit()
    finally:
        # Never leave the connection open, even when loading fails.
        server.close()

    print("Loaded %i sequences" % count)

    return ()
예제 #47
0
    def db(self, database):
        """
        Create FASTA files for every GenBank record in the database.

        Opens ``database`` with the sqlite3 driver, walks every sub-database
        and every key in it, looks the record up and passes it to
        ``self.write_fasta_file``.  Errors propagate unchanged; the server
        connection is closed in all cases.
        """
        server = BioSeqDatabase.open_database(driver="sqlite3", db=database)
        try:
            for db_name in server.keys():
                sub_db = server[db_name]
                for item in sub_db.keys():
                    record = sub_db.lookup(item)

                    self.write_fasta_file(record)
        finally:
            # The previous ``except: raise ()`` turned every failure into a
            # TypeError; let the real exception surface and just clean up.
            server.close()
예제 #48
0
    def test_add_from_gff(self):
        """Load a GFF + FASTA pair through the CLI and check taxon names.

        The command must exit with status 0 and leave no scientific name
        rows in ``taxon_name``.
        """
        test_dir = os.path.join(os.path.dirname(__file__), 'test_files')
        gff = os.path.join(test_dir, 'GCF_000005845.2_ASM584v2_genomic.gff')
        fasta = os.path.join(test_dir, 'GCF_000005845.2_ASM584v2_genomic.fna')
        cli_args = self.common_params + ['-g', gff, '-f', fasta, '-D', 'test']
        outcome = CliRunner().invoke(cli.main, cli_args)
        self.assertEqual(outcome.exit_code, 0)

        server = BioSeqDatabase.open_database(
            driver=self.dbdriver,
            user=self.dbuser,
            passwd=self.dbpassword,
            host=self.dbhost,
            db=self.dbname,
        )

        query = "SELECT name FROM taxon_name where name_class = 'scientific name'"
        names = server.adaptor.execute_and_fetchall(query)
        self.assertEqual(names, [])
        server.close()
예제 #49
0
    def setUp(self):
        """Connect to and load up the database.

        Feeds ``GenBank/cor6_6.gb`` to ``load_database``, then opens the
        test server and keeps its ``biosql-test`` sub-database in
        ``self.db``.
        """
        gb_file = "GenBank/cor6_6.gb"
        # The context manager closes the file even if loading fails.
        with open(gb_file, "r") as gb_handle:
            load_database(gb_handle)

        self.server = BioSeqDatabase.open_database(driver = DBDRIVER,
                                                   user = DBUSER,
                                                   passwd = DBPASSWD,
                                                   host = DBHOST, db = TESTDB)

        self.db = self.server["biosql-test"]
예제 #50
0
 def test_002_server(self):
     """ReferenceData built on a BioSQL server reports the server as available.

     With a server supplied, ``server_avail`` must be true while
     ``seqref`` and ``hlaref`` stay false.
     """
     connection = {
         "driver": "pymysql",
         "user": biosqluser,
         "passwd": biosqlpass,
         "host": biosqlhost,
         "db": biosqldb,
         "port": biosqlport,
     }
     server = BioSeqDatabase.open_database(**connection)
     ref = ReferenceData(server=server)
     self.assertIsInstance(ref, ReferenceData)
     self.assertTrue(ref.server_avail)
     for unset_flag in (ref.seqref, ref.hlaref):
         self.assertFalse(unset_flag)
     server.close()
예제 #51
0
    def __init__(self):
        """Open the BioSQL server and set up the tornado application.

        Two routes are registered: ``/`` served by ``MainHandler`` and
        ``/just_testing`` served by ``Just_Testing``, which receives the
        opened server as its ``server`` argument.
        """
        # NOTE(review): the database credentials are hard-coded below;
        # consider reading them from configuration or the environment.
        server = BioSeqDatabase.open_database(
            driver="MySQLdb",
            user="******",
            passwd="FurtherFlowersVenus",
            host="localhost",
            db="bioseqdb",
        )

        routes = [
            (r"/", MainHandler),
            (r"/just_testing", Just_Testing, {'server': server}),
        ]
        options = {"autoescape": None}

        tornado.web.Application.__init__(self, routes, **options)
예제 #52
0
def create_database():
    """Delete any existing BioSQL test database, then (re)create an empty one.

    For the sqlite3 driver the file ``TESTDB`` is removed when present; for
    every other driver ``_do_db_create()`` is called.  Afterwards the schema
    in ``SQL_FILE`` is loaded and committed on a fresh connection, which is
    then closed.
    """
    if DBDRIVER not in ["sqlite3"]:
        _do_db_create()
    elif os.path.exists(TESTDB):
        os.remove(TESTDB)

    # Open a connection and load the schema into the empty database.
    connection = dict(
        driver=DBDRIVER,
        user=DBUSER,
        passwd=DBPASSWD,
        host=DBHOST,
        db=TESTDB,
    )
    server = BioSeqDatabase.open_database(**connection)
    server.load_database_sql(SQL_FILE)
    server.commit()
    server.close()
예제 #53
0
 def test_003_dblist(self):
     """ReferenceData accepts every database version in ``self.dblist``.

     For each version the object must echo the version back, report the
     server as available and leave ``seqref`` and ``hlaref`` false.
     """
     connection = {
         "driver": "pymysql",
         "user": biosqluser,
         "passwd": biosqlpass,
         "host": biosqlhost,
         "db": biosqldb,
         "port": biosqlport,
     }
     server = BioSeqDatabase.open_database(**connection)
     for version in self.dblist:
         ref = ReferenceData(server=server, dbversion=version)
         self.assertEqual(ref.dbversion, version)
         self.assertTrue(ref.server_avail)
         self.assertFalse(ref.seqref)
         self.assertFalse(ref.hlaref)
     server.close()
예제 #54
0
def create_database():
    """Reset the BioSQL test database and load an empty schema into it.

    With the sqlite3 driver an existing ``TESTDB`` file is deleted;
    otherwise ``_do_db_create()`` is invoked.  The schema from ``SQL_FILE``
    is then loaded, committed, and the connection closed.
    """
    using_sqlite = DBDRIVER in ["sqlite3"]
    if not using_sqlite:
        _do_db_create()
    else:
        stale_file_present = os.path.exists(TESTDB)
        if stale_file_present:
            os.remove(TESTDB)

    # now open a connection to load the database
    server = BioSeqDatabase.open_database(
        driver=DBDRIVER, user=DBUSER, passwd=DBPASSWD, host=DBHOST, db=TESTDB
    )
    server.load_database_sql(SQL_FILE)
    server.commit()
    server.close()
def main(args):
    """Extract features selected by a qualifier/value pair.

    Seqfeature ids are obtained from ``get_seqfeature_ids_from_qv``.  The
    feature types come from ``args.feature_type`` when given, otherwise
    they default to ``['CDS']`` for ``feat-prot`` output and to
    ``['CDS', 'rRNA', 'tRNA']`` for ``feat-nucl`` output.  ``feat-prot``
    additionally requests translation.  Any other output format extracts
    nothing.
    """
    server = BioSeqDatabase.open_database(
        driver=args.driver,
        db=args.database,
        user=args.user,
        host=args.host,
        passwd=args.password,
    )

    seqfeature_ids = get_seqfeature_ids_from_qv(
        server, args.qualifier, args.value, args.database_name, fuzzy=args.fuzzy
    )

    requested = args.feature_type
    if args.output_format == 'feat-prot':
        types = requested if requested is not None else ['CDS']
        extract_feature_sql(server, seqfeature_ids, type=types, translate=True )
    elif args.output_format == 'feat-nucl':
        types = requested if requested is not None else ['CDS', 'rRNA', 'tRNA']
        extract_feature_sql(server, seqfeature_ids, type=types)
예제 #56
0
 def test_007_align(self):
     """Check the annotated alignments loaded by ReferenceData.

     For every entry of ``self.expected['align']`` the ``'Seq'`` values of
     the allele's alignment segments are concatenated and compared with
     ``ex['alignment']``.
     """
     # TODO: Add class II tests
     server = BioSeqDatabase.open_database(driver="pymysql",
                                           user=biosqluser,
                                           passwd=biosqlpass,
                                           host=biosqlhost,
                                           db=biosqldb,
                                           port=biosqlport)
     try:
         refdata = ReferenceData(server=server, alignments=True)
         for ex in self.expected['align']:
             locus = ex['locus']
             allele = ex['name'].split("_")[0]
             # Only the part after the dash keys the alignment table.
             _prefix, loc = locus.split("-")
             segments = refdata.annoated_alignments[loc][allele]
             align = "".join([segments[s]['Seq'] for s in segments.keys()])
             self.assertEqual(str(align),
                              str(ex['alignment']))
     finally:
         # Close the connection even when an assertion fails.
         server.close()
 def test_backwards_compatibility(self):
     """Check that an old BioSQL SQLite3 database can still be read.

     Records parsed from ``GenBank/cor6_6.gb`` have an ``mRNA`` molecule
     type rewritten to ``DNA`` before they are compared with the records
     looked up by name in the ``OLD`` sub-database of ``BioSQL/cor6_6.db``.
     """
     original_records = list(SeqIO.parse("GenBank/cor6_6.gb", "gb"))
     for parsed in original_records:
         if parsed.annotations["molecule_type"] == "mRNA":
             parsed.annotations["molecule_type"] = "DNA"

     # Connect to the stored database and fetch the same records by name.
     server = BioSeqDatabase.open_database(
         driver=DBDRIVER, db="BioSQL/cor6_6.db"
     )
     old_db = server["OLD"]
     self.assertEqual(len(old_db), len(original_records))
     biosql_records = []
     for parsed in original_records:
         biosql_records.append(old_db.lookup(name=parsed.name))

     # Both collections have to describe the same records.
     self.compare_records(original_records, biosql_records)
     server.close()
def main(args):
    """Load a GFF + FASTA pair or a GenBank file into a BioSQL sub-database.

    The sub-database ``args.database_name`` is created when the server does
    not list it.  When both ``args.gff`` and ``args.fasta`` are given,
    ``load_gff`` is used; otherwise, when ``args.genbank`` is given,
    ``load_genbank`` is used.  A successful load is committed; any error
    triggers a rollback and is re-raised.
    """
    server = BioSeqDatabase.open_database(
        driver=args.driver,
        db=args.database,
        user=args.user,
        host=args.host,
        passwd=args.password,
    )
    target = args.database_name
    if target not in server.keys():
        server.new_database(target)
    db = server[target]

    try:
        if not (args.gff is None or args.fasta is None):
            load_gff(db, args.gff, args.fasta, args.tax_lookup, args.taxid)
            server.adaptor.commit()
        elif args.genbank is not None:
            load_genbank(db, args.genbank, args.tax_lookup)
            server.adaptor.commit()
    except:
        # Undo partial work, then let the original error propagate.
        server.adaptor.rollback()
        raise
예제 #59
0
 def test_backwards_compatibility(self):
     """Check that an old BioSQL SQLite3 database can still be read.

     Records from ``GenBank/cor6_6.gb`` are compared with the ones looked
     up by name in the ``OLD`` sub-database of ``BioSQL/cor6_6.db``, after
     the ``molecule_type`` annotation has been dropped from the parsed
     records.
     """
     parsed_records = list(SeqIO.parse("GenBank/cor6_6.gb", "gb"))

     # Connect to the stored database and fetch the same records by name.
     server = BioSeqDatabase.open_database(
         driver=DBDRIVER, db="BioSQL/cor6_6.db"
     )
     old_db = server["OLD"]
     self.assertEqual(len(old_db), len(parsed_records))
     stored_records = []
     for parsed in parsed_records:
         stored_records.append(old_db.lookup(name=parsed.name))

     # The old parser used to create BioSQL/cor6_6.db did not record the
     # molecule_type, so strip it from the freshly parsed records.
     for parsed in parsed_records:
         del parsed.annotations["molecule_type"]
     self.assertTrue(compare_records(parsed_records, stored_records))
     server.close()
예제 #60
0
def main(args):
    """Run ``load_img`` against the sub-database named in ``args``.

    A missing sub-database is created first.  The load and its commit are
    guarded together: on any error the transaction is rolled back and the
    error re-raised.
    """
    connection = dict(
        driver=args.driver,
        db=args.database,
        user=args.user,
        host=args.host,
        passwd=args.password,
    )
    server = BioSeqDatabase.open_database(**connection)

    known_databases = server.keys()
    if args.database_name not in known_databases:
        server.new_database(args.database_name)
    db = server[args.database_name]

    try:
        load_img(db, args.directory, args.tax_lookup, args.taxid)
        server.adaptor.commit()
    except:
        # Roll back whatever was written, then propagate the failure.
        server.adaptor.rollback()
        raise