Example #1
0
    def load(self, record_iterator, fetch_NCBI_taxonomy=False):
        """Store every SeqRecord from record_iterator in the BioSQL database.

        record_iterator may be a list of SeqRecord objects or any
        iterator yielding them, for instance the result of calling
        Bio.SeqIO.parse(); each record it produces is handed to the
        database loader in turn.

        fetch_NCBI_taxonomy is a boolean flag which is passed on to the
        loader; it allows or prevents a connection to the taxonomic
        database on the NCBI server (via Bio.Entrez) to fetch a
        detailed taxonomy for each SeqRecord.

        Example:
        from Bio import SeqIO
        count = db.load(SeqIO.parse(open(filename), format))

        The return value is the number of records that were loaded
        (zero when record_iterator yields nothing).
        """
        loader = Loader.DatabaseLoader(
            self.adaptor, self.dbid, fetch_NCBI_taxonomy)
        # Stays at zero when the iterator is empty; otherwise enumerate
        # leaves it holding the 1-based position of the last record.
        loaded = 0
        for loaded, record in enumerate(record_iterator, 1):
            loader.load_seqrecord(record)
        return loaded
Example #2
0
    def load(self, record_iterator, fetch_NCBI_taxonomy=False):
        """Load a set of SeqRecords into the BioSQL database.

        record_iterator is either a list of SeqRecord objects, or an
        Iterator object that returns SeqRecord objects (such as the
        output from the Bio.SeqIO.parse() function), which will be
        used to populate the database.

        fetch_NCBI_taxonomy is a boolean flag allowing or preventing
        connection to the taxonomic database on the NCBI server
        (via Bio.Entrez) to fetch a detailed taxonomy for each
        SeqRecord.

        Example:
        from Bio import SeqIO
        count = db.load(SeqIO.parse(open(filename), format))

        Returns the number of records loaded.

        When the module-level flag _POSTGRES_RULES_PRESENT is true, each
        record is first looked up in the bioentry table (by identifier,
        or by accession and version, within this biodatabase).  If a
        matching row exists an IntegrityError is raised - taken from the
        connection object, or from the psycopg module when the
        connection does not expose one - and that record is not passed
        to the loader.  Records that came before it in record_iterator
        have already been passed to the loader at that point.
        """
        db_loader = Loader.DatabaseLoader(self.adaptor, self.dbid,
                                          fetch_NCBI_taxonomy)
        # The placeholders are deliberately left unquoted: the values are
        # handed to the database driver as bind parameters rather than
        # being interpolated into the SQL text, so a quote character in a
        # record id cannot break (or inject into) the query.  The text is
        # the same for every record, hence it is built once here.
        # NOTE(review): this assumes self.adaptor.execute accepts the
        # parameter sequence as a second argument and forwards it to the
        # DB-API cursor - confirm against the adaptor class.
        duplicate_sql = (
            "SELECT bioentry_id FROM bioentry WHERE (identifier "
            "= %s AND biodatabase_id = %s) OR (accession = "
            "%s AND version = %s AND biodatabase_id = %s)")
        num_records = 0
        for cur_record in record_iterator:
            num_records += 1
            # Hack to work around BioSQL Bug 2839 - If using PostgreSQL and
            # the RULES are present check for a duplicate record before
            # loading.
            if _POSTGRES_RULES_PRESENT:
                # Recreate what the Loader's _load_bioentry_table will do:
                # an id of the form "accession.version" is split only when
                # it has exactly one dot and the part after it is an
                # integer; otherwise the whole id is the accession.
                accession = cur_record.id
                version = 0
                if cur_record.id.count(".") == 1:
                    prefix, suffix = cur_record.id.split(".")
                    try:
                        version = int(suffix)
                    except ValueError:
                        pass
                    else:
                        accession = prefix
                # A missing "gi" annotation is bound as NULL, which never
                # compares equal, so only the accession clause can match.
                gi = cur_record.annotations.get("gi")
                self.adaptor.execute(
                    duplicate_sql,
                    (gi, self.dbid, accession, version, self.dbid))
                if self.adaptor.cursor.fetchone():
                    try:
                        integrity_error = self.adaptor.conn.IntegrityError
                    except AttributeError:  # psycopg version 1
                        import psycopg
                        raise psycopg.IntegrityError(
                            "Psycopg1: Duplicate record "
                            "detected: record has not been inserted")
                    raise integrity_error(
                        "Duplicate record "
                        "detected: record has not been inserted")
            # End of hack
            db_loader.load_seqrecord(cur_record)
        return num_records