Example #1
    def insert_data_from_file(self, filename):
        """Use PostgreSQL's "COPY FROM" statement to perform a bulk insert."""
        self.get_cursor()
        ct = len([True
                  for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
        if (([
                self.table.cleanup.function, self.table.delimiter,
                self.table.header_rows
        ] == [no_cleanup, ",", 1]) and not self.table.fixed_width and not ct
                and (not hasattr(self.table, "do_not_bulk_insert")
                     or not self.table.do_not_bulk_insert)):
            columns = self.table.get_insert_columns()
            filename = os.path.abspath(filename)
            statement = """
COPY """ + self.table_name() + " (" + columns + """)
FROM '""" + filename.replace("\\", "\\\\") + """'
WITH DELIMITER ','
CSV HEADER;"""
            try:
                self.execute("BEGIN")
                self.execute(statement)
                self.execute("COMMIT")
            except Exception:
                self.connection.rollback()
                return Engine.insert_data_from_file(self, filename)
        else:
            return Engine.insert_data_from_file(self, filename)
Example #2
    def insert_data_from_file(self, filename):
        """Use PostgreSQL's "COPY FROM" statement to perform a bulk insert."""
        self.get_cursor()
        ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
        if (([self.table.cleanup.function, self.table.delimiter,
              self.table.header_rows] == [no_cleanup, ",", 1])
            and not self.table.fixed_width
            and not ct
            and (not hasattr(self.table, "do_not_bulk_insert") or not self.table.do_not_bulk_insert)):
            columns = self.table.get_insert_columns()
            filename = os.path.abspath(filename)
            statement = """
COPY """ + self.table_name() + " (" + columns + """)
FROM '""" + filename.replace("\\", "\\\\") + """'
WITH DELIMITER ','
CSV HEADER;"""
            try:
                self.execute("BEGIN")
                self.execute(statement)
                self.execute("COMMIT")
            except BaseException:
                self.connection.rollback()
                return Engine.insert_data_from_file(self, filename)
        else:
            return Engine.insert_data_from_file(self, filename)
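The two PostgreSQL variants above rely on server-side COPY FROM, so the file path must be readable by the PostgreSQL server process and the role typically needs file-read privileges. A minimal standalone sketch of the same technique, assuming a psycopg2 connection and hypothetical database, table, and file names:

    import psycopg2

    # Hypothetical DSN; COPY ... FROM '<path>' is read by the server
    # process, not the client, so the path must exist on the server host.
    conn = psycopg2.connect("dbname=testdb user=postgres")
    cur = conn.cursor()
    try:
        cur.execute("BEGIN")
        cur.execute("""
    COPY my_table (col_a, col_b)
    FROM '/tmp/data.csv'
    WITH DELIMITER ','
    CSV HEADER;""")
        cur.execute("COMMIT")
    except Exception:
        conn.rollback()  # on failure, fall back to row-by-row inserts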
Example #3
    def insert_data_from_file(self, filename):
        """Calls MySQL "LOAD DATA LOCAL INFILE" statement to perform a bulk 
        insert."""
        self.get_cursor()
        ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
        if (self.table.cleanup.function == no_cleanup
            and not self.table.fixed_width
            and not ct
            and (not hasattr(self.table, "do_not_bulk_insert") or not self.table.do_not_bulk_insert)
            ):
            print("Inserting data from " + os.path.basename(filename) + "...")

            columns = self.table.get_insert_columns()
            statement = """        
LOAD DATA LOCAL INFILE '""" + filename.replace("\\", "\\\\") + """'
INTO TABLE """ + self.table_name() + """
FIELDS TERMINATED BY '""" + self.table.delimiter + """'
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\\n'
IGNORE """ + str(self.table.header_rows) + """ LINES
(""" + columns + ")"
            try:
                self.cursor.execute(statement)
            except Exception as e:
                print "Failed bulk insert (%s), inserting manually" % e
                self.disconnect() # If the execute fails the database connection can get hung up
                return Engine.insert_data_from_file(self, filename)
        else:
            return Engine.insert_data_from_file(self, filename)
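Unlike COPY FROM, LOAD DATA LOCAL INFILE streams the file from the client, and both the server and the client library must allow it. A minimal sketch of the same statement issued through pymysql (connection parameters, table, and file names are hypothetical):

    import pymysql

    # local_infile=True enables the client side of LOCAL INFILE; the
    # server must also have its local_infile option switched on.
    conn = pymysql.connect(host="localhost", user="root", password="",
                           database="testdb", local_infile=True)
    cur = conn.cursor()
    cur.execute("""
    LOAD DATA LOCAL INFILE '/tmp/data.csv'
    INTO TABLE my_table
    FIELDS TERMINATED BY ','
    OPTIONALLY ENCLOSED BY '"'
    LINES TERMINATED BY '\\n'
    IGNORE 1 LINES
    (col_a, col_b)""")
    conn.commit()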
Example #4
    def insert_data_from_file(self, filename):
        """Use executemany to perform a high speed bulk insert

        Checks to see if a given file can be bulk inserted, and if so loads
        it in chunks and inserts those chunks into the database using
        executemany.

        """
        CHUNK_SIZE = 1000000
        self.get_cursor()
        ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
        if (([self.table.cleanup.function, self.table.header_rows] == [no_cleanup, 1])
            and not self.table.fixed_width
            and not ct
            and (not hasattr(self.table, "do_not_bulk_insert") or not self.table.do_not_bulk_insert)
            ):
            columns = self.table.get_insert_columns()
            filename = os.path.abspath(filename)
            try:
                bulk_insert_statement = self.get_bulk_insert_statement()
                with open(filename, 'r') as data_file:
                    data_chunk = data_file.readlines(CHUNK_SIZE)
                    del data_chunk[:self.table.header_rows]
                    while data_chunk:
                        data_chunk_split = [row.split(self.table.delimiter)
                                            for row in data_chunk]
                        self.cursor.executemany(bulk_insert_statement, data_chunk_split)
                        data_chunk = data_file.readlines(CHUNK_SIZE)
                self.connection.commit()
            except Exception:
                self.connection.rollback()
                return Engine.insert_data_from_file(self, filename)
        else:
            return Engine.insert_data_from_file(self, filename)
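The executemany variants avoid database-specific load statements entirely: readlines(CHUNK_SIZE) reads at most roughly CHUNK_SIZE bytes per call, so memory use stays bounded on large files. A self-contained sketch of the same chunked loop against sqlite3, with hypothetical table and file names:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    cur = conn.cursor()
    cur.execute("CREATE TABLE my_table (col_a TEXT, col_b TEXT)")

    chunk_size = 1000000  # size hint in bytes, as in the example above
    insert_sql = "INSERT INTO my_table (col_a, col_b) VALUES (?, ?)"
    with open("/tmp/data.csv") as data_file:  # assumes one header row
        chunk = data_file.readlines(chunk_size)
        del chunk[:1]  # drop the header row from the first chunk only
        while chunk:
            rows = [line.rstrip("\r\n").split(",") for line in chunk]
            cur.executemany(insert_sql, rows)
            chunk = data_file.readlines(chunk_size)
    conn.commit()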
Example #5
    def insert_data_from_file(self, filename):
        """Calls MySQL "LOAD DATA LOCAL INFILE" statement to perform a bulk
        insert."""
        self.get_cursor()
        ct = len([True
                  for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
        if (self.table.cleanup.function == no_cleanup
                and not self.table.fixed_width and not ct
                and (not hasattr(self.table, "do_not_bulk_insert")
                     or not self.table.do_not_bulk_insert)):

            print("Inserting data from " + os.path.basename(filename) + "...")

            columns = self.table.get_insert_columns()
            statement = """
LOAD DATA LOCAL INFILE '""" + filename.replace("\\", "\\\\") + """'
INTO TABLE """ + self.table_name() + """
FIELDS TERMINATED BY '""" + self.table.delimiter + """'
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\\n'
IGNORE """ + str(self.table.header_rows) + """ LINES
(""" + columns + ")"
            try:
                self.cursor.execute(statement)
            except Exception as e:
                print "Failed bulk insert (%s), inserting manually" % e
                self.disconnect(
                )  # If the execute fails the database connection can get hung up
                return Engine.insert_data_from_file(self, filename)
        else:
            return Engine.insert_data_from_file(self, filename)
Example #6
    def insert_data_from_file(self, filename):
        """Call MySQL "LOAD DATA LOCAL INFILE" statement to perform a bulk insert."""

        mysql_set_autocommit_off = """SET autocommit=0; SET UNIQUE_CHECKS=0; SET FOREIGN_KEY_CHECKS=0; SET sql_log_bin=0;"""
        mysql_set_autocommit_on = """SET GLOBAL innodb_flush_log_at_trx_commit=1; COMMIT; SET autocommit=1; SET unique_checks=1; SET foreign_key_checks=1;"""

        self.get_cursor()
        ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
        if (self.table.cleanup.function == no_cleanup and
                not self.table.fixed_width and
                not ct and
                (not hasattr(self.table, "do_not_bulk_insert") or not self.table.do_not_bulk_insert)):

            print("Inserting data from " + os.path.basename(filename) + "...")

            columns = self.table.get_insert_columns()
            statement = """
LOAD DATA LOCAL INFILE '""" + filename.replace("\\", "\\\\") + """'
INTO TABLE """ + self.table_name() + """
FIELDS TERMINATED BY '""" + self.table.delimiter + """'
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\\n'
IGNORE """ + str(self.table.header_rows) + """ LINES
(""" + columns + ")"
            try:
                self.cursor.execute(mysql_set_autocommit_off)
                self.cursor.execute(statement)

                self.cursor.execute(mysql_set_autocommit_on)
            except Exception as e:
                self.disconnect()  # If the execute fails the database connection can get hung up
                self.cursor.execute(mysql_set_autocommit_on)
                return Engine.insert_data_from_file(self, filename)
        else:
            return Engine.insert_data_from_file(self, filename)
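The session tuning in this variant (autocommit, unique_checks, foreign_key_checks, and sql_log_bin all switched off) removes per-row transaction, index, and binlog overhead during the load, and the closing statement restores the settings and flush behaviour afterwards. Note that SET sql_log_bin and SET GLOBAL innodb_flush_log_at_trx_commit both require administrative privileges, so this version assumes a suitably privileged connection.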
Example #7
    def insert_data_from_file(self, filename):
        """Use PostgreSQL's "COPY FROM" statement to perform a bulk insert.

        Current postgres engine bulk only supports comma delimiter
        """
        self.get_cursor()
        p_bulk = [
            self.check_bulk_insert(), self.table.delimiter,
            self.table.header_rows
        ]
        if p_bulk == [True, ",", 1]:
            columns = self.table.get_insert_columns()
            filename = os.path.normpath(os.path.abspath(filename))
            statement = """
COPY """ + self.table_name() + " (" + columns + """)
FROM '""" + filename + """'
WITH DELIMITER ','
CSV HEADER;"""
            try:
                self.execute("BEGIN")
                self.execute(statement)
                self.execute("COMMIT")
                print("Bulk insert on .. ", self.table_name())
                return True
            except Exception:
                self.connection.rollback()
        return Engine.insert_data_from_file(self, filename)
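From this example onward the eligibility test is folded into a check_bulk_insert() helper; the examples show only the call site. A plausible reconstruction of the helper, assuming it simply mirrors the inline condition used in Examples #1 through #6, might look like:

    def check_bulk_insert(self):
        """Return True if the current table qualifies for a bulk insert."""
        # Cross-tab ("ct-") columns need per-row expansion, so they rule
        # out a direct file load, as do cleanup functions and fixed-width
        # formats.
        ct = any(c[1][0][:3] == "ct-" for c in self.table.columns)
        return (self.table.cleanup.function == no_cleanup
                and not self.table.fixed_width
                and not ct
                and not getattr(self.table, "do_not_bulk_insert", False))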
Example #8
    def insert_data_from_file(self, filename):
        """Call MySQL "LOAD DATA LOCAL INFILE" statement to perform a bulk insert."""

        mysql_set_autocommit_off = """SET autocommit=0;"""
        mysql_set_autocommit_on = """SET autocommit=1;"""

        self.get_cursor()
        if self.check_bulk_insert():
            print("Inserting data from " + os.path.basename(filename) + "...")
            columns = self.table.get_insert_columns()
            statement = """
BEGIN;
LOAD DATA LOCAL INFILE '""" + filename.replace("\\", "\\\\") + """'
INTO TABLE """ + self.table_name() + """
FIELDS TERMINATED BY '""" + self.table.delimiter + """'
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\\n'
IGNORE """ + str(self.table.header_rows) + """ LINES
(""" + columns + "); COMMIT;"
            try:
                self.cursor.execute(mysql_set_autocommit_off)
                self.cursor.execute(statement)

                self.cursor.execute(mysql_set_autocommit_on)
                return None
            except Exception:
                self.cursor.execute("ROLLBACK;")
                # If the execute fails the database connection can get hung up
                self.disconnect()
                self.cursor.execute(mysql_set_autocommit_on)
        return Engine.insert_data_from_file(self, filename)
Example #9
    def insert_data_from_file(self, filename):
        """Perform a high speed bulk insert

        Checks to see if a given file can be bulk inserted, and if so loads
        it in chunks and inserts those chunks into the database using
        executemany.
        """
        chunk_size = 1000000
        self.get_cursor()
        if [self.check_bulk_insert(), self.table.header_rows] == [True, 1]:
            filename = os.path.abspath(filename)
            try:
                bulk_insert_statement = self.get_bulk_insert_statement()
                line_endings = set(['\n', '\r', '\r\n'])
                with open(filename, 'r') as data_file:
                    data_chunk = data_file.readlines(chunk_size)
                    data_chunk = [
                        line.rstrip('\r\n') for line in data_chunk
                        if line not in line_endings
                    ]
                    del data_chunk[:self.table.header_rows]
                    while data_chunk:
                        data_chunk_split = [
                            row.split(self.table.delimiter)
                            for row in data_chunk
                        ]
                        self.cursor.executemany(bulk_insert_statement,
                                                data_chunk_split)
                        data_chunk = data_file.readlines(chunk_size)
                self.connection.commit()
                return True
            except Exception:
                self.connection.rollback()
        return Engine.insert_data_from_file(self, filename)
Example #10
    def insert_data_from_file(self, filename):
        """Perform a high speed bulk insert

        Checks to see if a given file can be bulk inserted, and if so loads
        it in chunks and inserts those chunks into the database using
        executemany.
        """
        chunk_size = 1000000
        self.get_cursor()

        # Determine if the dataset includes cross-tab data
        crosstab = len(
            [True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0

        if (([self.table.cleanup.function, self.table.header_rows]
             == [no_cleanup, 1]) and not self.table.fixed_width
                and not crosstab
                and (not hasattr(self.table, "do_not_bulk_insert")
                     or not self.table.do_not_bulk_insert)):
            filename = os.path.abspath(filename)
            try:
                bulk_insert_statement = self.get_bulk_insert_statement()
                line_endings = set(['\n', '\r', '\r\n'])
                with open(filename, 'r') as data_file:
                    data_chunk = data_file.readlines(chunk_size)
                    data_chunk = [
                        line.rstrip('\r\n') for line in data_chunk
                        if line not in line_endings
                    ]
                    del data_chunk[:self.table.header_rows]
                    while data_chunk:
                        data_chunk_split = [
                            row.split(self.table.delimiter)
                            for row in data_chunk
                        ]
                        self.cursor.executemany(bulk_insert_statement,
                                                data_chunk_split)
                        data_chunk = data_file.readlines(chunk_size)
                self.connection.commit()
            except Exception:
                self.connection.rollback()
                return Engine.insert_data_from_file(self, filename)
        else:
            return Engine.insert_data_from_file(self, filename)
Example #11
    def insert_data_from_file(self, filename):
        """Use executemany to perform a high speed bulk insert

        Checks to see if a given file can be bulk inserted, and if so loads
        it in chunks and inserts those chunks into the database using
        executemany.

        """
        CHUNK_SIZE = 1000000
        self.get_cursor()
        ct = len([True
                  for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
        if (([self.table.cleanup.function, self.table.header_rows]
             == [no_cleanup, 1]) and not self.table.fixed_width and not ct
                and (not hasattr(self.table, "do_not_bulk_insert")
                     or not self.table.do_not_bulk_insert)):
            columns = self.table.get_insert_columns()
            filename = os.path.abspath(filename)
            try:
                bulk_insert_statement = self.get_bulk_insert_statement()
                with open(filename, 'r') as data_file:
                    data_chunk = data_file.readlines(CHUNK_SIZE)
                    del data_chunk[:self.table.header_rows]
                    while data_chunk:
                        data_chunk_split = [
                            row.split(self.table.delimiter)
                            for row in data_chunk
                        ]
                        self.cursor.executemany(bulk_insert_statement,
                                                data_chunk_split)
                        data_chunk = data_file.readlines(CHUNK_SIZE)
                self.connection.commit()
            except Exception:
                self.connection.rollback()
                return Engine.insert_data_from_file(self, filename)
        else:
            return Engine.insert_data_from_file(self, filename)
Example #12
    def insert_data_from_file(self, filename):
        """Perform a high speed bulk insert

        Checks to see if a given file can be bulk inserted, and if so loads
        it in chunks and inserts those chunks into the database using
        executemany.
        """
        chunk_size = 1000000
        self.get_cursor()

        # Determine if the dataset includes cross-tab data
        crosstab = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0

        if (([self.table.cleanup.function, self.table.header_rows] == [no_cleanup, 1])
            and not self.table.fixed_width
            and not crosstab
            and (not hasattr(self.table, "do_not_bulk_insert") or not self.table.do_not_bulk_insert)):
            filename = os.path.abspath(filename)
            try:
                bulk_insert_statement = self.get_bulk_insert_statement()
                line_endings = set(['\n', '\r', '\r\n'])
                with open(filename, 'r') as data_file:
                    data_chunk = data_file.readlines(chunk_size)
                    data_chunk = [line.rstrip('\r\n') for line in data_chunk if line not in line_endings]
                    del data_chunk[:self.table.header_rows]
                    while data_chunk:
                        data_chunk_split = [row.split(self.table.delimiter)
                                            for row in data_chunk]
                        self.cursor.executemany(bulk_insert_statement, data_chunk_split)
                        data_chunk = data_file.readlines(chunk_size)
                self.connection.commit()
            except Exception:
                self.connection.rollback()
                return Engine.insert_data_from_file(self, filename)
        else:
            return Engine.insert_data_from_file(self, filename)
Example #13
    def insert_data_from_file(self, filename):
        """Perform a bulk insert."""
        self.get_cursor()
        ct = len([True for c in self.table.columns if c[1][0][:3] == "ct-"]) != 0
        if (self.table.cleanup.function == no_cleanup
            and not self.table.fixed_width
            and self.table.header_rows < 2
            and self.table.delimiter in ["\t", ","]
            and not ct
            and (not hasattr(self.table, "do_not_bulk_insert") or not self.table.do_not_bulk_insert)
            ):
            print("Inserting data from " + os.path.basename(filename) + "...")

            if self.table.delimiter == "\t":
                fmt = "TabDelimited"
            elif self.table.delimiter == ",":
                fmt = "CSVDelimited"

            if self.table.header_rows == 1:
                hdr = "Yes"
            else:
                hdr = "No"

            columns = self.table.get_insert_columns()

            need_to_delete = False
            add_to_record_id = 0

            if self.table.pk and not self.table.contains_pk:
                if '.' in os.path.basename(filename):
                    proper_name = filename.split('.')
                    newfilename = '.'.join(
                        proper_name[0:-1] if len(proper_name) > 0 else proper_name[0]
                    ) + "_new." + filename.split(".")[-1]
                else:
                    newfilename = filename + "_new"

                if not os.path.isfile(newfilename):
                    print("Adding index to " + os.path.abspath(newfilename) + "...")
                    read = open(filename, "rb")
                    write = open(newfilename, "wb")
                    to_write = ""

                    for line in read:
                        to_write += str(id) + self.table.delimiter + line.replace("\n", "\r\n")
                        add_to_record_id += 1
                    self.table.record_id += add_to_record_id

                    write.write(to_write)
                    write.close()
                    read.close()
                    need_to_delete = True
                columns = "record_id, " + columns
            else:
                newfilename = filename

            newfilename = os.path.abspath(newfilename)
            filename_length = (len(os.path.basename(newfilename)) * -1) - 1
            filepath = newfilename[:filename_length]
            statement = """
INSERT INTO """ + self.table_name() + " (" + columns + """)
SELECT * FROM [""" + os.path.basename(newfilename) + ''']
IN "''' + filepath + '''" "Text;FMT=''' + fmt + ''';HDR=''' + hdr + ''';"'''

            try:
                self.execute(statement)
            except Exception:
                print("Couldn't bulk insert. Trying manual insert.")
                self.connection.rollback()

                self.table.record_id -= add_to_record_id

                return Engine.insert_data_from_file(self, filename)

            if need_to_delete:
                os.remove(newfilename)

        else:
            return Engine.insert_data_from_file(self, filename)
Example #14
    def insert_data_from_file(self, filename):
        """Perform a bulk insert."""
        self.get_cursor()
        if self.check_bulk_insert() and self.table.header_rows < 2 and (
                self.table.delimiter in ["\t", ","]):
            print("Inserting data from " + os.path.basename(filename) + "...")

            if self.table.delimiter == "\t":
                fmt = "TabDelimited"
            elif self.table.delimiter == ",":
                fmt = "CSVDelimited"

            if self.table.header_rows == 1:
                hdr = "Yes"
            else:
                hdr = "No"

            columns = self.table.get_insert_columns()

            need_to_delete = False
            add_to_record_id = 0

            if self.table.pk and not self.table.contains_pk:
                if '.' in os.path.basename(filename):
                    proper_name = filename.split('.')
                    len_name = len(proper_name)
                    newfilename = '.'.join(
                        proper_name[0:-1] if len_name > 0 else proper_name[0]
                    ) + "_new." + filename.split(".")[-1]
                else:
                    newfilename = filename + "_new"

                if not os.path.isfile(newfilename):
                    print("Adding index to " + os.path.abspath(newfilename) +
                          "...")
                    read = open(filename, "rb")
                    write = open(newfilename, "wb")
                    to_write = ""

                    for line in read:
                        line = line.strip()
                        to_write += str(id) + self.table.delimiter + line
                        add_to_record_id += 1
                    self.table.record_id += add_to_record_id

                    write.write(to_write)
                    write.close()
                    read.close()
                    need_to_delete = True
                columns = "record_id, " + columns
            else:
                newfilename = filename

            newfilename = os.path.abspath(newfilename)
            filename_length = (len(os.path.basename(newfilename)) * -1) - 1
            filepath = newfilename[:filename_length]
            statement = """
INSERT INTO """ + self.table_name() + " (" + columns + """)
SELECT * FROM [""" + os.path.basename(newfilename) + ''']
IN "''' + filepath + '''" "Text;FMT=''' + fmt + ''';HDR=''' + hdr + ''';"'''
            try:
                self.execute(statement)
                return True
            except BaseException:
                print("Couldn't bulk insert. Trying manual insert.")
                self.connection.rollback()
                self.table.record_id -= add_to_record_id
                return None
            finally:
                if need_to_delete:
                    os.remove(newfilename)

        return Engine.insert_data_from_file(self, filename)
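Examples #13 and #14 target MS Access via the Jet/ACE Text ISAM driver, which exposes a delimited file in a directory as a read-only table that can feed an INSERT ... SELECT. For a comma-delimited file with a header row, the generated statement comes out roughly like this (table, file, and path are hypothetical):

    # FMT and HDR are derived from self.table.delimiter and
    # self.table.header_rows in the code above.
    statement = """
    INSERT INTO my_table (record_id, col_a, col_b)
    SELECT * FROM [data_new.csv]
    IN "C:\\tmp" "Text;FMT=CSVDelimited;HDR=Yes;"
    """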