def insert_into_table(self, table_name, cols, values, update_on_duplicate=True, db_name=None): """Insert the values in the columns of the given Table. Parameters ---------- table_name: string Name of the table to add the records to cols: (numpy) array of strings List of the columns to fill values: (numpy) array of tuples List of the values to fill. update_on_duplicate: boolean Determines what happens if a duplicate key is encountered If True: the record is updated (default) If False: the insertion is ignored db_name: string Name of the database in which the table is defined. If no database name is given, the current database is used. Returns ------- None Prerequisites ------------- Connection to a database shall have been made Table `table_name` shall exists Table `table_name` shall contain all columns in `cols` Postconditions -------------- If possible, all records are added to the Table Notes ----- Only 1000 records can be added in a single INSERT INTO statement (MySQL restriction) This method breaks up the insertion in multiple insertions of a 1000 when more than 1000 records need to be inserted. """ Assert.py_type(table_name, str, 'table_name') if db_name is not None: Assert.py_type(db_name, str, 'db_name') table_name = db_name + '.' + table_name Assert.nonemptylist(cols) for col in cols: Assert.py_type(col, str, 'Column {0}'.format(col)) nr_cols = len(cols) Assert.nonemptylist(values) for val in values: Assert.nonemptylist(val, nr_cols) assert self.current_database() is not None, \ 'Connection to a database must have been made.' # Split up the values in chunks of 1,000 max_inserts = 1000 nr_queries = np.ceil(len(values) / max_inserts) list_values = np.array_split(values, nr_queries) for vals in list_values: # Build up the query if update_on_duplicate: ignore = '' else: ignore = 'IGNORE ' q = "INSERT {0}INTO {1}\n".format(ignore, table_name) q += "(" for i_col, col in enumerate(cols): q += col if i_col < nr_cols - 1: q += ", " q += ")\n" q += "VALUES\n" for i_val, val in enumerate(vals): q += "(" for i_field, field in enumerate(val): # Field with quotes if necessary if field is None: q += "NULL" elif isinstance(field, str): if field.upper() == "NULL": q += "NULL" else: q += "'{0}'".format(field) elif isinstance(field, date) or isinstance(field, time): q += "'{0}'".format(field) elif isinstance(field, Decimal): # TODO: Note! Decimal is converted here to float! # In principle, this should not matter, if you insert it # in the database again, the desired precision is maintained. q += "{}".format(float(field)) elif np.isnan(field): q += "NULL" elif np.isscalar(field): q += "{0}".format(field) else: msg = 'Type {0} not recognized'.format(type(field)) raise AssertionError(msg) # Trailing comma if i_field < nr_cols - 1: q += ", " q += ")" if i_val < len(vals) - 1: q += "," q += "\n" if update_on_duplicate: q += "ON DUPLICATE KEY UPDATE\n" for i_col, col in enumerate(cols): q += "{0}=VALUES({0})".format(col) if i_col < nr_cols - 1: q += ",\n" # Execute the query self.execute(q)