def test_convert_dictionary_to_mysql_table_function(self):
    from fundamentals.mysql import writequery
    from fundamentals.mysql import convert_dictionary_to_mysql_table

    # REBUILD THE FIXTURE TABLE FROM SCRATCH SO THE TEST IS REPEATABLE
    sqlQuery = "DROP TABLE IF EXISTS `testing_table`; CREATE TABLE IF NOT EXISTS `testing_table` (`id` INT NOT NULL,`uniquekey1` varchar(45) NOT NULL default 'ha',`uniqueKey2` varchar(45) NOT NULL default 'ha', PRIMARY KEY (`id`))"
    writequery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        Force=False,
        manyValueList=False
    )

    # ROW CONTENT -- KEYS CONTAINING SPACES EXERCISE COLUMN-NAME CLEANING
    testRow = {
        "a newKey": "cool",
        "and another": "super cool",
        "uniquekey1": "cheese",
        "uniqueKey2": "burgers"
    }

    # ADD THE ROW DIRECTLY TO THE TABLE (NOT JUST RETURNING THE INSERT)
    message = convert_dictionary_to_mysql_table(
        log=log,
        dbConn=dbConn,
        dictionary=testRow,
        dbTableName="testing_table",
        uniqueKeyList=["uniquekey1", "uniqueKey2"],
        createHelperTables=False,
        dateModified=False,
        returnInsertOnly=False
    )
def test_return_inserts_with_datetime_pre_compiled(self):
    from fundamentals.mysql import writequery
    from fundamentals.mysql import convert_dictionary_to_mysql_table

    # MAKE SURE THE TARGET TABLE EXISTS BEFORE GENERATING INSERTS
    sqlQuery = "CREATE TABLE IF NOT EXISTS `testing_table` (`id` INT NOT NULL, PRIMARY KEY (`id`))"
    writequery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        Force=False,
        manyValueList=False
    )

    # ROW CONTENT -- KEYS CONTAINING SPACES EXERCISE COLUMN-NAME CLEANING
    testRow = {
        "a newKey": "cool",
        "and another": "super cool",
        "uniquekey1": "cheese",
        "uniqueKey2": "burgers"
    }

    # RETURN ONLY THE INSERT STATEMENT, PASSING A PRE-COMPILED DATETIME REGEX
    message = convert_dictionary_to_mysql_table(
        log=log,
        dbConn=dbConn,
        dictionary=testRow,
        dbTableName="testing_table",
        uniqueKeyList=["uniquekey1", "uniqueKey2"],
        createHelperTables=False,
        dateModified=False,
        returnInsertOnly=True,
        reDatetime=reDatetime
    )
def list_of_dictionaries_to_mysql_inserts(
        log,
        datalist,
        tableName):
    """Convert a python list of dictionaries to mysql insert statements

    **Key Arguments:**
        - ``log`` -- logger
        - ``datalist`` -- a list of dictionaries
        - ``tableName`` -- the name of the table to create the insert statements for

    **Return:**
        - ``output`` -- the mysql insert statements (as a string), or
          ``"NO MATCH"`` if ``datalist`` is empty

    **Usage:**

        .. code-block:: python

            from fundamentals.files import list_of_dictionaries_to_mysql_inserts
            mysqlInserts = list_of_dictionaries_to_mysql_inserts(
                log=log,
                datalist=dataList,
                tableName="my_new_table"
            )
            print(mysqlInserts)

        this outputs ``INSERT INTO ... ON DUPLICATE KEY UPDATE ...``
        statements, one per dictionary, separated by ``;`` newlines.
    """
    log.debug('starting the ``list_of_dictionaries_to_mysql_inserts`` function')

    # NOTHING TO DO FOR AN EMPTY LIST
    if not len(datalist):
        return "NO MATCH"

    inserts = []
    for d in datalist:
        # BUGFIX: the table name was previously hard-coded to
        # "testing_table"; honour the caller-supplied `tableName`
        insertCommand = convert_dictionary_to_mysql_table(
            log=log,
            dictionary=d,
            dbTableName=tableName,
            uniqueKeyList=[],
            dateModified=False,
            returnInsertOnly=True,
            replace=True,
            batchInserts=False
        )
        inserts.append(insertCommand)

    # ONE STATEMENT PER LINE, EACH TERMINATED WITH A SEMICOLON
    output = ";\n".join(inserts) + ";"

    log.debug('completed the ``list_of_dictionaries_to_mysql_inserts`` function')
    return output
def list_of_dictionaries_to_mysql_inserts(log, datalist, tableName):
    """Convert a python list of dictionaries to mysql insert statements

    **Key Arguments:**
        - ``log`` -- logger
        - ``datalist`` -- a list of dictionaries
        - ``tableName`` -- the name of the table to create the insert statements for

    **Return:**
        - ``output`` -- the mysql insert statements (as a string), or
          ``"NO MATCH"`` if ``datalist`` is empty

    **Usage:**

        .. code-block:: python

            from fundamentals.files import list_of_dictionaries_to_mysql_inserts
            mysqlInserts = list_of_dictionaries_to_mysql_inserts(
                log=log,
                datalist=dataList,
                tableName="my_new_table"
            )
            print(mysqlInserts)

        this outputs ``INSERT INTO ... ON DUPLICATE KEY UPDATE ...``
        statements, one per dictionary, separated by ``;`` newlines.
    """
    log.info('starting the ``list_of_dictionaries_to_mysql_inserts`` function')

    # NOTHING TO DO FOR AN EMPTY LIST
    if not len(datalist):
        return "NO MATCH"

    inserts = []
    for d in datalist:
        # BUGFIX: the table name was previously hard-coded to
        # "testing_table"; honour the caller-supplied `tableName`
        insertCommand = convert_dictionary_to_mysql_table(
            log=log,
            dictionary=d,
            dbTableName=tableName,
            uniqueKeyList=[],
            dateModified=False,
            returnInsertOnly=True,
            replace=True,
            batchInserts=False)
        inserts.append(insertCommand)

    # ONE STATEMENT PER LINE, EACH TERMINATED WITH A SEMICOLON
    output = ";\n".join(inserts) + ";"

    log.info(
        'completed the ``list_of_dictionaries_to_mysql_inserts`` function')
    return output
def test_convert_dictionary_to_mysql_table_function(self):
    from fundamentals.mysql import convert_dictionary_to_mysql_table

    # ROW CONTENT -- KEYS CONTAINING SPACES EXERCISE COLUMN-NAME CLEANING
    testRow = {
        "a newKey": "cool",
        "and another": "super cool",
        "uniquekey1": "cheese",
        "uniqueKey2": "burgers"
    }

    # ADD THE ROW DIRECTLY TO THE TABLE (NOT JUST RETURNING THE INSERT)
    message = convert_dictionary_to_mysql_table(
        log=log,
        dbConn=dbConn,
        dictionary=testRow,
        dbTableName="testing_table",
        uniqueKeyList=["uniquekey1", "uniqueKey2"],
        createHelperTables=False,
        dateModified=False,
        returnInsertOnly=False
    )
def test_convert_dictionary_to_mysql_table_function(self):
    from fundamentals.mysql import convert_dictionary_to_mysql_table

    # A DICTIONARY OF COLUMN-NAME: VALUE PAIRS TO WRITE TO THE TABLE
    rowContent = {}
    rowContent["a newKey"] = "cool"
    rowContent["and another"] = "super cool"
    rowContent["uniquekey1"] = "cheese"
    rowContent["uniqueKey2"] = "burgers"

    # WRITE THE DICTIONARY INTO `testing_table`
    message = convert_dictionary_to_mysql_table(
        dbConn=dbConn,
        log=log,
        dictionary=rowContent,
        dbTableName="testing_table",
        uniqueKeyList=["uniquekey1", "uniqueKey2"],
        createHelperTables=False,
        dateModified=False,
        returnInsertOnly=False
    )
def _list_of_dictionaries_to_mysql_inserts(self, tableName, createStatement=None):
    """Convert a python list of dictionaries to mysql insert statements

    **Key Arguments:**
        - ``tableName`` -- the name of the table to create the insert statements for
        - ``createStatement`` -- add this create statement to the top of the file. Will only be executed if no table of that name exists in database. Default *None*

    **Return:**
        - ``output`` -- the mysql insert statements (as a string), or
          ``"NO MATCH"`` if there are no dictionaries to convert
    """
    # BUGFIX: this opening log message previously read 'completed' (and
    # carried doubled backtick quoting)
    self.log.debug(
        'starting the ``_list_of_dictionaries_to_mysql_inserts`` function')

    if not len(self.listOfDictionaries):
        return "NO MATCH"

    # WORK ON A DEEP COPY SO THE SOURCE LIST IS LEFT UNTOUCHED
    dataCopy = copy.deepcopy(self.listOfDictionaries)

    if createStatement:
        output = createStatement + "\n"
    else:
        output = ""

    # ONE `INSERT ... ON DUPLICATE KEY UPDATE` STATEMENT PER DICTIONARY
    # (the duplicated `inserts = []` initialisation has been removed)
    inserts = [
        convert_dictionary_to_mysql_table(
            log=self.log,
            dictionary=d,
            dbTableName=tableName,
            uniqueKeyList=[],
            dateModified=False,
            returnInsertOnly=True,
            replace=True,
            batchInserts=False,
            reDatetime=self.reDatetime) for d in dataCopy
    ]
    output += ";\n".join(inserts) + ";"

    self.log.debug(
        'completed the ``_list_of_dictionaries_to_mysql_inserts`` function')
    return output
def test_return_inserts_non_batch(self):
    from fundamentals.mysql import convert_dictionary_to_mysql_table

    # ROW CONTENT -- KEYS CONTAINING SPACES EXERCISE COLUMN-NAME CLEANING
    dictionary = {"a newKey": "cool",
                  "and another": "super cool",
                  "uniquekey1": "cheese",
                  "uniqueKey2": "burgers"}

    # REQUEST THE RAW (NON-BATCHED) REPLACE-STYLE INSERT STATEMENT
    inserts = convert_dictionary_to_mysql_table(
        dbConn=dbConn,
        log=log,
        dictionary=dictionary,
        dbTableName="testing_table",
        uniqueKeyList=["uniquekey1", "uniqueKey2"],
        dateModified=False,
        returnInsertOnly=True,
        replace=True,
        batchInserts=False
    )

    # BUGFIX: `print inserts` was Python-2-only syntax; the call form
    # behaves identically on Python 2 and parses on Python 3
    print(inserts)
def test_return_inserts_non_batch(self):
    from fundamentals.mysql import convert_dictionary_to_mysql_table

    # ROW CONTENT -- KEYS CONTAINING SPACES EXERCISE COLUMN-NAME CLEANING
    dictionary = {"a newKey": "cool",
                  "and another": "super cool",
                  "uniquekey1": "cheese",
                  "uniqueKey2": "burgers"}

    # REQUEST THE RAW (NON-BATCHED) REPLACE-STYLE INSERT STATEMENT
    inserts = convert_dictionary_to_mysql_table(
        dbConn=dbConn,
        log=log,
        dictionary=dictionary,
        dbTableName="testing_table",
        uniqueKeyList=["uniquekey1", "uniqueKey2"],
        dateModified=False,
        returnInsertOnly=True,
        replace=True,
        batchInserts=False
    )

    # BUGFIX: `print inserts` was Python-2-only syntax; the call form
    # behaves identically on Python 2 and parses on Python 3
    print(inserts)
def _list_of_dictionaries_to_mysql_inserts(
        self,
        tableName,
        createStatement=None):
    """Convert a python list of dictionaries to mysql insert statements

    **Key Arguments:**
        - ``tableName`` -- the name of the table to create the insert statements for
        - ``createStatement`` -- add this create statement to the top of the file. Will only be executed if no table of that name exists in database. Default *None*

    **Return:**
        - ``output`` -- the mysql insert statements (as a string), or
          ``"NO MATCH"`` if there are no dictionaries to convert
    """
    # BUGFIX: this opening log message previously read 'completed' (and
    # carried doubled backtick quoting)
    self.log.debug(
        'starting the ``_list_of_dictionaries_to_mysql_inserts`` function')

    if not len(self.listOfDictionaries):
        return "NO MATCH"

    # WORK ON A DEEP COPY SO THE SOURCE LIST IS LEFT UNTOUCHED
    dataCopy = copy.deepcopy(self.listOfDictionaries)

    if createStatement:
        output = createStatement + "\n"
    else:
        output = ""

    # ONE `INSERT ... ON DUPLICATE KEY UPDATE` STATEMENT PER DICTIONARY
    # (the duplicated `inserts = []` initialisation has been removed)
    inserts = [convert_dictionary_to_mysql_table(
        log=self.log,
        dictionary=d,
        dbTableName=tableName,
        uniqueKeyList=[],
        dateModified=False,
        returnInsertOnly=True,
        replace=True,
        batchInserts=False,
        reDatetime=self.reDatetime) for d in dataCopy]
    output += ";\n".join(inserts) + ";"

    self.log.debug(
        'completed the ``_list_of_dictionaries_to_mysql_inserts`` function')
    return output
def add_yaml_file_content_to_database(self, filepath, deleteFile=False):
    """*given a path to a yaml file, add the yaml file content to the database*

    **Key Arguments:**
        - ``filepath`` -- the path to the yaml file
        - ``deleteFile`` -- delete the yaml file when its content has been added to the database. Default *False*

    **Return:**
        - None

    **Usage:**

        To parse and import the contents of a single yaml file into the database, use the following:

        .. code-block:: python

            from fundamentals.mysql import yaml_to_database
            # PARSE YAML FILE CONTENTS AND ADD TO DATABASE
            yaml2db = yaml_to_database(
                log=log,
                settings=settings,
                dbConn=dbConn
            )
            yaml2db.add_yaml_file_content_to_database(
                filepath=${1:"/path/to/file.yaml"},
                deleteFile=True
            )
    """
    # BUGFIX: this opening log message previously read 'completed' (and
    # carried doubled backtick quoting)
    self.log.debug(
        'starting the ``add_yaml_file_content_to_database`` method')

    import codecs
    try:
        self.log.debug("attempting to open the file %s" % (filepath,))
        readFile = codecs.open(filepath, encoding='utf-8', mode='r')
        thisData = readFile.read()
        readFile.close()
    except IOError as e:
        message = 'could not open the file %s' % (filepath,)
        self.log.critical(message)
        raise IOError(message)
    # NOTE: the redundant second readFile.close() (file already closed
    # inside the try block) has been removed

    # PARSE SIMPLE KEY: VALUE PAIRS FROM THE FILE CONTENT
    matchObject = re.finditer(
        r'(^|\n)(?P<key>[^\:]*)\:\s(?P<value>.*?)(\n|$)',
        thisData,
        flags=re.M | re.S
    )

    yamlContent = {}
    for match in matchObject:
        v = match.group("value")
        # STRIP MATCHING SURROUNDING QUOTES FROM THE VALUE
        if v[0] == '"' and v[-1] == '"':
            v = v[1:-1]
        elif v[0] == "'" and v[-1] == "'":
            v = v[1:-1]
        yamlContent[match.group("key")] = v

    # A `table` KEY IS MANDATORY -- IT NAMES THE DESTINATION TABLE(S)
    if "table" not in yamlContent:
        self.log.warning(
            'A table value is need in the yaml content to indicate which database table to add the content to: %(filepath)s' % locals())
        return None

    # NOTE THERE MAY BE MORE THAN ONE DATABASE TABLE
    dbTablesTmp = yamlContent["table"].split(",")
    del yamlContent["table"]
    dbTables = [d.strip() for d in dbTablesTmp]

    # UNSHORTEN URL -- BEST-EFFORT; ALSO A NO-OP WHEN NO `url` KEY EXISTS
    # (previously a bare `except:`, now narrowed to Exception)
    try:
        r = requests.head(yamlContent["url"], allow_redirects=True)
        yamlContent["url"] = r.url
    except Exception:
        pass

    yamlContent["original_yaml_path"] = filepath

    # USE THE URL AS THE UNIQUE KEY IF PRESENT
    if "url" in yamlContent:
        uniqueKeyList = ["url"]
    else:
        uniqueKeyList = []

    # WRITE THE CONTENT TO EVERY REQUESTED TABLE
    for t in dbTables:
        convert_dictionary_to_mysql_table(
            dbConn=self.dbConn,
            log=self.log,
            dictionary=yamlContent,
            dbTableName=t,
            uniqueKeyList=uniqueKeyList,
            dateModified=True,
            returnInsertOnly=False,
            replace=True)

    if deleteFile:
        os.remove(filepath)

    self.log.debug(
        'completed the ``add_yaml_file_content_to_database`` method')
    return None
def insert_list_of_dictionaries_into_database_tables(dbConn, log, dictList, dbTableName, uniqueKeyList=[], dateModified=False, dateCreated=True, batchSize=2500, replace=False, dbSettings=False):
    """insert list of dictionaries into database tables

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dictList`` -- list of python dictionaries to add to the database table
        - ``dbTableName`` -- name of the database table
        - ``uniqueKeyList`` -- a list of column names to append as a unique constraint on the database
        - ``dateModified`` -- add the modification date as a column in the database
        - ``dateCreated`` -- add the created date as a column in the database
        - ``batchSize`` -- batch the insert commands into *batchSize* batches
        - ``replace`` -- replace row if a duplicate is found
        - ``dbSettings`` -- pass in the database settings so multiprocessing can establish one connection per process (might not be faster)

    **Return:**
        - None

    **Usage:**

        .. code-block:: python

            from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
            insert_list_of_dictionaries_into_database_tables(
                dbConn=dbConn,
                log=log,
                dictList=dictList,
                dbTableName="test_insert_many",
                uniqueKeyList=["col1", "col3"],
                dateModified=False,
                batchSize=2500
            )
    """
    log.debug(
        'starting the ````insert_list_of_dictionaries_into_database_tables`` function'
    )

    # MODULE-LEVEL STATE SHARED WITH THE MULTIPROCESSING WORKER FUNCTIONS
    # (_insert_single_batch_into_database reads globalDbConn and sharedList)
    global count
    global totalCount
    global globalDbConn
    global sharedList

    # PRE-COMPILED ISO-8601 DATETIME MATCHER PASSED DOWN TO THE CONVERTERS
    reDate = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    # WORKERS CONNECT PER-PROCESS FROM SETTINGS WHEN GIVEN, OTHERWISE
    # SHARE THIS SINGLE CONNECTION
    if dbSettings:
        globalDbConn = dbSettings
    else:
        globalDbConn = dbConn

    if len(dictList) == 0:
        log.warning('the dictionary to be added to the database is empty' %
                    locals())
        return None

    if len(dictList):
        # FIND BUG IN MYSQL QUERY BY UNCOMMENTING
        # tot = len(dictList)
        # for index, d in enumerate(dictList):
        #     if index > 1:
        #         # Cursor up one line and clear line
        #         sys.stdout.write("\x1b[1A\x1b[2K")
        #     percent = (float(index) / float(tot)) * 100.
        #     print('%(index)s/%(tot)s (%(percent)1.1f%% done)' % locals())
        #     convert_dictionary_to_mysql_table(
        #         dbConn=dbConn,
        #         log=log,
        #         dictionary=d,
        #         dbTableName=dbTableName,
        #         uniqueKeyList=uniqueKeyList,
        #         dateModified=dateModified,
        #         reDatetime=reDate,
        #         replace=replace,
        #         dateCreated=dateCreated)
        # sys.exit(0)

        # INSERT THE FIRST ROW INDIVIDUALLY -- presumably so the table and
        # any missing columns get created before the batched fast path runs
        convert_dictionary_to_mysql_table(dbConn=dbConn, log=log, dictionary=dictList[0], dbTableName=dbTableName, uniqueKeyList=uniqueKeyList, dateModified=dateModified, reDatetime=reDate, replace=replace, dateCreated=dateCreated)
        dictList = dictList[1:]

    # BATCHES ARE COMMITTED EXPLICITLY BY THE WORKERS
    dbConn.autocommit(False)

    if len(dictList):
        total = len(dictList)
        batches = int(old_div(total, batchSize))

        start = 0
        end = 0
        # EACH sharedList ENTRY IS (batch-of-dicts, running-row-count) --
        # workers index into this via their batchIndex argument
        sharedList = []
        for i in range(batches + 1):
            end = end + batchSize
            start = i * batchSize
            thisBatch = dictList[start:end]
            sharedList.append((thisBatch, end))

        totalCount = total + 1
        ltotalCount = totalCount

        print("Starting to insert %(ltotalCount)s rows into %(dbTableName)s" % locals())

        # FAN THE BATCHES OUT ACROSS PROCESSES; the load-in-file variant is
        # used only when per-process db settings are supplied
        if dbSettings == False:
            fmultiprocess(log=log, function=_insert_single_batch_into_database, inputArray=list(range(len(sharedList))), dbTableName=dbTableName, uniqueKeyList=uniqueKeyList, dateModified=dateModified, replace=replace, batchSize=batchSize, reDatetime=reDate, dateCreated=dateCreated)
        else:
            fmultiprocess(log=log, function=_add_dictlist_to_database_via_load_in_file, inputArray=list(range(len(sharedList))), dbTablename=dbTableName, dbSettings=dbSettings, dateModified=dateModified)

        # Cursor up one line and clear line, then print the final tally
        sys.stdout.write("\x1b[1A\x1b[2K")
        print("%(ltotalCount)s / %(ltotalCount)s rows inserted into %(dbTableName)s" % locals())

    log.debug(
        'completed the ``insert_list_of_dictionaries_into_database_tables`` function'
    )
    return None
def _insert_single_batch_into_database(batchIndex, log, dbTableName, uniqueKeyList, dateModified, replace, batchSize, reDatetime, dateCreated):
    """*insert one pre-built batch of dictionaries into the database table*

    **Key Arguments:**
        - ``batchIndex`` -- index into the module-level ``sharedList`` of (batch, rowCount) tuples
        - ``log`` -- logger
        - ``dbTableName`` -- name of the database table
        - ``uniqueKeyList`` -- a list of column names to append as a unique constraint on the database
        - ``dateModified`` -- add the modification date as a column in the database
        - ``replace`` -- update rows on duplicate keys
        - ``batchSize`` -- number of rows per batch (informational here)
        - ``reDatetime`` -- pre-compiled datetime-matching regex
        - ``dateCreated`` -- add the created date as a column in the database

    **Return:**
        - the string ``"None"``
    """
    log.debug('starting the ``_insert_single_batch_into_database`` function')

    # STATE SHARED WITH THE PARENT insert_list_of_dictionaries_... FUNCTION
    global totalCount
    global globalDbConn
    global sharedList

    batch = sharedList[batchIndex]

    reDate = reDatetime

    # A dict here means per-process settings were passed in -- open a fresh
    # connection for this worker; otherwise reuse the shared connection
    if isinstance(globalDbConn, dict):
        # SETUP ALL DATABASE CONNECTIONS
        dbConn = database(
            log=log,
            dbSettings=globalDbConn,
            autocommit=False
        ).connect()
    else:
        dbConn = globalDbConn

    count = batch[1]
    if count > totalCount:
        count = totalCount
    ltotalCount = totalCount

    inserted = False
    while inserted == False:

        # NOTE(review): both branches assign the same verb -- with `replace`
        # one might expect plain "INSERT" here since an ON DUPLICATE KEY
        # UPDATE clause is appended below; confirm before changing
        if not replace:
            insertVerb = "INSERT IGNORE"
        else:
            insertVerb = "INSERT IGNORE"

        # UNION OF ALL KEYS ACROSS THE BATCH; SPACES/HYPHENS BECOME
        # UNDERSCORES TO FORM VALID COLUMN NAMES
        uniKeys = set().union(*(list(d.keys()) for d in batch[0]))
        tmp = []
        tmp[:] = [m.replace(" ", "_").replace("-", "_") for m in uniKeys]
        uniKeys = tmp
        myKeys = '`,`'.join(uniKeys)

        # ONE VALUE TUPLE PER DICTIONARY; "None"/None BECOME SQL NULLs VIA
        # THE PLACEHOLDER SUBSTITUTION BELOW
        vals = [
            tuple([None if d[k] in ["None", None] else d[k] for k in uniKeys])
            for d in batch[0]
        ]
        valueString = ("%s, " * len(vals[0]))[:-2]

        insertCommand = insertVerb + """ INTO `""" + dbTableName + \
            """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \
            valueString + """, NOW())"""
        if not dateCreated:
            # STRIP THE dateCreated COLUMN AND ITS NOW() VALUE
            insertCommand = insertCommand.replace(
                ", dateCreated)", ")").replace(", NOW())", ")")

        # OPTIONAL ON-DUPLICATE-KEY-UPDATE CLAUSE (dup stays "" otherwise)
        dup = ""
        if replace:
            dup = " ON DUPLICATE KEY UPDATE "
            for k in uniKeys:
                dup = """%(dup)s %(k)s=values(%(k)s),""" % locals()
            dup = """%(dup)s updated=1, dateLastModified=NOW()""" % locals()
        insertCommand = insertCommand + dup

        # QUERY-STRING CLEANUPS: empty strings and "None" become NULL
        insertCommand = insertCommand.replace('\\""', '\\" "')
        insertCommand = insertCommand.replace('""', "null")
        insertCommand = insertCommand.replace('"None"', 'null')

        message = ""
        # log.debug('adding new data to the %s table; query: %s' %
        #           (dbTableName, addValue))

        # FAST PATH: one many-value INSERT for the whole batch. On any
        # failure (NOTE(review): bare except -- swallows all error types),
        # fall back to per-dictionary insert statements built by
        # convert_dictionary_to_mysql_table
        try:
            message = writequery(
                log=log,
                sqlQuery=insertCommand,
                dbConn=dbConn,
                Force=True,
                manyValueList=vals
            )
        except:
            theseInserts = []
            for aDict in batch[0]:
                insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                    dbConn=dbConn,
                    log=log,
                    dictionary=aDict,
                    dbTableName=dbTableName,
                    uniqueKeyList=uniqueKeyList,
                    dateModified=dateModified,
                    returnInsertOnly=True,
                    replace=replace,
                    reDatetime=reDate,
                    skipChecks=True
                )
                theseInserts.append(valueTuple)

            message = ""
            # log.debug('adding new data to the %s table; query: %s' %
            #           (dbTableName, addValue))
            message = writequery(
                log=log,
                sqlQuery=insertCommand,
                dbConn=dbConn,
                Force=True,
                manyValueList=theseInserts
            )

        if message == "unknown column":
            # MISSING COLUMNS: let the converter add them row-by-row, then
            # the while-loop retries the batch insert.
            # NOTE(review): iterates `batch` (the (rows, count) tuple) not
            # `batch[0]` -- looks like a bug; confirm against the callers
            for aDict in batch:
                convert_dictionary_to_mysql_table(
                    dbConn=dbConn,
                    log=log,
                    dictionary=aDict,
                    dbTableName=dbTableName,
                    uniqueKeyList=uniqueKeyList,
                    dateModified=dateModified,
                    reDatetime=reDate,
                    replace=replace
                )
        else:
            inserted = True

    # autocommit is off in the parent -- commit this worker's batch
    dbConn.commit()

    log.debug('completed the ``_insert_single_batch_into_database`` function')
    return "None"
class yaml_to_database():
    """
    *Take key-values from yaml files including a tablename(s) and add them to a mysql database table*

    **Key Arguments:**
        - ``log`` -- logger
        - ``settings`` -- the settings dictionary
        - ``pathToInputDir`` -- path to the directory containing the yaml files that will be added to the database table(s). Default *False*
        - ``dbConn`` -- connection to database to add the content to
        - ``deleteFiles`` -- delete the yamls files once their content has been added to the database. Default *False*

    **Usage:**

        To setup your logger, settings and database connections, please use the ``fundamentals`` package (`see tutorial here <http://fundamentals.readthedocs.io/en/latest/#tutorial>`_).

        To initiate a ``yaml2db`` object, use the following:

        .. code-block:: python

            from fundamentals.mysql import yaml_to_database
            yaml2db = yaml_to_database(
                log=log,
                settings=settings,
                dbConn=dbConn,
                pathToInputDir="/path/to/yaml/directory",
                deleteFiles=False
            )

        And here's an example of the content in a yaml file that this ``yaml2db`` object can parse:

        .. code-block:: yaml

            title: Why you should do most of your text editing in : Sublime Text | Sublime Text Tips
            url: http://sublimetexttips.com/why-you-should-do-most-of-your-text-editing-in-sublime-text/?utm_source=drip&utm_medium=email&utm_campaign=editor-proliferation
            kind: webpage
            subtype: article
            table: web_articles,podcasts
    """
    # Initialisation

    def __init__(
            self,
            log,
            dbConn,
            pathToInputDir=False,
            settings=False,
            deleteFiles=False
    ):
        self.log = log
        log.debug("instansiating a new 'yaml_to_database' object")
        self.settings = settings
        self.pathToInputDir = pathToInputDir
        self.dbConn = dbConn
        self.deleteFiles = deleteFiles
        # xt-self-arg-tmpx

        return None

    def ingest(self):
        """
        *ingest the contents of the directory of yaml files into a database*

        **Return:**
            - None

        **Usage:**

            To import an entire directory of yaml files into a database, use the following:

            .. code-block:: python

                from fundamentals.mysql import yaml_to_database
                yaml2db = yaml_to_database(
                    log=log,
                    settings=settings,
                    dbConn=dbConn,
                    pathToInputDir="/path/to/yaml/directory",
                    deleteFiles=False
                )
                yaml2db.ingest()
        """
        self.log.info('starting the ``ingest`` method')

        # WALK THE INPUT DIRECTORY AND PROCESS EVERY YAML FILE FOUND
        for d in os.listdir(self.pathToInputDir):
            if os.path.isfile(os.path.join(self.pathToInputDir, d)) and "yaml" in d.lower():
                self.add_yaml_file_content_to_database(
                    filepath=os.path.join(self.pathToInputDir, d),
                    deleteFile=self.deleteFiles
                )

        self.log.info('completed the ``ingest`` method')
        return None

    def add_yaml_file_content_to_database(self, filepath, deleteFile=False):
        """*given a path to a yaml file, add the yaml file content to the database*

        **Key Arguments:**
            - ``filepath`` -- the path to the yaml file
            - ``deleteFile`` -- delete the yaml file when its content has been added to the database. Default *False*

        **Return:**
            - None

        **Usage:**

            To parse and import the contents of a single yaml file into the database, use the following:

            .. code-block:: python

                from fundamentals.mysql import yaml_to_database
                # PARSE YAML FILE CONTENTS AND ADD TO DATABASE
                yaml2db = yaml_to_database(
                    log=log,
                    settings=settings,
                    dbConn=dbConn
                )
                yaml2db.add_yaml_file_content_to_database(
                    filepath=${1:"/path/to/file.yaml"},
                    deleteFile=True
                )
        """
        self.log.info(
            'starting the ``add_yaml_file_content_to_database`` method')

        import codecs
        try:
            self.log.debug("attempting to open the file %s" % (filepath,))
            readFile = codecs.open(filepath, encoding='utf-8', mode='r')
            thisData = readFile.read()
            readFile.close()
        # BUGFIX: `except IOError, e` was Python-2-only syntax; the `as`
        # form is accepted by both Python 2.6+ and Python 3
        except IOError as e:
            message = 'could not open the file %s' % (filepath,)
            self.log.critical(message)
            raise IOError(message)
        # NOTE: the redundant second readFile.close() (file already closed
        # inside the try block) has been removed

        # PARSE SIMPLE KEY: VALUE PAIRS FROM THE FILE CONTENT
        matchObject = re.finditer(
            r'(^|\n)(?P<key>[^\:]*)\:\s(?P<value>.*?)(\n|$)',
            thisData,
            flags=re.M | re.S
        )

        yamlContent = {}
        for match in matchObject:
            v = match.group("value")
            # STRIP MATCHING SURROUNDING QUOTES FROM THE VALUE
            if v[0] == '"' and v[-1] == '"':
                v = v[1:-1]
            elif v[0] == "'" and v[-1] == "'":
                v = v[1:-1]
            yamlContent[match.group("key")] = v

        # A `table` KEY IS MANDATORY -- IT NAMES THE DESTINATION TABLE(S)
        if "table" not in yamlContent:
            self.log.warning(
                'A table value is need in the yaml content to indicate which database table to add the content to: %(filepath)s' % locals())
            return None

        # NOTE THERE MAY BE MORE THAN ONE DATABASE TABLE
        dbTablesTmp = yamlContent["table"].split(",")
        del yamlContent["table"]
        dbTables = [d.strip() for d in dbTablesTmp]

        # UNSHORTEN URL -- BEST-EFFORT; ALSO A NO-OP WHEN NO `url` KEY
        # EXISTS (previously a bare `except:`, now narrowed to Exception)
        try:
            r = requests.head(yamlContent["url"], allow_redirects=True)
            yamlContent["url"] = r.url
        except Exception:
            pass

        yamlContent["original_yaml_path"] = filepath

        # USE THE URL AS THE UNIQUE KEY IF PRESENT
        if "url" in yamlContent:
            uniqueKeyList = ["url"]
        else:
            uniqueKeyList = []

        # WRITE THE CONTENT TO EVERY REQUESTED TABLE
        for t in dbTables:
            convert_dictionary_to_mysql_table(
                dbConn=self.dbConn,
                log=self.log,
                dictionary=yamlContent,
                dbTableName=t,
                uniqueKeyList=uniqueKeyList,
                dateModified=True,
                returnInsertOnly=False,
                replace=True)

        if deleteFile:
            os.remove(filepath)

        self.log.info(
            'completed the ``add_yaml_file_content_to_database`` method')
        return None
def insert_list_of_dictionaries_into_database_tables(dbConn, log, dictList, dbTableName, uniqueKeyList=[], dateModified=False, batchSize=2500, replace=False, dbSettings=False):
    """insert list of dictionaries into database tables

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dictList`` -- list of python dictionaries to add to the database table
        - ``dbTableName`` -- name of the database table
        - ``uniqueKeyList`` -- a list of column names to append as a unique constraint on the database
        - ``dateModified`` -- add the modification date as a column in the database
        - ``batchSize`` -- batch the insert commands into *batchSize* batches
        - ``replace`` -- replace row if a duplicate is found
        - ``dbSettings`` -- pass in the database settings so multiprocessing can establish one connection per process (might not be faster)

    **Return:**
        - None

    **Usage:**

        .. code-block:: python

            from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
            insert_list_of_dictionaries_into_database_tables(
                dbConn=dbConn,
                log=log,
                dictList=dictList,
                dbTableName="test_insert_many",
                uniqueKeyList=["col1", "col3"],
                dateModified=False,
                batchSize=2500
            )
    """
    log.info(
        'starting the ``insert_list_of_dictionaries_into_database_tables`` function'
    )

    # MODULE-LEVEL STATE SHARED WITH THE MULTIPROCESSING WORKER FUNCTIONS
    global count
    global totalCount
    global globalDbConn
    global sharedList

    # PRE-COMPILED ISO-8601 DATETIME MATCHER PASSED DOWN TO THE CONVERTERS
    reDate = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    # WORKERS CONNECT PER-PROCESS FROM SETTINGS WHEN GIVEN, OTHERWISE
    # SHARE THIS SINGLE CONNECTION
    if dbSettings:
        globalDbConn = dbSettings
    else:
        globalDbConn = dbConn

    if len(dictList) == 0:
        log.warning('the dictionary to be added to the database is empty' %
                    locals())
        return None

    if len(dictList):
        # INSERT THE FIRST ROW INDIVIDUALLY -- presumably so the table and
        # any missing columns get created before the batched fast path runs
        convert_dictionary_to_mysql_table(
            dbConn=dbConn,
            log=log,
            dictionary=dictList[0],
            dbTableName=dbTableName,
            uniqueKeyList=uniqueKeyList,
            dateModified=dateModified,
            reDatetime=reDate,
            replace=replace,
        )

    # BATCHES ARE COMMITTED EXPLICITLY BY THE WORKERS
    dbConn.autocommit(False)

    total = len(dictList[1:])
    batches = int(total / batchSize)

    start = 0
    end = 0
    # EACH sharedList ENTRY IS (batch-of-dicts, running-row-count) --
    # workers index into this via their batchIndex argument.
    # NOTE(review): batches are sliced from the full dictList even though
    # `total` excludes the first row -- the first row appears to be
    # re-inserted in batch 0; confirm this is intentional
    sharedList = []
    for i in range(batches + 1):
        end = end + batchSize
        start = i * batchSize
        thisBatch = dictList[start:end]
        sharedList.append((thisBatch, end))

    totalCount = total + 1
    ltotalCount = totalCount

    print "Starting to insert %(ltotalCount)s rows into %(dbTableName)s" % locals(
    )

    # FAN THE BATCHES OUT ACROSS PROCESSES; the load-in-file variant is
    # used only when per-process db settings are supplied
    if dbSettings == False:
        fmultiprocess(log=log, function=_insert_single_batch_into_database,
                      inputArray=range(len(sharedList)),
                      dbTableName=dbTableName,
                      uniqueKeyList=uniqueKeyList,
                      dateModified=dateModified,
                      replace=replace,
                      batchSize=batchSize,
                      reDatetime=reDate)
    else:
        fmultiprocess(log=log, function=_add_dictlist_to_database_via_load_in_file,
                      inputArray=range(len(sharedList)),
                      dbTablename=dbTableName,
                      dbSettings=dbSettings)

    # Cursor up one line and clear line, then print the final tally
    sys.stdout.write("\x1b[1A\x1b[2K")
    print "%(ltotalCount)s / %(ltotalCount)s rows inserted into %(dbTableName)s" % locals(
    )

    log.info(
        'completed the ``insert_list_of_dictionaries_into_database_tables`` function'
    )
    return None
def insert_list_of_dictionaries_into_database_tables(dbConn, log, dictList, dbTableName, uniqueKeyList=[], dateModified=False, batchSize=2500, replace=False): """insert list of dictionaries into database tables **Key Arguments:** - ``dbConn`` -- mysql database connection - ``log`` -- logger - ``dictList`` -- list of python dictionaries to add to the database table - ``dbTableName`` -- name of the database table - ``uniqueKeyList`` -- a list of column names to append as a unique constraint on the database - ``dateModified`` -- add the modification date as a column in the database - ``batchSize`` -- batch the insert commands into *batchSize* batches - ``replace`` -- repalce row if a duplicate is found **Return:** - None **Usage:** .. code-block:: python from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables insert_list_of_dictionaries_into_database_tables( dbConn=dbConn, log=log, dictList=dictList, dbTableName="test_insert_many", uniqueKeyList=["col1", "col3"], dateModified=False, batchSize=2500 ) """ log.info( 'starting the ``insert_list_of_dictionaries_into_database_tables`` function' ) if len(dictList) == 0: log.warning('the dictionary to be added to the database is empty' % locals()) return None if len(dictList): convert_dictionary_to_mysql_table(dbConn=dbConn, log=log, dictionary=dictList[0], dbTableName=dbTableName, uniqueKeyList=uniqueKeyList, dateModified=dateModified, replace=replace) total = len(dictList[1:]) batches = int(total / batchSize) start = 0 end = 0 theseBatches = [] for i in range(batches + 1): end = end + batchSize start = i * batchSize thisBatch = dictList[start:end] theseBatches.append(thisBatch) totalCount = total count = 0 for batch in theseBatches: count += len(batch) if count > batchSize: # Cursor up one line and clear line sys.stdout.write("\x1b[1A\x1b[2K") if count > totalCount: count = totalCount print "%(count)s / %(totalCount)s rows inserted into %(dbTableName)s" % locals( ) inserted = False while inserted == 
False: theseInserts = [] for aDict in batch: insertCommand, valueTuple = convert_dictionary_to_mysql_table( dbConn=dbConn, log=log, dictionary=aDict, dbTableName=dbTableName, uniqueKeyList=uniqueKeyList, dateModified=dateModified, returnInsertOnly=True, replace=replace) theseInserts.append(valueTuple) message = "" # log.debug('adding new data to the %s table; query: %s' % # (dbTableName, addValue)) message = writequery(log=log, sqlQuery=insertCommand, dbConn=dbConn, Force=True, manyValueList=theseInserts) if message == "unknown column": sys.exit(0) for aDict in batch: convert_dictionary_to_mysql_table( dbConn=dbConn, log=log, dictionary=aDict, dbTableName=dbTableName, uniqueKeyList=uniqueKeyList, dateModified=dateModified, replace=replace) else: inserted = True log.info( 'completed the ``insert_list_of_dictionaries_into_database_tables`` function' ) return None