def insert(self, table, dataframe, **args):
    """Bulk-insert a DataFrame into `table` with ON DUPLICATE KEY UPDATE.

    Key columns (ReportDate, Code, Date, ValuationMethod) identify the row
    and are excluded from the UPDATE clause. Errors are logged via
    LogHandler; the method never raises.

    :param table: destination table name (trusted — interpolated into SQL)
    :param dataframe: pandas DataFrame whose columns match the table schema
    :param args: optional keywords; 'filename' is used in error logging
    """
    onDupUpdateKey = []
    for c in dataframe.columns:
        # Key columns identify the row, so they must not be updated.
        if c not in ('ReportDate', 'Code', 'Date', 'ValuationMethod'):
            col = c.replace(" ", "")
            onDupUpdateKey.append('%s=VALUES(%s)' % (col, col))
    # NOTE(review): table/column names are string-interpolated into the SQL
    # text — they must come from trusted config, never from user input.
    # Row VALUES go through parameterized %s placeholders, which is safe.
    sql_insert = 'INSERT INTO %s(%s) VALUES(%s) %s' % (
        table,
        ','.join(dataframe.columns),
        ','.join(['%s'] * len(dataframe.columns)),
        'ON DUPLICATE KEY UPDATE ' + ','.join(onDupUpdateKey),
    )
    # BUG FIX: the original called self.__sqlConnect.cursor() twice —
    # once in try (the cursor that ran the query) and once in finally
    # (a brand-new cursor that was closed immediately), leaking the
    # first one. Create a single cursor and close that same cursor.
    cursor = self.__sqlConnect.cursor()
    try:
        row = [tuple(r) for r in dataframe.values]
        cursor.executemany(sql_insert, row)
        # NB : you won't get an IntegrityError when reading
        self.__sqlConnect.commit()
    except mysql.connector.Error as err:
        # args.get avoids a KeyError inside the error handler when the
        # caller didn't pass filename= (the original used args['filename']).
        LogHandler.log_exceptions("""
        Parsing file {}\nSQL Query: {}\nSomething went wrong: {}\n
        """.format(args.get('filename'), sql_insert, err))
    finally:
        cursor.close()
def saveDataFrame(self, dataframe, table):
    """Append `dataframe` to `table` using pandas DataFrame.to_sql.

    Best-effort: a DBAPIError is logged and swallowed so one bad frame
    does not abort the whole run.

    :param dataframe: pandas DataFrame to persist
    :param table: destination table name; also keys into getDbType()
                  for the per-column dtype mapping
    """
    try:
        dataframe.to_sql(name=table,
                         con=self.__con.getEngine(),
                         index=False,
                         if_exists='append',
                         dtype=self.__con.getDbType()[table])
    except DBAPIError as e:
        # Deliberate best-effort swallow: log and continue.
        # (Removed the dead `pass` and the empty `finally: pass`.)
        LogHandler.log_exceptions("Sql Exceptions: %s\r\n" % e)
def __init__(self):
    """Build the SQLAlchemy engine from the module-level dbConfig dict.

    Reads user/password/host/port/database from dbConfig. Failures are
    logged via LogHandler rather than propagated.
    """
    self.__db_name = dbConfig['database']
    LogHandler.log_msg("DB engine initializing...")
    try:
        # BUG FIX: the original nested the call — create_engine(
        # create_engine(url)) — passing an Engine object where a URL
        # string is expected. One call is correct.
        self.engine = create_engine(
            'mysql://%s:%s@%s:%s/%s' % (
                dbConfig['user'],
                dbConfig['password'],
                dbConfig['host'],
                dbConfig['port'],
                dbConfig['database'],
            ),
            echo=False)
    except DBAPIError as err:
        LogHandler.log_exceptions(err)
    finally:
        LogHandler.log_msg("Done.")
def engine_insert_update(self, dataframe, table, **args):
    """Insert/update a DataFrame through the SQLAlchemy engine.

    Builds the same INSERT ... ON DUPLICATE KEY UPDATE statement as
    insert(); key columns (ReportDate, Code, Date, ValuationMethod) are
    excluded from the UPDATE clause. Errors are logged, never raised.

    :param dataframe: pandas DataFrame whose columns match the table schema
    :param table: destination table name (trusted — interpolated into SQL)
    :param args: optional keywords; 'filename' is used in error logging
    """
    onDupUpdateKey = []
    for c in dataframe.columns:
        # Key columns identify the row, so they must not be updated.
        if c not in ('ReportDate', 'Code', 'Date', 'ValuationMethod'):
            col = c.replace(" ", "")
            onDupUpdateKey.append('%s=VALUES(%s)' % (col, col))
    sql_insert = 'INSERT INTO %s(%s) VALUES(%s) %s' % (
        table,
        ','.join(dataframe.columns),
        ','.join(['%s'] * len(dataframe.columns)),
        'ON DUPLICATE KEY UPDATE ' + ','.join(onDupUpdateKey),
    )
    try:
        # Context manager guarantees the connection is returned to the pool.
        with self.__engine.connect() as con:
            row = [tuple(x) for x in dataframe.values]
            con.execute(sql_insert, *row)
    # BUG FIX: DataError and OperationalError are subclasses of
    # DBAPIError in SQLAlchemy, so the original's separate except
    # branches for them were unreachable dead code. One handler with
    # a tuple covers all three with identical logging.
    except (DBAPIError, DataError, OperationalError) as err:
        # args.get avoids a KeyError inside the error handler when the
        # caller didn't pass filename=.
        LogHandler.log_exceptions("""
        Parsing file {}\nSQL Query: {}\nSomething went wrong: {}\n
        """.format(args.get('filename'), sql_insert, err))
thread_num = 4
file_nums = 0
threaded_file = []

if __name__ == '__main__':
    # Wall-clock timing: process_time_ns() counts CPU time only and
    # would exclude the time spent blocked in th.join().
    start_time = time.perf_counter_ns()

    for root, directories, files in os.walk("csv"):
        all_files = len(files)
        # Ceiling division with a floor of 1: round() could yield 0 for
        # small file counts, making range()'s step 0 and crashing.
        sublist_size = max(1, -(-len(files) // thread_num))
        threaded_file = [
            files[i:i + sublist_size]
            for i in range(0, len(files), sublist_size)
        ]

    threads = []
    for index, tf in enumerate(threaded_file):
        p = StreamThread(sublist=tf, threadCode="Thread-{}".format(index))
        threads.append(p)

    # BUG FIX: the original called th.start() and th.join() inside the
    # same loop iteration, which ran the threads one after another with
    # no parallelism. Start them all first, then join them all.
    for th in threads:
        try:
            th.start()
        except Error as err:
            LogHandler.log_exceptions(
                "Error: unable to start thread, msg: {0}".format(err))
    for th in threads:
        th.join()

    end_time = time.perf_counter_ns()
    print("All threads time spend: %sms" % round(
        (end_time - start_time) / 1000000, 5))