def replace(self, table, data, pk_fields=None, commit=True, echo=True):
    """Upsert data into database.

    The statement comes from the ``core.replace`` template, formatted with
    the table name, the comma-joined field names and one ``?`` placeholder
    per field.  Field names are taken from the first row.

    Args:
        table: Target table name; interpolated into the SQL text as-is.
        data: Rows to write, each a namedtuple or a dict.  Either a sized,
            indexable collection, or an iterator for which the project
            helper ``is_gen_func`` returns true (presumably generators --
            confirm against that helper).
        pk_fields: Not used by this method.
        commit: Kept for backward compatibility.  The connection is opened
            and closed inside this call, so successful work is always
            committed; passing ``False`` only skips the summary log.
        echo: When true, log a summary with row count and throughput.

    Returns:
        ``False`` when ``data`` is empty, otherwise the number of rows
        handed to ``executemany``.

    Raises:
        Exception: Any error raised while executing.  The SQL text is
            logged and the transaction rolled back before re-raising.
    """
    s_t = datetime.datetime.now()

    # Decide on the streaming path first: iterators do not support len().
    streaming = is_gen_func(data)
    if streaming:
        try:
            row = next(data)
        except StopIteration:
            return False
    else:
        if not len(data):
            return False
        row = data[0]

    if isnamedtupleinstance(row):
        fields = row._fields
    else:
        fields = sorted(row.keys())

    # Build the SQL before opening the connection so a template error
    # cannot leave a connection behind.
    sql = self._template('core.replace').format(
        table=table,
        names=', '.join(fields),
        values=', '.join(['?'] * len(fields)),
    )

    connection = self.engine.raw_connection()
    connection.autocommit = False
    cursor = connection.cursor()
    counter = 0

    try:
        if streaming:
            # `row` was already pulled off the iterator above; seed the
            # first batch with it so it is not lost.
            batch = [row]
            for row in data:
                batch.append(row)
                # >= rather than ==: the seeded batch can already be at
                # the limit when batch_size is 1.
                if len(batch) >= self.batch_size:
                    cursor.executemany(sql, batch)
                    counter += len(batch)
                    batch = []
            if batch:
                cursor.executemany(sql, batch)
                counter += len(batch)
        else:
            cursor.executemany(sql, data)
            counter += len(data)

        connection.commit()
    except Exception:
        log(Exception('Error for SQL: ' + sql))
        # Do not persist a partially written data set.
        connection.rollback()
        raise
    finally:
        cursor.close()
        connection.close()

    if not commit:
        return counter

    if echo:
        secs = (datetime.datetime.now() - s_t).total_seconds()
        mins = round(secs / 60, 1)
        # Elapsed time can be 0 on coarse clocks; avoid ZeroDivisionError.
        rate = round(counter / secs, 1) if secs else 0.0
        log("Inserted {} records into table '{}' in {} mins [{} r/s].".
            format(counter, table, mins, rate))

    return counter
def insert(self, table, data, echo=True):
    """Bulk-load rows into ``table`` with ``COPY ... FROM stdin``.

    Rows are spooled to ``<self.tmp_folder>/batch_sql.csv`` as
    pipe-delimited CSV and then passed to ``cursor.copy_expert``.  When
    ``data`` is an iterator (``is_gen_func`` returns true) the first row
    has to be consumed to discover the field names; that row is written
    with the ``core.insert`` statement and the rest go through COPY.

    Args:
        table: Target table name; interpolated into the SQL text as-is.
        data: Rows to load, each a namedtuple or a dict.  Either an
            indexable collection or an iterator recognised by
            ``is_gen_func``.  Field names come from the first row.
        echo: When true, log a summary with row count and throughput.

    Returns:
        The number of rows written; ``0`` when ``data`` is empty.

    Raises:
        Exception: Any error raised while writing.  The INSERT SQL text is
            logged and the transaction rolled back before re-raising.
    """
    s_t = datetime.datetime.now()

    streaming = is_gen_func(data)
    try:
        headers = next(data) if streaming else data[0]
    except (StopIteration, IndexError):
        # Nothing to load.
        return 0

    if isnamedtupleinstance(headers):
        fields = headers._fields

        def as_values(row):
            return row
    else:
        fields = sorted(headers.keys())

        def as_values(row):
            # csv.writer iterates its argument, which for a dict yields
            # the keys; project the values out in column order instead.
            return [row[f] for f in fields]

    sql = self._template('core.insert').format(
        table=table,
        names=', \n'.join([self._fix_f_name(f) for f in fields]),
        values=', \n'.join(['%s'] * len(fields)),
    )

    # NOTE(review): the INSERT passes names through self._fix_f_name but the
    # COPY column list uses the raw names -- confirm this is intended.
    copy_sql = '''COPY {} ({}) FROM stdin WITH CSV DELIMITER '|' QUOTE '"' ESCAPE '"' '''.format(
        table, ', '.join(fields))

    temp_file_path = '{}/batch_sql.csv'.format(self.tmp_folder)

    connection = self.engine.raw_connection()
    connection.autocommit = False
    cursor = connection.cursor()
    counter = 0

    try:
        if streaming:
            # The first row was already taken off the iterator.
            cursor.execute(sql, as_values(headers))
            counter = 1

        try:
            with open(temp_file_path, 'w') as batch_f:
                batch_w = csv.writer(
                    batch_f, delimiter='|', quoting=csv.QUOTE_MINIMAL)
                for row in data:
                    batch_w.writerow(as_values(row))
                    counter += 1

            with open(temp_file_path, 'r') as batch_f:
                cursor.copy_expert(copy_sql, batch_f)
        finally:
            # Remove the spool file on failure as well as on success.
            if os.path.exists(temp_file_path):
                os.remove(temp_file_path)

        connection.commit()
    except Exception:
        log(Exception('Error for SQL: ' + sql))
        # Do not persist a partially loaded data set.
        connection.rollback()
        raise
    finally:
        cursor.close()
        connection.close()

    if echo:
        secs = (datetime.datetime.now() - s_t).total_seconds()
        mins = round(secs / 60, 1)
        # Elapsed time can be 0 on coarse clocks; avoid ZeroDivisionError.
        rate = round(counter / secs, 1) if secs else 0.0
        log("Inserted {} records into table '{}' in {} mins [{} r/s].".
            format(
                counter,
                table,
                mins,
                rate,
            ))

    return counter
def update(
        self,
        table,
        data,
        pk_fields,
        commit=True,
        echo=True,
        temp_table=None,
):
    """Update data in database.

    Every row is applied with the ``core.update`` template, using named
    binds (``:field``): non-key fields go into the SET list and
    ``pk_fields`` into the WHERE condition.  Namedtuple rows are converted
    to dicts so they can be bound by name.

    Args:
        table: Target table name; interpolated into the SQL text as-is.
        data: Rows to apply, each a namedtuple or a dict.  Either a sized,
            indexable collection or an iterator recognised by
            ``is_gen_func``.  Field names come from the first row.
        pk_fields: Names of the key fields that identify the row to update.
        commit: Kept for backward compatibility.  The connection is opened
            and closed inside this call, so successful work is always
            committed; passing ``False`` only skips the summary log.
        echo: When true, log a summary with row count and throughput.
        temp_table: Not supported here; any truthy value raises.

    Returns:
        ``False`` when ``data`` is empty, otherwise the number of rows
        handed to ``executemany``.

    Raises:
        Exception: If ``temp_table`` is given, or for any error raised
            while executing (the SQL text is logged and the transaction
            rolled back before re-raising).
    """
    s_t = datetime.datetime.now()

    # Decide on the streaming path first: iterators do not support len().
    streaming = is_gen_func(data)
    if streaming:
        try:
            row = next(data)
        except StopIteration:
            return False
    else:
        if not len(data):
            return False
        row = data[0]

    if temp_table:
        raise Exception('temp_table UPDATE is not supported in SQLite.')

    if isnamedtupleinstance(row):
        fields = row._fields
        # Named binds need mappings.
        if streaming:
            # The first row is already off the iterator: convert it
            # separately so it is not dropped, and convert the rest lazily
            # so batching still applies.
            row = row._asdict()
            data = (r._asdict() for r in data)
        else:
            data = [r._asdict() for r in data]
    else:
        fields = sorted(row.keys())

    pk_fields_set = set(pk_fields)
    non_pk_fields = [f for f in fields if f not in pk_fields_set]

    # set_fields2 / pk_fields_equal2 / temp_table look like inputs for a
    # temp-table variant of the template; they are still supplied in case
    # the 'core.update' template refers to them.
    sql = self._template('core.update').format(
        table=table,
        set_fields=',\n'.join(
            ['{f} = :{f}'.format(f=f) for f in non_pk_fields]),
        pk_fields_equal=' and '.join(
            ['{f} = :{f}'.format(f=f) for f in pk_fields]),
        set_fields2=',\n'.join(
            ['{f} = t2.{f}'.format(f=f) for f in non_pk_fields]),
        pk_fields_equal2=' and '.join(
            ['t1.{f} = t2.{f}'.format(f=f) for f in pk_fields]),
        temp_table=temp_table,
    )

    connection = self.engine.raw_connection()
    connection.autocommit = False
    cursor = connection.cursor()
    counter = 0

    try:
        if streaming:
            batch = [row]
            for row in data:
                batch.append(row)
                # >= rather than ==: the seeded batch can already be at
                # the limit when batch_size is 1.
                if len(batch) >= self.batch_size:
                    cursor.executemany(sql, batch)
                    counter += len(batch)
                    batch = []
            if batch:
                cursor.executemany(sql, batch)
                counter += len(batch)
        else:
            cursor.executemany(sql, data)
            counter += len(data)

        connection.commit()
    except Exception:
        log(Exception('Error for SQL: ' + sql))
        # Do not persist a partially applied data set.
        connection.rollback()
        raise
    finally:
        cursor.close()
        connection.close()

    if not commit:
        return counter

    if echo:
        secs = (datetime.datetime.now() - s_t).total_seconds()
        mins = round(secs / 60, 1)
        # Elapsed time can be 0 on coarse clocks; avoid ZeroDivisionError.
        rate = round(counter / secs, 1) if secs else 0.0
        log("Updated {} records into table '{}' in {} mins [{} r/s].".
            format(counter, table, mins, rate))

    return counter
def replace(self,
            table,
            data,
            pk_fields,
            field_types=None,
            commit=True,
            echo=True):
    """Insert/Update records of namedtuple or dicts.

    The statement comes from the ``core.replace`` template (presumably a
    MERGE -- confirm against the template).  It is formatted with bind
    expressions for every field, a ``src``/``tgt`` match condition on
    ``pk_fields``, an update list of the non-key fields, and the full
    name/value lists.  Namedtuple rows are bound positionally
    (``:1, :2, ...``), dict rows by field name.

    Args:
        table: Target table name; interpolated into the SQL text as-is.
        data: Rows to write, each a namedtuple or a dict.  Either a sized,
            indexable collection or an iterator recognised by
            ``is_gen_func``.  Field names come from the first row.
        pk_fields: Names of the key fields used to match existing rows.
        field_types: Optional mapping of field name to a sequence whose
            first item is one of ``string``, ``integer``, ``decimal``,
            ``date``, ``datetime`` or ``text``; used to call
            ``cursor.setinputsizes``.
        commit: Kept for backward compatibility.  The connection is opened
            and closed inside this call, so successful work is always
            committed; passing ``False`` only skips the summary log.
        echo: When true, log a summary with row count and throughput.

    Returns:
        ``False`` when ``data`` is empty, otherwise the number of rows
        handed to ``executemany``.

    Raises:
        Exception: Any error raised while preparing or executing.  The SQL
            text is logged and the transaction rolled back before
            re-raising.
    """
    # Imported locally, as in the original, so the module loads without
    # the Oracle driver installed.
    import cx_Oracle

    s_t = datetime.datetime.now()

    cx_data_map = dict(
        string=cx_Oracle.STRING,
        integer=cx_Oracle.NUMBER,
        decimal=cx_Oracle.NUMBER,
        date=cx_Oracle.DATETIME,
        datetime=cx_Oracle.DATETIME,
        text=cx_Oracle.CLOB,
    )

    # Decide on the streaming path first: iterators do not support len().
    streaming = is_gen_func(data)
    if streaming:
        try:
            row = next(data)
        except StopIteration:
            return False
    else:
        if not len(data):
            return False
        row = data[0]

    if isnamedtupleinstance(row):
        fields = row._fields
        binds = list(range(1, len(fields) + 1))
    else:
        fields = sorted(row.keys())
        binds = fields

    pk_fields_set = set(pk_fields)

    sql = self._template('core.replace').format(
        table=table,
        name_values=',\n'.join(
            [':{} as {}'.format(b, f) for b, f in zip(binds, fields)]),
        src_tgt_condition='\nAND '.join(
            ['src.{f} = tgt.{f}'.format(f=f) for f in pk_fields]),
        set_fields=',\n'.join([
            'tgt.{f} = src.{f}'.format(f=f) for f in fields
            if f not in pk_fields_set
        ]),
        names=',\n'.join(['tgt.{f}'.format(f=f) for f in fields]),
        values=',\n'.join(['src.{f}'.format(f=f) for f in fields]),
    )

    connection = self.engine.raw_connection()
    connection.autocommit = False
    cursor = connection.cursor()
    counter = 0

    try:
        if field_types:
            input_sizes = {
                self._fix_f_name(f): cx_data_map[field_types[f][0]]
                for f in field_types
            }
            cursor.setinputsizes(**input_sizes)

        # Prepare once; executemany(None, ...) re-uses the prepared text.
        cursor.prepare(sql)

        if streaming:
            # `row` was already pulled off the iterator above; seed the
            # first batch with it so it is not lost.
            batch = [row]
            for row in data:
                batch.append(row)
                # >= rather than ==: the seeded batch can already be at
                # the limit when batch_size is 1.
                if len(batch) >= self.batch_size:
                    cursor.executemany(None, batch)
                    counter += len(batch)
                    batch = []
            if batch:
                cursor.executemany(None, batch)
                counter += len(batch)
        else:
            cursor.executemany(None, data)
            counter += len(data)

        connection.commit()
    except Exception:
        log(Exception('Error for SQL: ' + sql))
        # Do not persist a partially written data set.
        connection.rollback()
        raise
    finally:
        cursor.close()
        connection.close()

    if not commit:
        return counter

    if echo:
        secs = (datetime.datetime.now() - s_t).total_seconds()
        mins = round(secs / 60, 1)
        # Elapsed time can be 0 on coarse clocks; avoid ZeroDivisionError.
        rate = round(counter / secs, 1) if secs else 0.0
        log("Inserted {} records into table '{}' in {} mins [{} r/s].".format(
            counter, table, mins, rate))

    return counter