コード例 #1
0
    def replace(self, table, data, pk_fields=None, commit=True, echo=True):
        """Upsert rows into ``table`` using the ``core.replace`` SQL template.

        Args:
            table: Name of the target table (interpolated into the template).
            data: A sized sequence of rows, or a generator of rows. Rows are
                namedtuples or dict-like objects; the column list is taken
                from the first row.
            pk_fields: Accepted for signature parity with the other loaders;
                not used by this implementation.
            commit: Kept for backward compatibility. The write is committed on
                success either way, because the connection is opened and
                closed inside this call and uncommitted work would be lost.
                When False the method returns right after the write and skips
                the summary log.
            echo: When True, log a one-line summary with the row count.

        Returns:
            The number of rows sent to the database, or ``False`` when
            ``data`` is empty.

        Raises:
            Exception: Whatever the driver raises; the transaction is rolled
                back before the error is re-raised.
        """
        s_t = datetime.datetime.now()

        # Generators have no len(); probe them with next() so that the
        # streaming branch below is actually reachable.
        streaming = is_gen_func(data)
        if streaming:
            try:
                row = next(data)
            except StopIteration:
                return False
        else:
            if not len(data):
                return False
            row = data[0]

        if isnamedtupleinstance(row):
            fields = row._fields
        else:
            fields = sorted(row.keys())

        # NOTE(review): '?' markers are positional; whether dict rows bind
        # correctly against them depends on the driver -- confirm.
        sql = self._template('core.replace').format(
            table=table,
            names=', '.join(fields),
            values=', '.join(['?'] * len(fields)),
        )

        connection = self.engine.raw_connection()
        connection.autocommit = False
        cursor = connection.cursor()

        counter = 0
        try:
            if streaming:
                # The first row was already pulled off the generator above.
                batch = [row]
                for row in data:
                    batch.append(row)
                    # ">=" so that a batch_size of 1 still flushes.
                    if len(batch) >= self.batch_size:
                        cursor.executemany(sql, batch)
                        counter += len(batch)
                        batch = []

                if batch:
                    cursor.executemany(sql, batch)
                    counter += len(batch)
            else:
                cursor.executemany(sql, data)
                counter += len(data)

            connection.commit()

        except Exception:
            log(Exception('Error for SQL: ' + sql))
            # Do not persist a half-finished load.
            connection.rollback()
            raise

        finally:
            cursor.close()
            connection.close()

        if not commit:
            return counter

        if echo:
            secs = (datetime.datetime.now() - s_t).total_seconds()
            mins = round(secs / 60, 1)
            # Guard against a zero elapsed time on coarse clocks.
            rate = round(counter / secs, 1) if secs else 0.0
            log("Inserted {} records into table '{}' in {} mins [{} r/s].".
                format(counter, table, mins, rate))
        return counter
コード例 #2
0
    def insert(self, table, data, echo=True):
        """Bulk-load rows into ``table`` through a temporary CSV and COPY.

        Rows are written to ``<self.tmp_folder>/batch_sql.csv`` with ``|`` as
        delimiter and then streamed to the server with
        ``cursor.copy_expert``. When ``data`` is a generator, its first row
        (needed to discover the columns) is inserted with a regular INSERT
        built from the ``core.insert`` template.

        Args:
            table: Name of the target table.
            data: A sized sequence of rows, or a generator of rows. Rows are
                namedtuples or dict-like objects; the column list is taken
                from the first row.
            echo: When True, log a one-line summary with the row count.

        Returns:
            The number of rows loaded, or ``False`` when ``data`` is empty
            (same convention as the sibling loaders).

        Raises:
            Exception: Whatever the driver or file system raises; the
                transaction is rolled back before the error is re-raised.
        """
        streaming = is_gen_func(data)
        if streaming:
            try:
                headers = next(data)
            except StopIteration:
                return False
        else:
            if not len(data):
                return False
            headers = data[0]

        is_named = isnamedtupleinstance(headers)
        fields = headers._fields if is_named else sorted(headers.keys())

        def as_values(record):
            # Namedtuples are already positional. Dict-like rows must be
            # flattened in column order, otherwise csv.writer would emit the
            # dict's keys instead of its values.
            return record if is_named else [record[f] for f in fields]

        sql = self._template('core.insert').format(
            table=table,
            names=', \n'.join([self._fix_f_name(f) for f in fields]),
            values=', \n'.join(['%s'] * len(fields)),
        )

        # NOTE(review): the INSERT column list goes through _fix_f_name but
        # the COPY column list does not -- confirm that is intended.
        cols_str = ', '.join(fields)
        copy_sql = '''COPY {} ({}) FROM stdin WITH CSV DELIMITER '|' QUOTE '"' ESCAPE '"' '''.format(
            table, cols_str)

        deli = '|'
        temp_file_path = '{}/batch_sql.csv'.format(self.tmp_folder)

        connection = self.engine.raw_connection()
        connection.autocommit = False
        cursor = connection.cursor()

        s_t = datetime.datetime.now()
        counter = 0
        try:
            if streaming:
                # The first row was consumed to read the columns; load it
                # with a plain INSERT so it is not lost.
                cursor.execute(sql, as_values(headers))
                counter = 1

            # newline='' lets the csv module control line endings itself.
            with open(temp_file_path, 'w', newline='') as batch_f:
                batch_w = csv.writer(batch_f,
                                     delimiter=deli,
                                     quoting=csv.QUOTE_MINIMAL)
                for row in data:
                    batch_w.writerow(as_values(row))
                    counter += 1

            with open(temp_file_path, 'r') as batch_f:
                cursor.copy_expert(copy_sql, batch_f)

            connection.commit()

        except Exception:
            log(Exception('Error for SQL: ' + sql))
            # Do not persist a half-finished load.
            connection.rollback()
            raise

        finally:
            cursor.close()
            connection.close()
            # Remove the temp file on failure as well as on success.
            if os.path.exists(temp_file_path):
                os.remove(temp_file_path)

        if echo:
            secs = (datetime.datetime.now() - s_t).total_seconds()
            mins = round(secs / 60, 1)
            # Guard against a zero elapsed time on coarse clocks.
            rate = round(counter / secs, 1) if secs else 0.0
            log("Inserted {} records into table '{}' in {} mins [{} r/s].".
                format(
                    counter,
                    table,
                    mins,
                    rate,
                ))
        return counter
コード例 #3
0
    def update(
        self,
        table,
        data,
        pk_fields,
        commit=True,
        echo=True,
        temp_table=None,
    ):
        """Update existing rows of ``table``, matching on ``pk_fields``.

        The statement comes from the ``core.update`` template and binds
        values by name (``:field``), so namedtuple rows are converted to
        dicts before execution.

        Args:
            table: Name of the target table.
            data: A sized sequence of rows, or a generator of rows. Rows are
                namedtuples or dict-like objects; the column list is taken
                from the first row.
            pk_fields: Column names used in the WHERE clause; every other
                column is placed in the SET clause.
            commit: Kept for backward compatibility. The write is committed on
                success either way, because the connection is opened and
                closed inside this call and uncommitted work would be lost.
                When False the method returns right after the write and skips
                the summary log.
            echo: When True, log a one-line summary with the row count.
            temp_table: Not supported by this backend; any truthy value
                raises.

        Returns:
            The number of rows sent to the database, or ``False`` when
            ``data`` is empty.

        Raises:
            Exception: If ``temp_table`` is given, or whatever the driver
                raises (the transaction is rolled back first).
        """
        s_t = datetime.datetime.now()

        # Generators have no len(); probe them with next() so that the
        # streaming branch below is actually reachable.
        streaming = is_gen_func(data)
        if streaming:
            try:
                row = next(data)
            except StopIteration:
                return False
        else:
            if not len(data):
                return False
            row = data[0]

        if temp_table:
            raise Exception('temp_table UPDATE is not supported in SQLite.')

        if isnamedtupleinstance(row):
            fields = row._fields
            # Named binds need mappings. A generator has already given up its
            # first row above, so put it back in front of the rest.
            converted = [r._asdict() for r in data]
            if streaming:
                converted.insert(0, row._asdict())
            data = converted
            streaming = False
        else:
            fields = sorted(row.keys())

        pk_fields_set = set(pk_fields)
        non_pk_fields = [f for f in fields if f not in pk_fields_set]

        # The *2 / temp_table arguments are unused by a template that does
        # not reference them; they are still passed in case it does.
        sql = self._template('core.update').format(
            table=table,
            set_fields=',\n'.join(
                ['{f} = :{f}'.format(f=f) for f in non_pk_fields]),
            pk_fields_equal=' and '.join(
                ['{f} = :{f}'.format(f=f) for f in pk_fields]),
            set_fields2=',\n'.join(
                ['{f} = t2.{f}'.format(f=f) for f in non_pk_fields]),
            pk_fields_equal2=' and '.join(
                ['t1.{f} = t2.{f}'.format(f=f) for f in pk_fields_set]),
            temp_table=temp_table,
        )

        connection = self.engine.raw_connection()
        connection.autocommit = False
        cursor = connection.cursor()

        counter = 0
        try:
            if streaming:
                # The first row was already pulled off the generator above.
                batch = [row]
                for row in data:
                    batch.append(row)
                    # ">=" so that a batch_size of 1 still flushes.
                    if len(batch) >= self.batch_size:
                        cursor.executemany(sql, batch)
                        counter += len(batch)
                        batch = []

                if batch:
                    cursor.executemany(sql, batch)
                    counter += len(batch)
            else:
                cursor.executemany(sql, data)
                counter += len(data)

            connection.commit()

        except Exception:
            log(Exception('Error for SQL: ' + sql))
            # Do not persist a half-finished update.
            connection.rollback()
            raise

        finally:
            cursor.close()
            connection.close()

        if not commit:
            return counter

        if echo:
            secs = (datetime.datetime.now() - s_t).total_seconds()
            mins = round(secs / 60, 1)
            # Guard against a zero elapsed time on coarse clocks.
            rate = round(counter / secs, 1) if secs else 0.0
            log("Updated {} records in table '{}' in {} mins [{} r/s].".
                format(counter, table, mins, rate))
        return counter
コード例 #4
0
  def replace(self,
              table,
              data,
              pk_fields,
              field_types=None,
              commit=True,
              echo=True):
    """Insert or update rows of namedtuples or dicts via ``core.replace``.

    The statement is prepared once on a cx_Oracle cursor and executed in
    batches with ``executemany``.

    Args:
      table: Name of the target table.
      data: A sized sequence of rows, or a generator of rows. Rows are
        namedtuples or dict-like objects; the column list is taken from the
        first row.
      pk_fields: Column names used to match source and target rows; every
        other column goes into the update SET list.
      field_types: Optional mapping of field name to a sequence whose first
        item is one of ``string``, ``integer``, ``decimal``, ``date``,
        ``datetime`` or ``text``; used to call ``cursor.setinputsizes``.
      commit: Kept for backward compatibility. The write is committed on
        success either way, because the connection is opened and closed
        inside this call and uncommitted work would be lost. When False the
        method returns right after the write and skips the summary log.
      echo: When True, log a one-line summary with the row count.

    Returns:
      The number of rows sent to the database, or ``False`` when ``data`` is
      empty.

    Raises:
      Exception: Whatever the driver raises; the transaction is rolled back
        before the error is re-raised.
    """
    import cx_Oracle

    s_t = datetime.datetime.now()
    cx_data_map = dict(
      string=cx_Oracle.STRING,
      integer=cx_Oracle.NUMBER,
      decimal=cx_Oracle.NUMBER,
      date=cx_Oracle.DATETIME,
      datetime=cx_Oracle.DATETIME,
      text=cx_Oracle.CLOB,
    )

    # Generators have no len(); probe them with next() so that the streaming
    # branch below is actually reachable.
    streaming = is_gen_func(data)
    if streaming:
      try:
        row = next(data)
      except StopIteration:
        return False
    else:
      if not len(data):
        return False
      row = data[0]

    if isnamedtupleinstance(row):
      fields = row._fields
      # Namedtuple rows bind by position (:1, :2, ...).
      values = list(range(1, len(fields) + 1))
    else:
      fields = sorted(row.keys())
      # Dict rows bind by field name.
      values = fields

    pk_fields_set = set(pk_fields)
    sql = self._template('core.replace').format(
      table=table,
      name_values=',\n'.join(
        [':{} as {}'.format(v, f) for v, f in zip(values, fields)]),
      src_tgt_condition='\nAND '.join(
        ['src.{f} = tgt.{f}'.format(f=f) for f in pk_fields]),
      set_fields=',\n'.join([
        'tgt.{f} = src.{f}'.format(f=f) for f in fields
        if f not in pk_fields_set
      ]),
      names=',\n'.join(['tgt.{f}'.format(f=f) for f in fields]),
      values=',\n'.join(['src.{f}'.format(f=f) for f in fields]),
    )

    connection = self.engine.raw_connection()
    connection.autocommit = False
    cursor = connection.cursor()

    counter = 0
    try:
      # Kept inside the try so that a failure here still releases the cursor
      # and the connection.
      if field_types:
        input_sizes = {
          self._fix_f_name(f): cx_data_map[field_types[f][0]]
          for f in field_types
        }
        cursor.setinputsizes(**input_sizes)

      cursor.prepare(sql)

      # Passing None as the statement re-uses the prepared one.
      if streaming:
        # The first row was already pulled off the generator above.
        batch = [row]
        for row in data:
          batch.append(row)
          # ">=" so that a batch_size of 1 still flushes.
          if len(batch) >= self.batch_size:
            cursor.executemany(None, batch)
            counter += len(batch)
            batch = []

        if batch:
          cursor.executemany(None, batch)
          counter += len(batch)
      else:
        cursor.executemany(None, data)
        counter += len(data)

      connection.commit()

    except Exception:
      log(Exception('Error for SQL: ' + sql))
      # Do not persist a half-finished load.
      connection.rollback()
      raise

    finally:
      cursor.close()
      connection.close()

    if not commit:
      return counter

    if echo:
      secs = (datetime.datetime.now() - s_t).total_seconds()
      mins = round(secs / 60, 1)
      # Guard against a zero elapsed time on coarse clocks.
      rate = round(counter / secs, 1) if secs else 0.0
      log("Inserted {} records into table '{}' in {} mins [{} r/s].".format(
        counter, table, mins, rate))
    return counter