def _prep_datum(self, datum, dialect, col, needs_conversion):
    """Put a value in proper format for a SQL string.

    Args:
        datum: Raw value of one cell.
        dialect: Name of the target SQL dialect.  It selects an entry of
            ``self._datetime_format`` and triggers 1/0 booleans when it
            starts with ``'sqlite'``.
        col: Key into ``self.columns``; that entry's ``'pytype'`` decides
            how the value is converted.
        needs_conversion: When true, ``datum`` is converted to the
            column's ``pytype`` first, and blank values become NULL.

    Returns:
        ``'NULL'`` for ``None`` (and, when converting, for blank values);
        a formatted or quoted string for dates and string-like values;
        otherwise the (possibly converted) value itself.
    """
    if datum is None or (needs_conversion and not str(datum).strip()):
        return 'NULL'
    pytype = self.columns[col]['pytype']
    if needs_conversion:
        if pytype in (datetime.datetime, datetime.date):
            # Values that are already date objects cannot be fed to the
            # parser; only text needs parsing.
            if not isinstance(datum, datetime.date):
                datum = dateutil.parser.parse(datum)
                if pytype is datetime.date:
                    # The parser always yields a datetime; date columns
                    # keep only the calendar part.
                    datum = datum.date()
        elif pytype == bool:
            datum = th.coerce_to_specific(datum)
            if dialect.startswith('sqlite'):
                datum = 1 if datum else 0
        else:
            datum = pytype(str(datum))
    # datetime.datetime is a subclass of datetime.date, so one check
    # covers both.
    if isinstance(datum, datetime.date):
        if dialect in self._datetime_format:
            return datum.strftime(self._datetime_format[dialect])
        return "'%s'" % datum
    if hasattr(datum, 'lower'):
        # String-like value: double embedded single quotes.  This is only
        # a simple guard, not complete SQL-injection protection.
        return "'%s'" % datum.replace("'", "''")
    return datum
def _prep_datum(self, datum, dialect, col, needs_conversion):
    """Put a value in proper format for a SQL string.

    Args:
        datum: Raw value of one cell.
        dialect: Name of the target SQL dialect.  It selects an entry of
            ``self._datetime_format`` and triggers 1/0 booleans when it
            starts with ``'sqlite'``.
        col: Key into ``self.columns``; that entry's ``'pytype'`` decides
            how the value is converted.
        needs_conversion: When true, ``datum`` is converted to the
            column's ``pytype`` first, and blank values become NULL.

    Returns:
        ``'NULL'`` for ``None`` (and, when converting, for blank values);
        a formatted or quoted string for dates and string-like values;
        otherwise the (possibly converted) value itself.
    """
    if datum is None or (needs_conversion and not str(datum).strip()):
        return 'NULL'
    pytype = self.columns[col]['pytype']
    if needs_conversion:
        if pytype in (datetime.datetime, datetime.date):
            # Only text needs parsing; date objects pass through as-is
            # because the parser rejects them.
            already_parsed = isinstance(datum, datetime.date)
            if not already_parsed:
                datum = dateutil.parser.parse(datum)
                if pytype is datetime.date:
                    # The parser returns a datetime; a date column keeps
                    # just the calendar part.
                    datum = datum.date()
        elif pytype == bool:
            datum = th.coerce_to_specific(datum)
            if dialect.startswith('sqlite'):
                datum = 1 if datum else 0
        else:
            datum = pytype(str(datum))
    # A single check suffices: datetime.datetime subclasses datetime.date.
    if isinstance(datum, datetime.date):
        if dialect in self._datetime_format:
            return datum.strftime(self._datetime_format[dialect])
        return "'%s'" % datum
    if hasattr(datum, 'lower'):
        # String-like value: escape embedded single quotes by doubling
        # them.  A simple guard only, not full SQL-injection protection.
        return "'%s'" % datum.replace("'", "''")
    return datum
def _determine_types(self):
    """Populate ``self.columns`` with per-column metadata.

    If ``self.data`` has a ``generator`` exposing ``sqla_columns``, the
    metadata (``is_nullable``, ``is_unique``, ``satype``, ``pytype``) is
    copied from those column objects and no rows are read.

    Otherwise every row of ``self.data`` is scanned.  For each key the
    method tracks a representative ``sample_datum``, the longest string
    form seen (``str_length``), whether the column must be nullable, and
    whether all values seen were distinct.  Each column dict is then
    passed to ``self._fill_metadata_from_sample`` and ``is_unique`` is
    reduced to a bool.  Non-scalar values get a note in
    ``self.comments`` and a logged warning.
    """
    self.columns = OrderedDict()
    # Set up front so the attribute exists on the early-return path too.
    self.comments = {}
    # self.data may be a plain iterable without a ``generator`` attribute.
    generator = getattr(self.data, 'generator', None)
    if hasattr(generator, 'sqla_columns'):
        for col in generator.sqla_columns:
            self.columns[col.name] = {'is_nullable': col.nullable,
                                      'is_unique': col.unique,
                                      'satype': col.type,
                                      'pytype': col.pytype}
        return
    for rowcount, row in enumerate(self.data, 1):
        keys = row.keys()
        # A known column missing from this row has to accept NULL.
        for col_name in self.columns:
            if col_name not in keys:
                self.columns[col_name]['is_nullable'] = True
        if not isinstance(row, OrderedDict):
            # No inherent key order: sort for a deterministic column order.
            keys = sorted(keys)
        for k in keys:
            v_raw = row[k]
            if not th.is_scalar(v_raw):
                self.comments[k] = 'nested values! example:\n%s' % \
                    pprint.pformat(str(v_raw))
                logging.warning('in %s: %s', k, self.comments[k])
            v = th.coerce_to_specific(v_raw)
            is_blank = v is None or not str(v).strip()
            if k not in self.columns:
                self.columns[k] = {
                    'sample_datum': v,
                    'str_length': len(str(v_raw)),
                    # Only a column with a real value in the very first
                    # row can start out as NOT NULL.
                    'is_nullable': rowcount != 1 or is_blank,
                    # Set of values seen so far; becomes False on the
                    # first repeat.
                    'is_unique': {v},
                }
            else:
                col = self.columns[k]
                col['str_length'] = max(col['str_length'], len(str(v_raw)))
                col['sample_datum'] = th.best_representative(
                    col['sample_datum'], v)
                if is_blank:
                    col['is_nullable'] = True
                if col['is_unique'] is not False:
                    if v in col['is_unique']:
                        col['is_unique'] = False
                    else:
                        col['is_unique'].add(v)
    for col in self.columns.values():
        self._fill_metadata_from_sample(col)
        col['is_unique'] = bool(col['is_unique'])
def _prep_datum(self, datum, dialect, col):
    """Convert a value to its column type and format it for a SQL string.

    Args:
        datum: Raw value of one cell.
        dialect: Name of the target SQL dialect; selects an entry of
            ``self._datetime_format`` if one exists.
        col: Key into ``self.columns``; that entry's ``'pytype'`` decides
            how the value is converted.

    Returns:
        ``'NULL'`` for ``None``; a formatted or quoted string for
        datetimes and string-like values; otherwise the converted value.
    """
    if datum is None:
        # Without this guard None would become the literal text 'None'
        # or crash the numeric conversions below.
        return 'NULL'
    pytype = self.columns[col]['pytype']
    if pytype == datetime.datetime:
        # The parser only accepts text; an existing datetime is kept.
        if not isinstance(datum, datetime.datetime):
            datum = dateutil.parser.parse(datum)
    elif pytype == bool:
        datum = th.coerce_to_specific(datum)
    else:
        datum = pytype(str(datum))
    if isinstance(datum, datetime.datetime):
        if dialect in self._datetime_format:
            return datum.strftime(self._datetime_format[dialect])
        return "'%s'" % datum
    if hasattr(datum, 'lower'):
        # String-like value: double embedded single quotes.
        return "'%s'" % datum.replace("'", "''")
    return datum
def _determine_types(self, varying_length_text=False, uniques=False):
    """Scan ``self.data`` and populate ``self.columns`` with metadata.

    For each key found in the rows the method tracks a representative
    ``sample_datum``, whether the column must be nullable, and whether
    all values seen were distinct.  Each column dict is then passed to
    ``self._fill_metadata_from_sample`` and ``is_unique`` is reduced to a
    bool.  Non-scalar values are stringified, noted in ``self.comments``
    and logged as a warning.

    Args:
        varying_length_text: Accepted but not read by this method.
        uniques: Accepted but not read by this method.
    """
    self.columns = OrderedDict()
    self.comments = {}
    for rowcount, row in enumerate(self.data, 1):
        keys = row.keys()
        # A known column missing from this row has to accept NULL.
        for col_name in self.columns:
            if col_name not in keys:
                self.columns[col_name]['is_nullable'] = True
        if not isinstance(row, OrderedDict):
            # No inherent key order: sort for a deterministic column order.
            keys = sorted(keys)
        for k in keys:
            v = row[k]
            if not th.is_scalar(v):
                v = str(v)
                self.comments[k] = \
                    'nested values! example:\n%s' % pprint.pformat(v)
                logging.warning('in %s: %s', k, self.comments[k])
            v = th.coerce_to_specific(v)
            if k not in self.columns:
                self.columns[k] = {
                    'sample_datum': v,
                    # Only a column with a value in the very first row
                    # can start out as NOT NULL.
                    'is_nullable': rowcount != 1 or v is None,
                    # Set of values seen so far; becomes False on the
                    # first repeat.  (The literal used to carry a second,
                    # overridden 'is_unique' key.)
                    'is_unique': {v},
                }
            else:
                col = self.columns[k]
                col['sample_datum'] = th.best_representative(
                    col['sample_datum'], v)
                if v is None:
                    col['is_nullable'] = True
                if col['is_unique'] is not False:
                    if v in col['is_unique']:
                        col['is_unique'] = False
                    else:
                        col['is_unique'].add(v)
    for col in self.columns.values():
        self._fill_metadata_from_sample(col)
        col['is_unique'] = bool(col['is_unique'])
def _determine_types(self):
    """Walk ``self.data`` and record metadata for every column seen.

    Resets ``self.columns`` and ``self.comments``, then reads each row.
    Per column it keeps a representative ``sample_datum``, a nullable
    flag, and a uniqueness tracker.  Each column dict is finally handed
    to ``self._fill_metadata_from_sample`` and its ``is_unique`` entry is
    turned into a bool.
    """
    self.columns = OrderedDict()
    self.comments = {}
    for row_number, record in enumerate(self.data, start=1):
        present = record.keys()
        # Columns seen earlier but absent here must allow NULL.
        for name in self.columns:
            if name not in present:
                self.columns[name]['is_nullable'] = True
        if isinstance(record, OrderedDict):
            ordered_keys = present
        else:
            ordered_keys = sorted(present)
        for key in ordered_keys:
            value = record[key]
            if not th.is_scalar(value):
                # Nested structure: work with its text form and leave a
                # note for the reader.
                value = str(value)
                note = 'nested values! example:\n%s' % pprint.pformat(value)
                self.comments[key] = note
                logging.warning('in %s: %s' % (key, self.comments[key]))
            value = th.coerce_to_specific(value)
            if key in self.columns:
                meta = self.columns[key]
                meta['sample_datum'] = th.best_representative(
                    meta['sample_datum'], value)
                if value is None:
                    meta['is_nullable'] = True
                seen = meta['is_unique']
                if seen is not False:
                    if value in seen:
                        meta['is_unique'] = False
                    else:
                        seen.add(value)
            else:
                first_row_has_value = row_number == 1 and value is not None
                self.columns[key] = {
                    'sample_datum': value,
                    'is_nullable': not first_row_has_value,
                    'is_unique': {value},
                }
    for meta in self.columns.values():
        self._fill_metadata_from_sample(meta)
        meta['is_unique'] = bool(meta['is_unique'])
def _determine_types(self):
    """Populate ``self.columns`` with per-column metadata.

    If ``self.data`` has a ``generator`` exposing ``sqla_columns``, the
    metadata (``is_nullable``, ``is_unique``, ``satype``, ``pytype``) is
    copied from those column objects and no rows are read.

    Otherwise every row of ``self.data`` is scanned.  For each key the
    method tracks a representative ``sample_datum``, the longest string
    form seen (``str_length``), whether the column must be nullable, and
    whether all values seen were distinct.  Each column dict is then
    passed to ``self._fill_metadata_from_sample`` and ``is_unique`` is
    reduced to a bool.  Non-scalar values get a note in
    ``self.comments`` and a logged warning.
    """
    self.columns = OrderedDict()
    # Set before the fast path returns so the attribute always exists.
    self.comments = {}
    if hasattr(self.data, 'generator') and hasattr(self.data.generator,
                                                   'sqla_columns'):
        for sqla_col in self.data.generator.sqla_columns:
            self.columns[sqla_col.name] = {
                'is_nullable': sqla_col.nullable,
                'is_unique': sqla_col.unique,
                'satype': sqla_col.type,
                'pytype': sqla_col.pytype,
            }
        return
    for rowcount, row in enumerate(self.data, 1):
        keys = row.keys()
        # A known column missing from this row has to accept NULL.
        for col_name in self.columns:
            if col_name not in keys:
                self.columns[col_name]['is_nullable'] = True
        if not isinstance(row, OrderedDict):
            # No inherent key order: sort for a deterministic column order.
            keys = sorted(keys)
        for k in keys:
            v_raw = row[k]
            if not th.is_scalar(v_raw):
                self.comments[k] = 'nested values! example:\n%s' % \
                    pprint.pformat(str(v_raw))
                logging.warning('in %s: %s', k, self.comments[k])
            v = th.coerce_to_specific(v_raw)
            is_blank = v is None or not str(v).strip()
            if k not in self.columns:
                self.columns[k] = {
                    'sample_datum': v,
                    'str_length': len(str(v_raw)),
                    # Only a column with a real value in the very first
                    # row can start out as NOT NULL.
                    'is_nullable': rowcount != 1 or is_blank,
                    # Set of values seen so far; becomes False on the
                    # first repeat.
                    'is_unique': {v},
                }
            else:
                col = self.columns[k]
                col['str_length'] = max(col['str_length'], len(str(v_raw)))
                col['sample_datum'] = th.best_representative(
                    col['sample_datum'], v)
                if is_blank:
                    col['is_nullable'] = True
                if col['is_unique'] is not False:
                    if v in col['is_unique']:
                        col['is_unique'] = False
                    else:
                        col['is_unique'].add(v)
    for col in self.columns.values():
        self._fill_metadata_from_sample(col)
        col['is_unique'] = bool(col['is_unique'])