def _determine_types(self):
    """Infer per-column metadata from ``self.data`` into ``self.columns``.

    When ``self.data.generator`` exposes ``sqla_columns``, the metadata
    (``is_nullable``, ``is_unique``, ``satype``, ``pytype``) is copied from
    those column objects and no rows are read.

    Otherwise every row of ``self.data`` is scanned.  For each key the
    column entry tracks:

    * ``sample_datum`` -- the value chosen by ``th.best_representative``
      across the coerced values seen so far;
    * ``str_length`` -- the longest ``str()`` of any raw value;
    * ``is_nullable`` -- ``True`` once a row lacks the key, a coerced
      value is ``None`` or blank, or the key first appears after row 1;
    * ``is_unique`` -- ``True`` only if no coerced value was repeated.

    Each column dict is finally handed to
    ``self._fill_metadata_from_sample``.  ``self.comments`` is reset and
    gains a note for every key that held a non-scalar value.
    """
    self.columns = OrderedDict()
    # Reset up front so the attribute exists whichever path is taken.
    self.comments = {}

    # ``self.data`` is not guaranteed to carry a ``generator`` attribute
    # (a plain list of rows does not), so probe for it before going deeper.
    generator = getattr(self.data, 'generator', None)
    if hasattr(generator, 'sqla_columns'):
        for described in generator.sqla_columns:
            self.columns[described.name] = {
                'is_nullable': described.nullable,
                'is_unique': described.unique,
                'satype': described.type,
                'pytype': described.pytype,
            }
        return

    for rowcount, row in enumerate(self.data, start=1):
        keys = row.keys()
        # A column seen earlier but absent from this row must allow NULL.
        for col_name in self.columns:
            if col_name not in keys:
                self.columns[col_name]['is_nullable'] = True
        # Only OrderedDict rows keep their own key order; anything else is
        # sorted so the resulting column order is deterministic.
        if not isinstance(row, OrderedDict):
            keys = sorted(keys)
        for k in keys:
            v_raw = row[k]
            if not th.is_scalar(v_raw):
                self.comments[k] = ('nested values! example:\n%s'
                                    % pprint.pformat(str(v_raw)))
                logging.warning('in %s: %s', k, self.comments[k])
            # NOTE(review): the raw value (not its string form) is coerced
            # even when it is non-scalar -- confirm this is intended.
            v = th.coerce_to_specific(v_raw)
            is_blank = v is None or not str(v).strip()
            if k not in self.columns:
                self.columns[k] = {
                    'sample_datum': v,
                    'str_length': len(str(v_raw)),
                    'is_nullable': rowcount != 1 or is_blank,
                    'is_unique': {v},
                }
            else:
                col = self.columns[k]
                col['str_length'] = max(col['str_length'], len(str(v_raw)))
                col['sample_datum'] = th.best_representative(
                    col['sample_datum'], v)
                if is_blank:
                    col['is_nullable'] = True
                # ``is_unique`` is the set of values seen so far until a
                # duplicate turns it into False for good.
                if col['is_unique'] is not False:
                    if v in col['is_unique']:
                        col['is_unique'] = False
                    else:
                        col['is_unique'].add(v)

    for col in self.columns.values():
        self._fill_metadata_from_sample(col)
        col['is_unique'] = bool(col['is_unique'])
def _determine_types(self, varying_length_text=False, uniques=False):
    """Infer per-column metadata from the rows of ``self.data``.

    ``self.columns`` and ``self.comments`` are rebuilt from scratch.  For
    each key found in the rows the column entry tracks:

    * ``sample_datum`` -- the value chosen by ``th.best_representative``
      across the coerced values seen so far;
    * ``is_nullable`` -- ``True`` once a row lacks the key, a coerced
      value is ``None``, or the key first appears after row 1;
    * ``is_unique`` -- ``True`` only if no coerced value was repeated.

    Non-scalar values are replaced by their ``str()`` form and noted in
    ``self.comments``.  Each column dict is finally handed to
    ``self._fill_metadata_from_sample``.

    ``varying_length_text`` and ``uniques`` are accepted but not used by
    this method.
    """
    self.columns = OrderedDict()
    self.comments = {}

    for rowcount, row in enumerate(self.data, start=1):
        keys = row.keys()
        # A column seen earlier but absent from this row must allow NULL.
        for col_name in self.columns:
            if col_name not in keys:
                self.columns[col_name]['is_nullable'] = True
        # Only OrderedDict rows keep their own key order; anything else is
        # sorted so the resulting column order is deterministic.
        if not isinstance(row, OrderedDict):
            keys = sorted(keys)
        for k in keys:
            v = row[k]
            if not th.is_scalar(v):
                # Nested values are flattened to text and flagged.
                v = str(v)
                self.comments[k] = ('nested values! example:\n%s'
                                    % pprint.pformat(v))
                logging.warning('in %s: %s', k, self.comments[k])
            v = th.coerce_to_specific(v)
            if k not in self.columns:
                self.columns[k] = {
                    'sample_datum': v,
                    'is_nullable': rowcount != 1 or v is None,
                    'is_unique': {v},
                }
            else:
                col = self.columns[k]
                col['sample_datum'] = th.best_representative(
                    col['sample_datum'], v)
                if v is None:
                    col['is_nullable'] = True
                # ``is_unique`` is the set of values seen so far until a
                # duplicate turns it into False for good.
                if col['is_unique'] is not False:
                    if v in col['is_unique']:
                        col['is_unique'] = False
                    else:
                        col['is_unique'].add(v)

    for col in self.columns.values():
        self._fill_metadata_from_sample(col)
        col['is_unique'] = bool(col['is_unique'])
def _determine_types(self):
    """Infer per-column metadata from the rows of ``self.data``.

    ``self.columns`` and ``self.comments`` are rebuilt from scratch.  For
    each key found in the rows the column entry tracks:

    * ``sample_datum`` -- the value chosen by ``th.best_representative``
      across the coerced values seen so far;
    * ``is_nullable`` -- ``True`` once a row lacks the key, a coerced
      value is ``None``, or the key first appears after row 1;
    * ``is_unique`` -- ``True`` only if no coerced value was repeated.

    Non-scalar values are replaced by their ``str()`` form and noted in
    ``self.comments``.  Each column dict is finally handed to
    ``self._fill_metadata_from_sample``.
    """
    self.columns = OrderedDict()
    self.comments = {}

    for rowcount, row in enumerate(self.data, start=1):
        keys = row.keys()
        # A column seen earlier but absent from this row must allow NULL.
        for known in self.columns:
            if known not in keys:
                self.columns[known]['is_nullable'] = True
        # Only OrderedDict rows keep their own key order; anything else is
        # sorted so the resulting column order is deterministic.
        if not isinstance(row, OrderedDict):
            keys = sorted(keys)
        for k in keys:
            v = row[k]
            if not th.is_scalar(v):
                # Nested values are flattened to text and flagged.
                v = str(v)
                self.comments[k] = ('nested values! example:\n%s'
                                    % pprint.pformat(v))
                logging.warning('in %s: %s', k, self.comments[k])
            v = th.coerce_to_specific(v)
            if k not in self.columns:
                self.columns[k] = {
                    'sample_datum': v,
                    'is_nullable': rowcount != 1 or v is None,
                    'is_unique': {v},
                }
                continue
            col = self.columns[k]
            col['sample_datum'] = th.best_representative(
                col['sample_datum'], v)
            if v is None:
                col['is_nullable'] = True
            # ``is_unique`` is the set of values seen so far until a
            # duplicate turns it into False for good.
            if col['is_unique'] is not False:
                if v in col['is_unique']:
                    col['is_unique'] = False
                else:
                    col['is_unique'].add(v)

    for col in self.columns.values():
        self._fill_metadata_from_sample(col)
        col['is_unique'] = bool(col['is_unique'])
def _determine_types(self):
    """Infer per-column metadata from ``self.data`` into ``self.columns``.

    When ``self.data`` has a ``generator`` that exposes ``sqla_columns``,
    the metadata (``is_nullable``, ``is_unique``, ``satype``, ``pytype``)
    is copied from those column objects and no rows are read.

    Otherwise every row of ``self.data`` is scanned.  For each key the
    column entry tracks:

    * ``sample_datum`` -- the value chosen by ``th.best_representative``
      across the coerced values seen so far;
    * ``str_length`` -- the longest ``str()`` of any raw value;
    * ``is_nullable`` -- ``True`` once a row lacks the key, a coerced
      value is ``None`` or blank, or the key first appears after row 1;
    * ``is_unique`` -- ``True`` only if no coerced value was repeated.

    Each column dict is finally handed to
    ``self._fill_metadata_from_sample``.  ``self.comments`` is reset and
    gains a note for every key that held a non-scalar value.
    """
    self.columns = OrderedDict()
    # Reset before the early return so the attribute exists on both paths.
    self.comments = {}

    if (hasattr(self.data, 'generator')
            and hasattr(self.data.generator, 'sqla_columns')):
        for described in self.data.generator.sqla_columns:
            self.columns[described.name] = {
                'is_nullable': described.nullable,
                'is_unique': described.unique,
                'satype': described.type,
                'pytype': described.pytype,
            }
        return

    for rowcount, row in enumerate(self.data, start=1):
        keys = row.keys()
        # A column seen earlier but absent from this row must allow NULL.
        for col_name in self.columns:
            if col_name not in keys:
                self.columns[col_name]['is_nullable'] = True
        # Only OrderedDict rows keep their own key order; anything else is
        # sorted so the resulting column order is deterministic.
        if not isinstance(row, OrderedDict):
            keys = sorted(keys)
        for k in keys:
            v_raw = row[k]
            raw_length = len(str(v_raw))
            if not th.is_scalar(v_raw):
                self.comments[k] = ('nested values! example:\n%s'
                                    % pprint.pformat(str(v_raw)))
                logging.warning('in %s: %s', k, self.comments[k])
            # NOTE(review): the raw value (not its string form) is coerced
            # even when it is non-scalar -- confirm this is intended.
            v = th.coerce_to_specific(v_raw)
            is_blank = v is None or not str(v).strip()
            if k not in self.columns:
                self.columns[k] = {
                    'sample_datum': v,
                    'str_length': raw_length,
                    'is_nullable': rowcount != 1 or is_blank,
                    'is_unique': {v},
                }
                continue
            col = self.columns[k]
            col['str_length'] = max(col['str_length'], raw_length)
            col['sample_datum'] = th.best_representative(
                col['sample_datum'], v)
            if is_blank:
                col['is_nullable'] = True
            # ``is_unique`` is the set of values seen so far until a
            # duplicate turns it into False for good.
            if col['is_unique'] is not False:
                if v in col['is_unique']:
                    col['is_unique'] = False
                else:
                    col['is_unique'].add(v)

    for col in self.columns.values():
        self._fill_metadata_from_sample(col)
        col['is_unique'] = bool(col['is_unique'])