Ejemplo n.º 1
0
 def _determine_types(self):
     column_data = OrderedDict()
     self.columns = OrderedDict()
     if hasattr(self.data.generator, 'sqla_columns'):
         for col in self.data.generator.sqla_columns:
             self.columns[col.name] = {'is_nullable': col.nullable,
                                       'is_unique': col.unique,
                                       'satype': col.type,
                                       'pytype': col.pytype}
         return
     self.comments = {}
     rowcount = 0
     for row in self.data:
         rowcount += 1
         keys = row.keys()
         for col_name in self.columns:
             if col_name not in keys:
                 self.columns[col_name]['is_nullable'] = True
         if not isinstance(row, OrderedDict):
             keys = sorted(keys)
         for k in keys:
             v_raw = row[k]
             if not th.is_scalar(v_raw):
                 v = str(v_raw)
                 self.comments[k] = 'nested values! example:\n%s' % \
                                    pprint.pformat(v)
                 logging.warning('in %s: %s' % (k, self.comments[k]))
             v = th.coerce_to_specific(v_raw)
             if k not in self.columns:
                 self.columns[k] = {'sample_datum': v,
                                    'str_length': len(str(v_raw)),
                                    'is_nullable': not (rowcount == 1 and
                                                        v is not None and
                                                        str(v).strip()
                                                        ),
                                    'is_unique': set([v, ])}
             else:
                 col = self.columns[k]
                 col['str_length'] = max(col['str_length'], len(str(v_raw)))
                 old_sample_datum = col.get('sample_datum')
                 col['sample_datum'] = th.best_representative(
                     col['sample_datum'], v)
                 if (v is None) or (not str(v).strip()):
                     col['is_nullable'] = True
                 if (col['is_unique'] != False):
                     if v in col['is_unique']:   
                         col['is_unique'] = False
                     else:
                         col['is_unique'].add(v)
     for col_name in self.columns:
         col = self.columns[col_name]
         self._fill_metadata_from_sample(col)
         col['is_unique'] = bool(col['is_unique'])
Ejemplo n.º 2
0
 def _determine_types(self, varying_length_text=False, uniques=False):
     column_data = OrderedDict()
     self.columns = OrderedDict()
     self.comments = {}
     rowcount = 0
     for row in self.data:
         rowcount += 1
         keys = row.keys()
         for col_name in self.columns:
             if col_name not in keys:
                 self.columns[col_name]['is_nullable'] = True
         if not isinstance(row, OrderedDict):
             keys = sorted(keys)
         for k in keys:
             v = row[k]
             if not th.is_scalar(v):
                 v = str(v)
                 self.comments[
                     k] = 'nested values! example:\n%s' % pprint.pformat(v)
                 logging.warning('in %s: %s' % (k, self.comments[k]))
             v = th.coerce_to_specific(v)
             if k not in self.columns:
                 self.columns[k] = {
                     'sample_datum': v,
                     'is_unique': True,
                     'is_nullable': not (rowcount == 1 and v is not None),
                     'is_unique': set([
                         v,
                     ])
                 }
             else:
                 col = self.columns[k]
                 col['sample_datum'] = th.best_representative(
                     col['sample_datum'], v)
                 if (v is None):
                     col['is_nullable'] = True
                 if (col['is_unique'] != False):
                     if v in col['is_unique']:
                         col['is_unique'] = False
                     else:
                         col['is_unique'].add(v)
     for col_name in self.columns:
         col = self.columns[col_name]
         self._fill_metadata_from_sample(col)
         col['is_unique'] = bool(col['is_unique'])
Ejemplo n.º 3
0
 def _determine_types(self):
     column_data = OrderedDict()
     self.columns = OrderedDict()
     self.comments = {}
     rowcount = 0
     for row in self.data:
         rowcount += 1
         keys = row.keys()
         for col_name in self.columns:
             if col_name not in keys:
                 self.columns[col_name]['is_nullable'] = True
         if not isinstance(row, OrderedDict):
             keys = sorted(keys)
         for k in keys:
             v = row[k]
             if not th.is_scalar(v):
                 v = str(v)
                 self.comments[k] = 'nested values! example:\n%s' % \
                                    pprint.pformat(v)
                 logging.warning('in %s: %s' % (k, self.comments[k]))
             v = th.coerce_to_specific(v)
             if k not in self.columns:
                 self.columns[k] = {'sample_datum': v,
                                    'is_nullable': not (rowcount == 1 and
                                                        v is not None),
                                    'is_unique': set([v, ])}
             else:
                 col = self.columns[k]
                 col['sample_datum'] = th.best_representative(
                     col['sample_datum'], v)
                 if (v is None):
                     col['is_nullable'] = True
                 if (col['is_unique'] != False):
                     if v in col['is_unique']:
                         col['is_unique'] = False
                     else:
                         col['is_unique'].add(v)
     for col_name in self.columns:
         col = self.columns[col_name]
         self._fill_metadata_from_sample(col)
         col['is_unique'] = bool(col['is_unique'])
Ejemplo n.º 4
0
 def _determine_types(self):
     column_data = OrderedDict()
     self.columns = OrderedDict()
     if hasattr(self.data, 'generator') and hasattr(self.data.generator,
                                                    'sqla_columns'):
         for col in self.data.generator.sqla_columns:
             self.columns[col.name] = {
                 'is_nullable': col.nullable,
                 'is_unique': col.unique,
                 'satype': col.type,
                 'pytype': col.pytype
             }
         return
     self.comments = {}
     rowcount = 0
     for row in self.data:
         rowcount += 1
         keys = row.keys()
         for col_name in self.columns:
             if col_name not in keys:
                 self.columns[col_name]['is_nullable'] = True
         if not isinstance(row, OrderedDict):
             keys = sorted(keys)
         for k in keys:
             v_raw = row[k]
             if not th.is_scalar(v_raw):
                 v = str(v_raw)
                 self.comments[k] = 'nested values! example:\n%s' % \
                                    pprint.pformat(v)
                 logging.warning('in %s: %s' % (k, self.comments[k]))
             v = th.coerce_to_specific(v_raw)
             if k not in self.columns:
                 self.columns[k] = {
                     'sample_datum':
                     v,
                     'str_length':
                     len(str(v_raw)),
                     'is_nullable':
                     not (rowcount == 1 and v is not None
                          and str(v).strip()),
                     'is_unique':
                     set([
                         v,
                     ])
                 }
             else:
                 col = self.columns[k]
                 col['str_length'] = max(col['str_length'], len(str(v_raw)))
                 old_sample_datum = col.get('sample_datum')
                 col['sample_datum'] = th.best_representative(
                     col['sample_datum'], v)
                 if (v is None) or (not str(v).strip()):
                     col['is_nullable'] = True
                 if (col['is_unique'] != False):
                     if v in col['is_unique']:
                         col['is_unique'] = False
                     else:
                         col['is_unique'].add(v)
     for col_name in self.columns:
         col = self.columns[col_name]
         self._fill_metadata_from_sample(col)
         col['is_unique'] = bool(col['is_unique'])