Example #1
0
    def _prep_datum(self, datum, dialect, col, needs_conversion):
        """Render ``datum`` as a literal for embedding in a SQL string.

        ``None`` always becomes ``'NULL'``; when ``needs_conversion`` is
        true, a value whose ``str()`` is blank does too.  With
        ``needs_conversion`` the value is first converted according to
        ``self.columns[col]['pytype']``.  Dates and datetimes are rendered
        with ``self._datetime_format[dialect]`` when the dialect has an
        entry there and are single-quoted otherwise; anything exposing a
        ``lower`` attribute is single-quoted with embedded single quotes
        doubled; every other value is handed back unchanged (not as a
        string).
        """
        if datum is None:
            return 'NULL'
        if needs_conversion and not str(datum).strip():
            return 'NULL'

        # Looked up even when no conversion is requested, so an unknown
        # ``col`` raises KeyError on both paths.
        target_type = self.columns[col]['pytype']

        if not needs_conversion:
            value = datum
        elif target_type == datetime.datetime:
            value = dateutil.parser.parse(datum)
        elif target_type == bool:
            value = th.coerce_to_specific(datum)
            if dialect.startswith('sqlite'):
                # Dialects starting with 'sqlite' get an integer flag.
                value = 1 if value else 0
        else:
            value = target_type(str(datum))

        if isinstance(value, (datetime.datetime, datetime.date)):
            if dialect not in self._datetime_format:
                return "'%s'" % value
            return value.strftime(self._datetime_format[dialect])
        if hasattr(value, 'lower'):
            # simple SQL injection protection, sort of... ?
            escaped = value.replace("'", "''")
            return "'%s'" % escaped
        return value
Example #2
0
 def _prep_datum(self, datum, dialect, col, needs_conversion):
     """Format a single value so it can be spliced into SQL text.

     Returns the string ``'NULL'`` for ``None`` (and, when
     ``needs_conversion`` is set, for values whose ``str()`` is blank).
     When ``needs_conversion`` is set the value is converted using the
     column's recorded ``pytype`` before formatting.  Date-like values
     use the dialect's entry in ``self._datetime_format`` if there is
     one, else a single-quoted ``str()``; values with a ``lower``
     attribute are single-quoted with ``'`` doubled; anything else is
     returned as-is.
     """
     nothing_to_store = datum is None
     if not nothing_to_store and needs_conversion:
         nothing_to_store = not str(datum).strip()
     if nothing_to_store:
         return 'NULL'

     # Fetched unconditionally; a missing column is an error either way.
     pytype = self.columns[col]['pytype']

     if needs_conversion:
         if pytype == bool:
             datum = th.coerce_to_specific(datum)
             if dialect.startswith('sqlite'):
                 datum = int(bool(datum))
         elif pytype == datetime.datetime:
             datum = dateutil.parser.parse(datum)
         else:
             datum = pytype(str(datum))

     date_like = isinstance(datum, (datetime.datetime, datetime.date))
     if date_like:
         rendered = (datum.strftime(self._datetime_format[dialect])
                     if dialect in self._datetime_format
                     else "'%s'" % datum)
         return rendered
     if not hasattr(datum, 'lower'):
         return datum
     # simple SQL injection protection, sort of... ?
     return "'%s'" % datum.replace("'", "''")
Example #3
0
 def _determine_types(self):
     """Populate ``self.columns`` with per-column metadata.

     When ``self.data`` exposes a ``generator`` that carries
     ``sqla_columns``, nullability, uniqueness, ``satype`` and ``pytype``
     are copied from those column objects and no rows are read
     (``self.comments`` is left untouched on that path).

     Otherwise ``self.comments`` is reset and every row of ``self.data``
     is scanned.  For each key the method tracks a representative sample
     value, the longest ``str()`` length of the raw values, whether the
     column must be nullable (absent from some row, first seen after the
     first row, ``None`` or blank) and whether every coerced value was
     distinct.  Finally ``self._fill_metadata_from_sample`` is called on
     each column and ``is_unique`` is collapsed to a plain bool.
     """
     self.columns = OrderedDict()

     # ``self.data`` may be a plain iterable without a ``generator``
     # attribute; look it up defensively instead of raising
     # AttributeError before any row is read.
     generator = getattr(self.data, 'generator', None)
     if hasattr(generator, 'sqla_columns'):
         for col in generator.sqla_columns:
             self.columns[col.name] = {'is_nullable': col.nullable,
                                       'is_unique': col.unique,
                                       'satype': col.type,
                                       'pytype': col.pytype}
         return

     self.comments = {}
     for rowcount, row in enumerate(self.data, start=1):
         keys = row.keys()
         # A column seen in an earlier row but absent here must allow NULL.
         for col_name, known in self.columns.items():
             if col_name not in keys:
                 known['is_nullable'] = True
         # Only OrderedDict rows are trusted to carry a meaningful key
         # order; everything else is processed alphabetically.
         if not isinstance(row, OrderedDict):
             keys = sorted(keys)
         for k in keys:
             v_raw = row[k]
             if not th.is_scalar(v_raw):
                 # NOTE(review): the stringified form is used only for the
                 # comment; the raw nested value is what gets coerced
                 # below -- confirm th.coerce_to_specific copes with it.
                 self.comments[k] = 'nested values! example:\n%s' % \
                                    pprint.pformat(str(v_raw))
                 logging.warning('in %s: %s' % (k, self.comments[k]))
             v = th.coerce_to_specific(v_raw)
             is_blank = v is None or not str(v).strip()
             if k not in self.columns:
                 self.columns[k] = {
                     'sample_datum': v,
                     'str_length': len(str(v_raw)),
                     # Non-nullable only if present and non-blank from
                     # the very first row onwards.
                     'is_nullable': rowcount != 1 or is_blank,
                     # Set of values seen so far; becomes False on the
                     # first duplicate.
                     'is_unique': {v},
                 }
             else:
                 col = self.columns[k]
                 col['str_length'] = max(col['str_length'], len(str(v_raw)))
                 col['sample_datum'] = th.best_representative(
                     col['sample_datum'], v)
                 if is_blank:
                     col['is_nullable'] = True
                 seen = col['is_unique']
                 if seen is not False:
                     if v in seen:
                         col['is_unique'] = False
                     else:
                         seen.add(v)

     for col in self.columns.values():
         self._fill_metadata_from_sample(col)
         # Non-empty set -> True, False stays False.
         col['is_unique'] = bool(col['is_unique'])
Example #4
0
 def _prep_datum(self, datum, dialect, col):
     """Convert ``datum`` to the column's type and format it for SQL text.

     ``None`` is rendered as the string ``'NULL'`` rather than being
     pushed through the type conversion (which would yield the literal
     ``'None'`` for ``str`` columns and raise for numeric ones).

     Otherwise the value is converted according to
     ``self.columns[col]['pytype']``.  Datetimes are rendered with
     ``self._datetime_format[dialect]`` when the dialect has an entry
     there and single-quoted otherwise; values with a ``lower``
     attribute are single-quoted with embedded single quotes doubled;
     anything else is returned unchanged (not as a string).
     """
     if datum is None:
         return 'NULL'

     pytype = self.columns[col]['pytype']
     if pytype == datetime.datetime:
         datum = dateutil.parser.parse(datum)
     elif pytype == bool:
         datum = th.coerce_to_specific(datum)
     else:
         datum = pytype(str(datum))

     if isinstance(datum, datetime.datetime):
         if dialect in self._datetime_format:
             return datum.strftime(self._datetime_format[dialect])
         return "'%s'" % datum
     if hasattr(datum, 'lower'):
         # Double embedded single quotes so the literal stays well formed.
         return "'%s'" % datum.replace("'", "''")
     return datum
Example #5
0
 def _prep_datum(self, datum, dialect, col):
     """Return ``datum`` formatted for inclusion in a SQL statement.

     A ``None`` value short-circuits to the string ``'NULL'``; without
     this guard it would be stringified to ``'None'`` and then either
     stored as that text or rejected by the numeric constructors.

     Any other value is converted via ``self.columns[col]['pytype']``
     and then formatted: datetimes through
     ``self._datetime_format[dialect]`` if the dialect is listed there,
     otherwise as a single-quoted ``str()``; values having a ``lower``
     attribute as a single-quoted literal with ``'`` doubled; all other
     values are returned as the converted object itself.
     """
     if datum is None:
         return 'NULL'

     pytype = self.columns[col]['pytype']
     if pytype == datetime.datetime:
         converted = dateutil.parser.parse(datum)
     elif pytype == bool:
         converted = th.coerce_to_specific(datum)
     else:
         converted = pytype(str(datum))

     if isinstance(converted, datetime.datetime):
         if dialect not in self._datetime_format:
             return "'%s'" % converted
         return converted.strftime(self._datetime_format[dialect])
     if not hasattr(converted, 'lower'):
         return converted
     # Escape embedded single quotes by doubling them.
     return "'%s'" % converted.replace("'", "''")
Example #6
0
 def _determine_types(self, varying_length_text=False, uniques=False):
     """Scan ``self.data`` and record per-column metadata.

     Resets ``self.columns`` (an ``OrderedDict`` keyed by column name)
     and ``self.comments``.  For every key of every row a representative
     sample value is maintained, the column is marked nullable if it is
     absent from a row, first appears after the first row or holds
     ``None``, and the coerced values are tracked to decide whether they
     were all distinct.  Non-scalar values are stringified and noted in
     ``self.comments``.  Afterwards ``self._fill_metadata_from_sample``
     is called for each column and ``is_unique`` is collapsed to a bool.

     ``varying_length_text`` and ``uniques`` are accepted for interface
     compatibility; nothing in this method body reads them.
     """
     self.columns = OrderedDict()
     self.comments = {}
     for rowcount, row in enumerate(self.data, start=1):
         keys = row.keys()
         # A column seen earlier but missing from this row must allow NULL.
         for col_name, known in self.columns.items():
             if col_name not in keys:
                 known['is_nullable'] = True
         # Rows that are not OrderedDicts are processed in sorted key order.
         if not isinstance(row, OrderedDict):
             keys = sorted(keys)
         for k in keys:
             v = row[k]
             if not th.is_scalar(v):
                 v = str(v)
                 self.comments[
                     k] = 'nested values! example:\n%s' % pprint.pformat(v)
                 logging.warning('in %s: %s' % (k, self.comments[k]))
             v = th.coerce_to_specific(v)
             if k not in self.columns:
                 self.columns[k] = {
                     'sample_datum': v,
                     'is_nullable': rowcount != 1 or v is None,
                     # Set of values seen so far; replaced by False on
                     # the first duplicate.
                     'is_unique': {v},
                 }
             else:
                 col = self.columns[k]
                 col['sample_datum'] = th.best_representative(
                     col['sample_datum'], v)
                 if v is None:
                     col['is_nullable'] = True
                 seen = col['is_unique']
                 if seen is not False:
                     if v in seen:
                         col['is_unique'] = False
                     else:
                         seen.add(v)
     for col in self.columns.values():
         self._fill_metadata_from_sample(col)
         # Non-empty set -> True, False stays False.
         col['is_unique'] = bool(col['is_unique'])
Example #7
0
 def _determine_types(self):
     """Build ``self.columns`` and ``self.comments`` from ``self.data``.

     Both attributes are reset, then each row is walked key by key
     (sorted order unless the row is an ``OrderedDict``).  Per column the
     method keeps a representative sample value, flags the column as
     nullable when it is absent from a row, first appears after the
     first row or holds ``None``, and tracks the coerced values seen to
     decide whether they were all distinct.  Non-scalar values are
     stringified and noted in ``self.comments``.  At the end
     ``self._fill_metadata_from_sample`` runs on each column and
     ``is_unique`` becomes a plain bool.
     """
     self.columns = OrderedDict()
     self.comments = {}
     for row_number, record in enumerate(self.data, start=1):
         field_names = record.keys()
         # Columns met earlier but missing from this record allow NULL.
         for known_name, known_meta in self.columns.items():
             if known_name not in field_names:
                 known_meta['is_nullable'] = True
         if not isinstance(record, OrderedDict):
             field_names = sorted(field_names)
         for name in field_names:
             value = record[name]
             if not th.is_scalar(value):
                 value = str(value)
                 note = 'nested values! example:\n%s' % pprint.pformat(value)
                 self.comments[name] = note
                 logging.warning('in %s: %s' % (name, note))
             value = th.coerce_to_specific(value)
             if name in self.columns:
                 meta = self.columns[name]
                 meta['sample_datum'] = th.best_representative(
                     meta['sample_datum'], value)
                 if value is None:
                     meta['is_nullable'] = True
                 # Either the set of values seen so far, or False once a
                 # duplicate has been found.
                 seen = meta['is_unique']
                 if seen is not False:
                     if value in seen:
                         meta['is_unique'] = False
                     else:
                         seen.add(value)
             else:
                 self.columns[name] = {
                     'sample_datum': value,
                     'is_nullable': row_number != 1 or value is None,
                     'is_unique': {value},
                 }
     for meta in self.columns.values():
         self._fill_metadata_from_sample(meta)
         meta['is_unique'] = bool(meta['is_unique'])
Example #8
0
 def _determine_types(self):
     """Fill ``self.columns`` with metadata for every column.

     If ``self.data`` has a ``generator`` attribute that in turn has
     ``sqla_columns``, the metadata (``is_nullable``, ``is_unique``,
     ``satype``, ``pytype``) is copied from those column objects and the
     method returns without reading rows or touching ``self.comments``.

     Otherwise ``self.comments`` is reset and the rows are scanned key
     by key (sorted order unless the row is an ``OrderedDict``).  Per
     column the method keeps a representative sample value and the
     longest ``str()`` length of the raw values, flags the column as
     nullable when it is absent from a row, first appears after the
     first row, or holds ``None`` or a blank value, and tracks coerced
     values to decide whether they were all distinct.  Finally
     ``self._fill_metadata_from_sample`` runs on each column and
     ``is_unique`` becomes a plain bool.
     """
     self.columns = OrderedDict()
     predeclared = (hasattr(self.data, 'generator')
                    and hasattr(self.data.generator, 'sqla_columns'))
     if predeclared:
         for sa_col in self.data.generator.sqla_columns:
             self.columns[sa_col.name] = dict(
                 is_nullable=sa_col.nullable,
                 is_unique=sa_col.unique,
                 satype=sa_col.type,
                 pytype=sa_col.pytype)
         return

     self.comments = {}
     for row_number, record in enumerate(self.data, start=1):
         field_names = record.keys()
         # Columns met earlier but missing from this record allow NULL.
         for known_name, known_meta in self.columns.items():
             if known_name not in field_names:
                 known_meta['is_nullable'] = True
         if not isinstance(record, OrderedDict):
             field_names = sorted(field_names)
         for name in field_names:
             raw = record[name]
             if not th.is_scalar(raw):
                 # The stringified form feeds the comment only; the raw
                 # value is what is coerced below.
                 note = 'nested values! example:\n%s' % \
                        pprint.pformat(str(raw))
                 self.comments[name] = note
                 logging.warning('in %s: %s' % (name, note))
             value = th.coerce_to_specific(raw)
             if name in self.columns:
                 meta = self.columns[name]
                 meta['str_length'] = max(meta['str_length'], len(str(raw)))
                 meta['sample_datum'] = th.best_representative(
                     meta['sample_datum'], value)
                 if value is None or not str(value).strip():
                     meta['is_nullable'] = True
                 # Either the set of values seen so far, or False once a
                 # duplicate has been found.
                 seen = meta['is_unique']
                 if seen is not False:
                     if value in seen:
                         meta['is_unique'] = False
                     else:
                         seen.add(value)
             else:
                 self.columns[name] = {
                     'sample_datum': value,
                     'str_length': len(str(raw)),
                     'is_nullable': (row_number != 1 or value is None
                                     or not str(value).strip()),
                     'is_unique': {value},
                 }
     for meta in self.columns.values():
         self._fill_metadata_from_sample(meta)
         meta['is_unique'] = bool(meta['is_unique'])