def convert(line):
    """Translate one shorthand assertion line into ``self.assert*`` code.

    The line is tested against an ordered list of rules and the first rule
    that matches decides the result:

    * Operator rules (``==``, ``!=``, ``~=``, ``>=``, `` in ``, `` is ``, ...)
      split the line on the *last* occurrence of the operator and emit
      ``self.assert<Name>(<left>, <right>)``.
    * Lines with more than one ``>`` (or more than one ``<``) are handed to
      ``_convert`` (defined elsewhere in this file).
    * ``raises`` produces a two-element list: a ``with self.assertRaises(...)``
      header and the guarded statement.
    * Lines containing ``for `` are returned unchanged.

    Args:
        line: the source line to translate.

    Returns:
        The translated code (a string, a list for ``raises``, or whatever
        ``_convert`` returns), or ``line`` itself when no rule matches.
    """

    def to_code(token, suffix, text):
        # Split on the last occurrence so that any earlier occurrences of
        # the operator stay inside the left operand.
        left, _, right = text.rpartition(token)
        # ``strip`` is a module-level name not visible in this block
        # (presumably ``string.strip`` -- TODO confirm).
        params = ', '.join(strip(part) for part in (left, right))
        # Unary checks such as ' is None' leave an empty right operand;
        # drop the dangling ", " it would otherwise produce.
        params = params.rstrip().rstrip(',')
        return 'self.assert' + suffix + '(' + params + ')'

    def raises_block(text):
        pieces = text.split('raises')
        # NOTE(review): the guarded statement is prefixed with a single
        # space, exactly as in the original source -- confirm the intended
        # indentation width.
        return [
            'with self.assertRaises(' + pieces[1].strip() + '):',
            ' ' + pieces[0].strip(),
        ]

    # Order matters: longer / more specific tokens must be tried before the
    # shorter tokens they contain (e.g. '!~=' before '~=', '>=' before '>').
    # Each entry is (matcher, handler); a matcher is either a substring or a
    # predicate, a handler is either an assert-name suffix or a callable.
    rules = [
        ('==', 'Equal'),
        ('!=', 'NotEqual'),
        ('!~=', 'NotAlmostEqual'),
        ('~=', 'AlmostEqual'),
        (lambda text: bool(text.count('>') > 1),
         partial(_convert, '>', 'Greater')),
        (lambda text: bool(text.count('<') > 1),
         partial(_convert, '<', 'Less')),
        ('>=', 'GreaterEqual'),
        ('<=', 'LessEqual'),
        ('>', 'Greater'),
        ('<', 'Less'),
        ('raises', raises_block),
        (' is not instanceof ', 'NotIsInstance'),
        (' is instanceof ', 'IsInstance'),
        ('for ', lambda text: text),
        (' not in ', 'NotIn'),
        (' in ', 'In'),
        (' is not None', 'IsNotNone'),
        (' is None', 'IsNone'),
        (' is not ', 'IsNot'),
        (' is ', 'Is'),
    ]

    # A plain loop (rather than map/filter) keeps this working on Python 3,
    # where map() and filter() are lazy and filter objects have no len().
    for matcher, handler in rules:
        matched = matcher(line) if callable(matcher) else matcher in line
        if not matched:
            continue
        if callable(handler):
            return handler(line)
        return to_code(matcher, handler, line)
    return line
class Introspector(object):
    """Infer a coarse type ("INT", "FLOAT", "CHAR" or "TEXT") per column.

    Rows are fed in one at a time; for every column the most restrictive
    cast that fits all values seen so far is kept in ``self.cols`` and the
    column's position within the most recent row in ``self.col_order``.
    A column's cast only ever moves towards less restrictive types.
    """

    # Casts from most to least restrictive. "TEXT" accepts anything.
    _CAST_ORDER = ("INT", "FLOAT", "CHAR", "TEXT")

    # Longest string that still counts as "CHAR".
    CHAR_MAX_LENGTH = 100

    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        _STRING_TYPES = basestring  # noqa: F821
    except NameError:
        _STRING_TYPES = str

    def __init__(self, **kwargs):
        """Create an empty introspector.

        Keyword Args:
            reader_settings: mapping of keyword arguments forwarded to
                ``OrderedDictReader`` by ``from_stream``. Defaults to ``{}``.
        """
        self.cols = OrderedDict()
        self.col_order = OrderedDict()
        self.reader_settings = kwargs.get('reader_settings', {})

    def from_stream(self, stream, limit=None):
        """Introspect rows read from ``stream`` and return ``self``.

        Args:
            stream: passed to ``OrderedDictReader`` (defined elsewhere in
                the project) together with ``self.reader_settings``.
            limit: maximum number of rows to introspect; ``None`` means all.
        """
        reader = OrderedDictReader(stream, **self.reader_settings)
        for row_count, row in enumerate(reader):
            if limit is not None and row_count >= limit:
                break
            self.introspect_row(row)
        return self

    def introspect_row(self, row):
        """Update the column casts from one row (a mapping with ``items()``)."""
        for order, (key, val) in enumerate(row.items()):
            self.set_col(key, val, order)

    def ordered_cols(self):
        '''Create a dictionary that brings together cols and col_order.

        Returns:
            dict mapping each column key to a ``(cast, order)`` tuple.
        '''
        return {key: (self.cols[key], self.col_order[key])
                for key in self.cols}

    def set_col(self, key, value, order):
        """Record ``value`` for column ``key`` and store the resulting cast.

        A column that has not been seen yet starts at the most restrictive
        cast; a known column starts at its current cast. From there the
        first cast that accepts ``value`` wins, falling back to "TEXT".

        Args:
            key: column identifier.
            value: the cell value to test.
            order: position of the column in the row; stored as-is.
        """
        current = self.cols.get(key)
        if current is None:
            start = 0
        elif current in self._CAST_ORDER:
            start = self._CAST_ORDER.index(current)
        else:
            # An unrecognised stored cast is treated as "TEXT".
            start = len(self._CAST_ORDER) - 1

        # zip() stops at the shorter sequence, so "TEXT" has no check and is
        # simply what remains when every check from ``start`` onward fails.
        checks = (self.passes_int, self.passes_float, self.passes_char)
        cast = "TEXT"
        for name, check in list(zip(self._CAST_ORDER, checks))[start:]:
            if check(value):
                cast = name
                break

        self.cols[key] = cast
        self.col_order[key] = order

    def _strip_commas(self, value):
        """Remove commas from string values that start with a digit.

        NOTE(review): the pattern looks like it was meant to recognise
        thousands separators, but since it is only anchored at the start it
        matches any string beginning with a digit. The pattern is kept
        unchanged so existing classifications do not shift.
        """
        if (isinstance(value, self._STRING_TYPES)
                and re.match(r'\d{1,3},?\d{,3}?', value)):
            return value.replace(",", "")
        return value

    def passes_char(self, value):
        """Return True if ``value`` is a string of at most 100 characters."""
        return (isinstance(value, self._STRING_TYPES)
                and len(value) <= self.CHAR_MAX_LENGTH)

    def passes_int(self, value, null=True):
        """Return True if ``value`` reads as an integer.

        Args:
            value: the cell value; strings may contain commas.
            null: when true, ``None`` counts as a valid integer.
        """
        # The None check must come first: re.match() raises TypeError on
        # None, which previously made the ``null`` option unreachable.
        if value is None:
            return bool(null)
        value = self._strip_commas(value)
        try:
            # Round-tripping through int() rejects forms such as "007",
            # " 5" or "5.0" whose text differs from the canonical integer.
            return str(int(value)) == str(value)
        except (TypeError, ValueError, OverflowError):
            return False

    def passes_float(self, value, null=True):
        """Return True if ``value`` can be converted with ``float()``.

        Args:
            value: the cell value; strings may contain commas.
            null: when true, ``None`` counts as a valid float.
        """
        if value is None:
            return bool(null)
        value = self._strip_commas(value)
        try:
            float(value)
        except (TypeError, ValueError, OverflowError):
            return False
        return True