def analyze_csv(url, sample=1000):
    """Fetch the CSV at ``url`` and guess a column mapping for it.

    ``url``
        Location handed to ``urlopen``.
    ``sample``
        Passed as the ``window`` argument of ``CSVRowSet``
        (presumably the number of rows to sample -- confirm against
        the row set implementation).

    Returns: a ``dict`` with ``'columns'`` (the header values, taken
    from the first sampled row) and ``'mapping'`` (one entry per column,
    keyed by the lower-cased slug of the header). If anything raises,
    the exception is logged and ``{'error': <message>}`` is returned
    instead.
    """
    try:
        fileobj = urlopen(url)
        try:
            row_set = CSVRowSet('data', fileobj, window=sample)
            # Materialise the sample while the handle is still open.
            rows = list(row_set.sample)
        finally:
            # The handle is not needed once the sample has been read.
            fileobj.close()

        headers, rows = rows[0], rows[1:]
        # Types are guessed only from rows at index 500 onward; for
        # shorter samples this slice is empty.
        # NOTE(review): confirm skipping the first 500 rows is intended.
        types = type_guess(rows[500:], types=LIMITED_TYPES)

        mapping = {}
        for header, type_ in zip(headers, types):
            type_ = repr(type_).lower()
            name = slugify(header.value).lower()
            meta = {
                'label': header.value,
                'column': header.value,
                'datatype': type_
            }
            if type_ in ['decimal', 'integer', 'float']:
                # Every numeric guess is stored as a float measure.
                meta['type'] = 'measure'
                meta['datatype'] = 'float'
            elif type_.startswith('date'):
                meta['type'] = 'date'
                meta['datatype'] = 'date'
            else:
                meta['type'] = 'attribute'
            mapping[name] = meta

        return {'columns': [h.value for h in headers],
                'mapping': mapping}
    except Exception as e:
        # Report failures to the caller as data rather than raising.
        log.exception(e)
        return {'error': unicode(e)}
def generate_mapping(fileobj, sample=2000):
    """Guess a column mapping from a CSV file object.

    ``fileobj``
        File object handed to ``CSVRowSet``.
    ``sample``
        Passed as the ``window`` argument of ``CSVRowSet``.

    The first sampled row is treated as the header. Each column gets
    an entry keyed by the lower-cased slug of its header, holding the
    label, source column, common values, datatype and type.

    Returns: `dict`
    """
    numeric_types = ('decimal', 'integer', 'float')

    rows = list(CSVRowSet('data', fileobj, window=sample).sample)
    header_row = rows[0]
    data_rows = rows[1:]
    common = frequent_values(data_rows)
    guesses = type_guess(data_rows)

    mapping = {}
    for cell, guess, top_values in zip(header_row, guesses, common):
        datatype = repr(guess).lower()
        key = slugify(cell.value).lower()
        if datatype in numeric_types:
            # Every numeric guess is stored as a float measure.
            kind, datatype = 'measure', 'float'
        elif datatype == 'date':
            kind = 'date'
        else:
            kind = 'value'
        mapping[key] = {
            'label': cell.value,
            'column': cell.value,
            'common_values': top_values,
            'datatype': datatype,
            'type': kind,
        }
    return mapping
def analyze_csv(url, sample=1000):
    """Fetch the CSV at ``url`` and guess a column mapping for it.

    ``url``
        Location handed to ``urlopen``.
    ``sample``
        Passed as the ``window`` argument of ``CSVRowSet``
        (presumably the number of rows to sample -- confirm against
        the row set implementation).

    Returns: a ``dict`` with ``'columns'`` (the header values, taken
    from the first sampled row) and ``'mapping'`` (one entry per column,
    keyed by the lower-cased slug of the header). If anything raises,
    ``{'error': <message>}`` is returned instead.
    """
    try:
        fileobj = urlopen(url)
        try:
            row_set = CSVRowSet('data', fileobj, window=sample)
            # Materialise the sample while the handle is still open.
            rows = list(row_set.sample)
        finally:
            # The handle is not needed once the sample has been read.
            fileobj.close()

        headers, rows = rows[0], rows[1:]
        # Types are guessed only from rows at index 500 onward; for
        # shorter samples this slice is empty.
        # NOTE(review): confirm skipping the first 500 rows is intended.
        types = type_guess(rows[500:], types=LIMITED_TYPES)

        mapping = {}
        for header, type_ in zip(headers, types):
            type_ = repr(type_).lower()
            name = slugify(header.value).lower()
            meta = {
                'label': header.value,
                'column': header.value,
                'datatype': type_
            }
            if type_ in ['decimal', 'integer', 'float']:
                # Every numeric guess is stored as a float measure.
                meta['type'] = 'measure'
                meta['datatype'] = 'float'
            elif type_.startswith('date'):
                meta['type'] = 'date'
                meta['datatype'] = 'date'
            else:
                meta['type'] = 'attribute'
            mapping[name] = meta

        return {'columns': [h.value for h in headers],
                'mapping': mapping}
    except Exception as e:
        # Report failures to the caller as data rather than raising.
        return {'error': unicode(e)}
def make_name(dataset, label):
    """Derive a view name from ``label`` that is unused in ``dataset``.

    The slug of ``label`` is tried first. While ``View.by_name`` finds
    an existing view under the candidate name, the slug is retried with
    an increasing integer suffix (``0``, ``1``, ...) appended.

    Returns: the first candidate for which no view was found.
    """
    from itertools import count
    from openspending.lib.util import slugify

    base = slugify(label)
    candidate = base
    suffixes = count()
    while View.by_name(dataset, candidate) is not None:
        candidate = base + str(next(suffixes))
    return candidate
def entity_slug(entity):
    '''Build a slug for an entity via ``slugify``.

    ``entity``
        A dict-like ``entity`` object

    The slug source is the entity's ``label``; if that is missing or
    empty its ``name`` is used, and failing that the string form of
    its ``_id``.

    Returns: `str`
    '''
    source = (
        entity.get('label', '')
        or entity.get('name', '')
        or str(entity['_id'])
    )
    return slugify(source)
def entity_slug(entity):
    """Build a slug for an entity via ``slugify``.

    ``entity``
        A dict-like ``entity`` object

    The first non-empty value among ``label`` and ``name`` is
    slugified; if both are missing or empty, the string form of
    ``_id`` is used instead.

    Returns: `str`
    """
    for key in ("label", "name"):
        candidate = entity.get(key, "")
        if candidate:
            return slugify(candidate)
    return slugify(str(entity["_id"]))
def entity_slug(entity):
    '''Build a slug for an entity via ``slugify``.

    ``entity``
        A :class:`openspending.model.Entity` object

    Preference order for the slug source: ``label``, then ``name``,
    then the string form of ``_id``.

    Returns: `str`
    '''
    label = entity.get('label', '')
    if label:
        return slugify(label)

    name = entity.get('name', '')
    if name:
        return slugify(name)

    # Neither label nor name is usable; fall back to the identifier.
    return slugify(str(entity['_id']))
def test_slugify():
    """Check ``util.slugify`` on plain, accented and punctuated input."""
    # (input, expected slug) pairs, checked in order.
    cases = [
        (u'foo', 'foo'),
        (u'fóo', 'foo'),
        (u'fóo&bañ', 'foo-ban'),
    ]
    for raw, expected in cases:
        h.assert_equal(util.slugify(raw), expected)
def test_slugify(self):
    """Check ``util.slugify`` on plain, accented and punctuated input."""
    # (input, expected slug) pairs, checked in order.
    cases = (
        (u'foo', 'foo'),
        (u'fóo', 'foo'),
        (u'fóo&bañ', 'foo-ban'),
    )
    for raw, expected in cases:
        assert util.slugify(raw) == expected