Example #1
0
def analyze_csv(url, sample=1000):
    """Download a CSV file and guess a model mapping for its columns.

    ``url``
        Address handed to ``urlopen`` to fetch the CSV data.

    ``sample``
        Passed to ``CSVRowSet`` as its ``window`` argument.

    Returns: a ``dict`` with ``columns`` (header values in file order) and
    ``mapping`` (one entry per column, keyed by the lower-cased slug of
    its header). If any ``Exception`` is raised along the way it is logged
    and ``{'error': <message>}`` is returned instead.
    """
    try:
        fileobj = urlopen(url)
        try:
            row_set = CSVRowSet('data', fileobj, window=sample)
            rows = list(row_set.sample)
        finally:
            # Nothing below reads from the response, so release it here
            # instead of leaking the connection on every call.
            fileobj.close()
        headers, rows = rows[0], rows[1:]
        # NOTE(review): type guessing skips the first 500 data rows, so a
        # sample with fewer rows than that passes an empty list to
        # type_guess -- confirm this is intended.
        types = type_guess(rows[500:], types=LIMITED_TYPES)
        mapping = {}
        for header, type_ in zip(headers, types):
            type_ = repr(type_).lower()
            name = slugify(header.value).lower()
            meta = {
                'label': header.value,
                'column': header.value,
                'datatype': type_
            }
            if type_ in ['decimal', 'integer', 'float']:
                # All numeric guesses are collapsed into float measures.
                meta['type'] = 'measure'
                meta['datatype'] = 'float'
            elif type_.startswith('date'):
                meta['type'] = 'date'
                meta['datatype'] = 'date'
            else:
                meta['type'] = 'attribute'
            mapping[name] = meta
        return {'columns': [h.value for h in headers], 'mapping': mapping}
    except Exception as e:
        log.exception(e)
        return {'error': unicode(e)}
Example #2
0
def generate_mapping(fileobj, sample=2000):
    """Guess a column mapping from a sample of the CSV data in ``fileobj``.

    ``fileobj``
        File-like object handed to ``CSVRowSet``.

    ``sample``
        Passed to ``CSVRowSet`` as its ``window`` argument.

    Returns: a ``dict`` keyed by the lower-cased slug of each header value.
    Every entry carries ``label``, ``column``, ``common_values``,
    ``datatype`` and ``type``.
    """
    rows = list(CSVRowSet('data', fileobj, window=sample).sample)
    header_cells = rows[0]
    data_rows = rows[1:]
    common = frequent_values(data_rows)
    guesses = type_guess(data_rows)

    numeric_names = ('decimal', 'integer', 'float')
    result = {}
    for cell, guess, top_values in zip(header_cells, guesses, common):
        datatype = repr(guess).lower()
        key = slugify(cell.value).lower()

        # Numeric guesses are reported as float measures; anything that is
        # neither numeric nor a date keeps its guessed datatype.
        if datatype in numeric_names:
            kind, datatype = 'measure', 'float'
        elif datatype == 'date':
            kind = 'date'
        else:
            kind = 'value'

        entry = {
            'label': cell.value,
            'column': cell.value,
            'common_values': top_values,
            'datatype': datatype,
        }
        entry['type'] = kind
        result[key] = entry
    return result
Example #3
0
def analyze_csv(url, sample=1000):
    """Download a CSV file and guess a model mapping for its columns.

    ``url``
        Address handed to ``urlopen`` to fetch the CSV data.

    ``sample``
        Passed to ``CSVRowSet`` as its ``window`` argument.

    Returns: a ``dict`` with ``columns`` (header values in file order) and
    ``mapping`` (one entry per column, keyed by the lower-cased slug of
    its header). If any ``Exception`` is raised along the way,
    ``{'error': <message>}`` is returned instead.
    """
    try:
        fileobj = urlopen(url)
        try:
            row_set = CSVRowSet('data', fileobj, window=sample)
            rows = list(row_set.sample)
        finally:
            # Nothing below reads from the response, so release it here
            # instead of leaking the connection on every call.
            fileobj.close()
        headers, rows = rows[0], rows[1:]
        # NOTE(review): type guessing skips the first 500 data rows, so a
        # sample with fewer rows than that passes an empty list to
        # type_guess -- confirm this is intended.
        types = type_guess(rows[500:], types=LIMITED_TYPES)
        mapping = {}
        for header, type_ in zip(headers, types):
            type_ = repr(type_).lower()
            name = slugify(header.value).lower()
            meta = {
                'label': header.value,
                'column': header.value,
                'datatype': type_
                }
            if type_ in ['decimal', 'integer', 'float']:
                # All numeric guesses are collapsed into float measures.
                meta['type'] = 'measure'
                meta['datatype'] = 'float'
            elif type_.startswith('date'):
                meta['type'] = 'date'
                meta['datatype'] = 'date'
            else:
                meta['type'] = 'attribute'
            mapping[name] = meta
        return {'columns': [h.value for h in headers],
                'mapping': mapping}
    # "except X as e" is accepted by Python 2.6+ and Python 3 alike; the
    # comma form is a SyntaxError on Python 3.
    except Exception as e:
        return {'error': unicode(e)}
Example #4
0
def make_name(dataset, label):
    """Derive a view name from ``label`` that is unused within ``dataset``.

    The slug of ``label`` is tried first; while ``View.by_name`` still
    finds a view under the candidate, the slug is retried with ``0``,
    ``1``, ``2`` ... appended.

    Returns: the first candidate for which ``View.by_name`` returns
    ``None``.
    """
    from openspending.lib.util import slugify
    from itertools import count
    base = slugify(label)
    candidate = base
    suffixes = count()
    while View.by_name(dataset, candidate) is not None:
        candidate = base + str(next(suffixes))
    return candidate
Example #5
0
def make_name(dataset, label):
    """Pick a name for a new view, based on ``label``, free in ``dataset``.

    Returns: the slug of ``label`` if ``View.by_name`` finds no view under
    it, otherwise the slug followed by the lowest counter (starting at 0)
    for which no view is found.
    """
    from openspending.lib.util import slugify
    from itertools import count
    stem = slugify(label)
    if View.by_name(dataset, stem) is None:
        return stem
    # The plain slug is taken: probe stem0, stem1, ... until one is free.
    for n in count():
        numbered = stem + str(n)
        if View.by_name(dataset, numbered) is None:
            return numbered
Example #6
0
def entity_slug(entity):
    '''Build a slug for an entity from the best text it offers.

    ``entity``
        A dict-like ``entity`` object

    The first non-empty value among ``label`` and ``name`` is slugified;
    when both are missing or empty, the string form of ``_id`` is used.

    Returns: `str`
    '''
    for key in ('label', 'name'):
        text = entity.get(key, '')
        if text:
            return slugify(text)
    return slugify(str(entity['_id']))
Example #7
0
def entity_slug(entity):
    """Return the slug for an entity.

    ``entity``
        A dict-like ``entity`` object

    The label is preferred, then the name, and finally the stringified
    ``_id`` when neither of the other two holds a non-empty value.

    Returns: `str`
    """
    label = entity.get("label", "")
    if label:
        return slugify(label)
    name = entity.get("name", "")
    if name:
        return slugify(name)
    return slugify(str(entity["_id"]))
Example #8
0
def entity_slug(entity):
    '''Slugify an entity's label, falling back to its name, then its id.

    ``entity``
        A :class:`openspending.model.Entity` object

    Returns: `str`
    '''
    source = entity.get('label', '') or entity.get('name', '')
    if not source:
        # Neither label nor name is usable; the id is read unconditionally.
        source = str(entity['_id'])
    return slugify(source)
Example #9
0
def entity_slug(entity):
    '''Compute the slug of an entity.

    ``entity``
        A dict-like ``entity`` object

    Uses the entity's ``label`` when it is non-empty, otherwise its
    ``name``, otherwise ``str`` of its ``_id``.

    Returns: `str`
    '''
    # ``or`` short-circuits, so ``_id`` is only looked up when both the
    # label and the name are missing or empty.
    return slugify(
        entity.get('label', '')
        or entity.get('name', '')
        or str(entity['_id'])
    )
Example #10
0
def test_slugify():
    # Each pair is (input, expected slug): plain ASCII is left alone,
    # accented letters lose their accent and '&' turns into '-'.
    cases = [
        (u'foo', 'foo'),
        (u'fóo', 'foo'),
        (u'fóo&bañ', 'foo-ban'),
    ]
    for raw, expected in cases:
        h.assert_equal(util.slugify(raw), expected)
Example #11
0
def test_slugify():
    # Map every input to the slug it is expected to produce; checked in
    # the order listed.
    inputs = (u'foo', u'fóo', u'fóo&bañ')
    slugs = ('foo', 'foo', 'foo-ban')
    for text, slug in zip(inputs, slugs):
        h.assert_equal(util.slugify(text), slug)
Example #12
0
 def test_slugify(self):
     # (input, expected slug) pairs: ASCII passes through, accents are
     # dropped and '&' becomes '-'.
     expectations = (
         (u'foo', 'foo'),
         (u'fóo', 'foo'),
         (u'fóo&bañ', 'foo-ban'),
     )
     for raw, expected in expectations:
         assert util.slugify(raw) == expected