Exemple #1
0
def gen_data(fetch=None, **kwargs):
    """Generate data from the records or file that `fetch` returns.

    The pipeline is: fetch -> (read file) -> normalize -> filter -> parse.
    Every stage receives the full `kwargs`, including the stage callables
    themselves, so each callable must accept arbitrary keyword arguments.

    Args:
        fetch: Callable invoked as `fetch(**kwargs)`. It must return a dict
            holding either a truthy `f` (handed to the reader selected by
            the optional `ext` entry, default "csv", together with the
            remaining dict entries) or a truthy `records`.
        **kwargs: Passed to `fetch` and to every stage. Keys read here:
            normalize: Called as `normalize(records, **kwargs)`.
            filterer: Called as `filterer(record, **kwargs)`; records with
                a falsy result are dropped.
            parse: Called as `parse(record, **kwargs)` on each kept record.

    Returns:
        The records after the requested stages. Filtering and parsing are
        lazy, so the result is a generator whenever either one is supplied.

    Raises:
        TypeError: If `fetch` is not callable, or if its result has neither
            a truthy `f` nor a truthy `records` (an empty `records`
            collection therefore also raises).
    """
    if not callable(fetch):
        raise TypeError("`fetch` must be a callable that returns a dict.")

    result = fetch(**kwargs)

    if result.get("f"):
        # `f` and `ext` are popped so that only reader options remain in
        # `result` when it is splatted into the reader call.
        f = result.pop("f")
        ext = result.pop("ext", "csv")
        reader = io.get_reader(ext)
        records = reader(f, sanitize=True, **result)
    elif result.get("records"):
        records = result["records"]
    else:
        msg = "`fetch` must return a dict with either `records` or `f`."
        raise TypeError(msg)

    normalize = kwargs.get("normalize")
    filterer = kwargs.get("filterer")
    parse = kwargs.get("parse")

    if normalize:
        records = normalize(records, **kwargs)

    # Generator expressions keep the stages lazy on both Python 2 and 3;
    # `itertools.ifilter` / `itertools.imap` only exist on Python 2.
    if filterer:
        keep = partial(filterer, **kwargs)
        records = (record for record in records if keep(record))

    if parse:
        convert = partial(parse, **kwargs)
        records = (convert(record) for record in records)

    return records
Exemple #2
0
def gen_data(fetch=None, **kwargs):
    """Generate data from the records or file that `fetch` returns.

    Stages run in this order: fetch, optional file read, normalize, filter,
    parse. The complete `kwargs` (stage callables included) is forwarded to
    every stage, so each callable has to tolerate extra keyword arguments.

    Args:
        fetch: Callable invoked as `fetch(**kwargs)`; must return a dict
            with a truthy `f` (read with the reader chosen by the optional
            `ext` entry, default 'csv', plus the remaining entries as
            reader options) or a truthy `records`.
        **kwargs: Forwarded to `fetch` and to the stages. Keys read here:
            normalize: Called as `normalize(records, **kwargs)`.
            filterer: Called as `filterer(record, **kwargs)`; a falsy
                result drops the record.
            parse: Called as `parse(record, **kwargs)` per kept record.

    Returns:
        The records after all requested stages. The filter and parse stages
        are lazy, so a generator is returned when either is supplied.

    Raises:
        TypeError: If `fetch` is not callable, or its result contains
            neither a truthy `f` nor a truthy `records` (so an empty
            `records` collection raises as well).
    """
    if not callable(fetch):
        raise TypeError('`fetch` must be a callable that returns a dict.')

    result = fetch(**kwargs)

    if result.get('f'):
        # Pop `f` and `ext` so only reader options are left in `result`.
        f = result.pop('f')
        ext = result.pop('ext', 'csv')
        reader = io.get_reader(ext)
        records = reader(f, sanitize=True, **result)
    elif result.get('records'):
        records = result['records']
    else:
        msg = '`fetch` must return a dict with either `records` or `f`.'
        raise TypeError(msg)

    normalize = kwargs.get('normalize')
    filterer = kwargs.get('filterer')
    parse = kwargs.get('parse')

    if normalize:
        records = normalize(records, **kwargs)

    # `itertools.ifilter` / `itertools.imap` are Python 2 only; generator
    # expressions give the same lazy behaviour on Python 2 and 3.
    if filterer:
        keep = partial(filterer, **kwargs)
        records = (record for record in records if keep(record))

    if parse:
        convert = partial(parse, **kwargs)
        records = (convert(record) for record in records)

    return records
Exemple #3
0
    def update_datastore(self, resource_id, filepath, **kwargs):
        """Load the records of a file into the datastore table of a resource.

        Args:
            resource_id: Identifier forwarded to `delete_table`,
                `create_table` and `insert_records`.
            filepath: File to read; its extension selects the reader.
            **kwargs: Forwarded in full to the reader. Keys read here:
                quiet: When truthy, the parsed types are not printed.
                chunksize_rows: Passed to `insert_records` as `chunksize`.
                primary_key: When truthy, records are upserted into the
                    existing table; otherwise the table is deleted first
                    and the records are inserted.
                content_type: Used to derive the extension when `filepath`
                    does not provide one.
                type_cast: When truthy, field types are detected and the
                    records are cast; otherwise every field is 'text'.
                aliases, indexes: Together with `primary_key`, forwarded
                    to `create_table` when present.

        Returns:
            False when no reader is found for the extension, otherwise
            whatever `insert_records` returns.

        NOTE(review): a reader that yields no records makes `next` raise
        StopIteration here -- confirm whether callers rely on that.
        """
        verbose = not kwargs.get('quiet')
        chunk_rows = kwargs.get('chunksize_rows')
        primary_key = kwargs.get('primary_key')
        content_type = kwargs.get('content_type')
        type_cast = kwargs.get('type_cast')
        method = 'upsert' if primary_key else 'insert'
        # Options that belong to `create_table`. Kept under its own name so
        # the record field names read below cannot overwrite it.
        create_keys = ('aliases', 'primary_key', 'indexes')

        try:
            extension = p.splitext(filepath)[1].split('.')[1]
        except (IndexError, AttributeError, TypeError):
            # no file extension given, e.g., a tempfile (Python 3 raises
            # TypeError where Python 2 raised AttributeError)
            extension = cv.ctype2ext(content_type)

        try:
            reader = io.get_reader(extension)
        except (TypeError, KeyError):
            # NOTE(review): the accompanying `get_reader` test expects
            # KeyError for an unknown extension, so both are handled.
            print('Error: plugin for extension `%s` not found!' % extension)
            return False

        records = reader(filepath, **kwargs)
        # Peek at the first record for the field names, then put it back.
        first = next(records)
        fields = first.keys()
        records = it.chain([first], records)

        if type_cast:
            records, results = pr.detect_types(records)
            types = results['types']
            casted_records = pr.type_cast(records, types)
        else:
            types = [{'id': field, 'type': 'text'} for field in fields]
            casted_records = records

        if verbose:
            print('Parsed types:')
            pprint(types)

        create_kwargs = {
            k: v for k, v in kwargs.items() if k in create_keys}

        if not primary_key:
            self.delete_table(resource_id)

        insert_kwargs = {'chunksize': chunk_rows, 'method': method}
        self.create_table(resource_id, types, **create_kwargs)
        args = [resource_id, casted_records]
        return self.insert_records(*args, **insert_kwargs)
Exemple #4
0
    def update_datastore(self, resource_id, filepath, **kwargs):
        """Read a file and write its records to a resource's datastore table.

        Args:
            resource_id: Identifier forwarded to `delete_table`,
                `create_table` and `insert_records`.
            filepath: File to read; its extension selects the reader.
            **kwargs: Forwarded in full to the reader. Keys read here:
                quiet: When truthy, the parsed types are not printed.
                chunksize_rows: Passed to `insert_records` as `chunksize`.
                primary_key: When truthy, the method is 'upsert' and the
                    table is kept; otherwise the table is deleted first
                    and the method is 'insert'.
                content_type: Used to derive the extension when `filepath`
                    does not provide one.
                type_cast: When truthy, field types are detected and the
                    records are cast; otherwise every field is 'text'.
                aliases, indexes: Together with `primary_key`, forwarded
                    to `create_table` when present.

        Returns:
            False when no reader is found for the extension, otherwise
            whatever `insert_records` returns.

        NOTE(review): if the reader yields nothing, `next` raises
        StopIteration here -- confirm whether callers rely on that.
        """
        verbose = not kwargs.get('quiet')
        chunk_rows = kwargs.get('chunksize_rows')
        primary_key = kwargs.get('primary_key')
        content_type = kwargs.get('content_type')
        type_cast = kwargs.get('type_cast')
        method = 'upsert' if primary_key else 'insert'
        # `create_table` options; named separately from the record field
        # names so the two lists cannot clobber each other.
        create_keys = ('aliases', 'primary_key', 'indexes')

        try:
            extension = p.splitext(filepath)[1].split('.')[1]
        except (IndexError, AttributeError, TypeError):
            # no file extension given, e.g., a tempfile (a non-path
            # argument raises TypeError on Python 3)
            extension = cv.ctype2ext(content_type)

        try:
            reader = io.get_reader(extension)
        except (TypeError, KeyError):
            # NOTE(review): the accompanying `get_reader` test expects
            # KeyError for an unknown extension, so both are handled.
            print('Error: plugin for extension `%s` not found!' % extension)
            return False

        records = reader(filepath, **kwargs)
        # Consume one record to learn the field names, then re-attach it.
        first = next(records)
        fields = first.keys()
        records = it.chain([first], records)

        if type_cast:
            records, results = pr.detect_types(records)
            types = results['types']
            casted_records = pr.type_cast(records, types)
        else:
            types = [{'id': field, 'type': 'text'} for field in fields]
            casted_records = records

        if verbose:
            print('Parsed types:')
            pprint(types)

        create_kwargs = {
            k: v for k, v in kwargs.items() if k in create_keys}

        if not primary_key:
            self.delete_table(resource_id)

        insert_kwargs = {'chunksize': chunk_rows, 'method': method}
        self.create_table(resource_id, types, **create_kwargs)
        args = [resource_id, casted_records]
        return self.insert_records(*args, **insert_kwargs)
Exemple #5
0
    def test_get_reader(self):
        # A known extension must resolve to something callable.
        csv_reader = io.get_reader("csv")
        nt.assert_true(callable(csv_reader))

        # An empty extension is expected to raise KeyError.
        with nt.assert_raises(KeyError):
            io.get_reader("")