def gen_data(fetch=None, **kwargs):
    """Generates data from records or file"""
    result = fetch(**kwargs)

    if result.get("f"):
        f = result.pop("f")
        ext = result.pop("ext", "csv")
        reader = io.get_reader(ext)
        records = reader(f, sanitize=True, **result)
    elif result.get("records"):
        records = result["records"]
    else:
        msg = "`fetch` must return a dict with either `records` or `f`."
        raise TypeError(msg)

    if kwargs.get("normalize"):
        normalized = kwargs["normalize"](records, **kwargs)
    else:
        normalized = records

    if kwargs.get("filterer"):
        filtered = it.ifilter(partial(kwargs["filterer"], **kwargs), normalized)
    else:
        filtered = normalized

    if kwargs.get("parse"):
        parsed = it.imap(partial(kwargs["parse"], **kwargs), filtered)
    else:
        parsed = filtered

    return parsed
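# Usage sketch for gen_data (hypothetical helpers; assumes the module above,
# including `it` as Python 2 itertools, is importable). `fetch_static` and
# `parse_ints` are illustrative names, not part of the library.
def fetch_static(**kwargs):
    """Hypothetical fetcher returning in-memory records."""
    return {"records": [{"a": "1", "b": "2"}, {"a": "3", "b": "4"}]}


def parse_ints(record, **kwargs):
    """Hypothetical parser casting every value to an int."""
    return {k: int(v) for k, v in record.items()}


# data = gen_data(fetch=fetch_static, parse=parse_ints)
# list(data)  # -> [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]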
def update_datastore(self, resource_id, filepath, **kwargs):
    """Updates a CKAN datastore table from a file of records."""
    verbose = not kwargs.get('quiet')
    chunk_rows = kwargs.get('chunksize_rows')
    primary_key = kwargs.get('primary_key')
    content_type = kwargs.get('content_type')
    type_cast = kwargs.get('type_cast')
    method = 'upsert' if primary_key else 'insert'
    keys = ['aliases', 'primary_key', 'indexes']

    try:
        extension = p.splitext(filepath)[1].split('.')[1]
    except (IndexError, AttributeError):
        # no file extension given, e.g., a tempfile
        extension = cv.ctype2ext(content_type)

    try:
        reader = io.get_reader(extension)
    except TypeError:
        print('Error: plugin for extension `%s` not found!' % extension)
        return False
    else:
        records = reader(filepath, **kwargs)
        first = records.next()
        # use a separate name so `keys` (which filters the table creation
        # kwargs below) isn't shadowed by the record field names
        fieldnames = first.keys()
        records = it.chain([first], records)

    if type_cast:
        records, results = pr.detect_types(records)
        types = results['types']
        casted_records = pr.type_cast(records, types)
    else:
        types = [{'id': key, 'type': 'text'} for key in fieldnames]
        casted_records = records

    if verbose:
        print('Parsed types:')
        pprint(types)

    create_kwargs = {k: v for k, v in kwargs.items() if k in keys}

    if not primary_key:
        self.delete_table(resource_id)

    insert_kwargs = {'chunksize': chunk_rows, 'method': method}
    self.create_table(resource_id, types, **create_kwargs)
    args = [resource_id, casted_records]
    return self.insert_records(*args, **insert_kwargs)
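# Usage sketch for update_datastore (hypothetical resource id and file path;
# assumes `ckan` is an instance of the API wrapper class this method belongs to):
#
# ckan.update_datastore(
#     'some-resource-id', '/path/to/data.csv',
#     primary_key='id',        # switches `method` to 'upsert' above
#     type_cast=True,          # detect column types instead of defaulting to 'text'
#     chunksize_rows=10000)    # forwarded to insert_records as `chunksize`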
def test_get_reader(self):
    nt.assert_true(callable(io.get_reader("csv")))

    with nt.assert_raises(KeyError):
        io.get_reader("")
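# Usage sketch for io.get_reader (hypothetical file path; assumes the returned
# csv reader accepts a file path plus keyword args, as in update_datastore
# above, and yields dict-like records):
#
# reader = io.get_reader("csv")
# records = reader("/path/to/data.csv", sanitize=True)
# first = next(iter(records))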