def import_csv(dataset, url, args):
    """Import CSV data into *dataset*.

    ``url`` is a ``(csv_data_url, source_url)`` pair: the first element is
    where the data is actually read from, the second is the URL that should
    be recorded on the source (they differ when the data was imported from
    the local file system).
    """
    csv_data_url, source_url = url
    source = Source(dataset, shell_account(), csv_data_url)

    # Reuse an existing source with the same data URL instead of adding
    # a duplicate row.
    for source_ in dataset.sources:
        if source_.url == csv_data_url:
            source = source_
            break

    # Analyse the csv data and attach the result to the source that will
    # actually be used (if we don't analyse it we'll be left with a weird
    # message).  Bug fix: the analysis used to be assigned to the freshly
    # created Source *before* the duplicate check, so it was silently lost
    # whenever an existing source was reused.
    source.analysis = analyze_csv(csv_data_url)

    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))

    # Check if imported from the file system (source and data url differ).
    if csv_data_url != source_url:
        # If we did, then we must update the source url based on the
        # sources in the dataset model (so we need to fetch the source
        # again or else we'll add a new one).
        source = Source.by_id(source.id)
        source.url = source_url
        db.session.commit()
def import_csv(dataset, url, args):
    """Import CSV data into *dataset*.

    Parameters:
        dataset: the dataset model object to load into.
        url: ``(csv_data_url, source_url)`` tuple — the first is the URL the
            data is read from, the second the URL to record on the source
            (they differ for file-system imports).
        args: namespace whose attributes are forwarded to the importer run.
    """
    csv_data_url, source_url = url

    # Prefer an already-registered source for this data URL; only create a
    # new Source when none exists.
    source = None
    for existing in dataset.sources:
        if existing.url == csv_data_url:
            source = existing
            break
    if source is None:
        source = Source(dataset, shell_account(), csv_data_url)

    # Analyse the csv data and add it to the source we are actually going
    # to use — without the analysis we'd be left with a weird message.
    # (Previously the analysis was computed on the new Source before the
    # duplicate lookup, so a reused source never received it.)
    source.analysis = analyze_csv(csv_data_url)

    db.session.add(source)
    db.session.commit()

    dataset.generate()
    importer = CSVImporter(source)
    importer.run(**vars(args))

    # Imported from the file system (source and data url differ)?
    if csv_data_url != source_url:
        # Re-fetch the source through the model so we update the persisted
        # row rather than adding a new one.
        source = Source.by_id(source.id)
        source.url = source_url
        db.session.commit()
def _get_run(self, dataset, source, id):
    """Resolve *dataset*, *source* and run *id* into the template context.

    Aborts with 404 when the source or run does not exist or belongs to a
    different parent; requires update permission on the dataset.
    """
    self._get_dataset(dataset)
    require.dataset.update(c.dataset)

    c.source = Source.by_id(source)
    source_missing = c.source is None or c.source.dataset != c.dataset
    if source_missing:
        abort(404, _("There is no source '%s'") % source)

    c.run = Run.by_id(id)
    run_missing = c.run is None or c.run.source != c.source
    if run_missing:
        abort(404, _("There is no run '%s'") % id)
def analyze_source(source_id):
    """Analyse the CSV data of the source *source_id* and persist the result.

    Logs an error and returns when the source does not exist; otherwise
    stores the analysis on the source and logs either the analysis error
    or the detected columns.
    """
    from openspending.model import Source, meta as db
    from openspending.importer.analysis import analyze_csv

    source = Source.by_id(source_id)
    if not source:
        log.error("No such source: %s", source_id)
        # Bug fix: without this return the code fell through and crashed
        # with an AttributeError on ``source.url`` below.
        return

    log.info("Analyzing: %s", source.url)
    source.analysis = analyze_csv(source.url)
    if 'error' in source.analysis:
        log.error(source.analysis.get('error'))
    else:
        log.info("Columns: %r", source.analysis.get('columns'))
    db.session.commit()
def analyze_source(source_id):
    """Run CSV analysis for the source identified by *source_id*.

    The analysis result is stored on the source and committed; a missing
    source is logged and the function returns without doing anything else.
    """
    from openspending.model import Source, meta as db
    from openspending.importer.analysis import analyze_csv

    source = Source.by_id(source_id)
    if not source:
        log.error("No such source: %s", source_id)
        # Guard added: previously execution continued and dereferenced
        # ``source.url`` on None, raising AttributeError.
        return

    log.info("Analyzing: %s", source.url)
    source.analysis = analyze_csv(source.url)
    if 'error' in source.analysis:
        log.error(source.analysis.get('error'))
    else:
        log.info("Columns: %r", source.analysis.get('columns'))
    db.session.commit()
def load_source(source_id, sample=False):
    """Load the source *source_id* into its dataset.

    When ``sample`` is true, the import is bounded to 1000 lines / 1000
    errors; a full run additionally queues the dataset for indexing.
    Missing sources and sources without a mapping are logged and skipped.
    """
    from openspending.model import Source
    from openspending.importer import CSVImporter

    source = Source.by_id(source_id)
    if not source:
        log.error("No such source: %s", source_id)
        # Bug fix: without this return the next line crashed with an
        # AttributeError on ``source.loadable``.
        return
    if not source.loadable:
        log.error("Dataset has no mapping.")
        return

    source.dataset.generate()
    importer = CSVImporter(source)
    if sample:
        importer.run(max_lines=1000, max_errors=1000)
    else:
        importer.run()
    index_dataset.delay(source.dataset.name)
def load_source(source_id, sample=False):
    """Load the source *source_id* into its dataset.

    When ``sample`` is true the import is a dry run bounded to 1000 lines /
    1000 errors; otherwise a full import is performed.  The dataset is then
    queued for indexing.  Missing sources and sources without a mapping are
    logged and skipped.
    """
    from openspending.model import Source
    from openspending.importer import CSVImporter

    source = Source.by_id(source_id)
    if not source:
        log.error("No such source: %s", source_id)
        # Guard added: previously execution continued and dereferenced
        # ``source.loadable`` on None, raising AttributeError.
        return
    if not source.loadable:
        log.error("Dataset has no mapping.")
        return

    source.dataset.generate()
    importer = CSVImporter(source)
    if sample:
        importer.run(dry_run=True, max_lines=1000, max_errors=1000)
    else:
        importer.run()
    index_dataset.delay(source.dataset.name)
def _get_source(self, dataset, id):
    """Load source *id* into the template context.

    Aborts with 404 when the source is absent or belongs to a different
    dataset than the one being viewed.
    """
    self._get_dataset(dataset)
    c.source = Source.by_id(id)
    valid = c.source is not None and c.source.dataset == c.dataset
    if not valid:
        abort(404, _("There is no source '%s'") % id)
def _get_source(self, dataset, id):
    """Resolve source *id* within *dataset* into the template context,
    responding with 404 when it is missing or mismatched."""
    self._get_dataset(dataset)

    found = Source.by_id(id)
    c.source = found
    if found is None or found.dataset != c.dataset:
        abort(404, _("There is no source '%s'") % id)