Exemple #1
0
 def _get_run(self, dataset, source, id):
     """Resolve a source and a run and store them on ``c``.

     ``abort(404, ...)`` is called when the source cannot be found or is
     attached to a different dataset than ``c.dataset``, and again when the
     run cannot be found or is attached to a different source.
     """
     # c.dataset is read right after this call, so _get_dataset is
     # presumably what populates it -- confirm against its definition.
     self._get_dataset(dataset)
     require.dataset.update(c.dataset)

     c.source = Source.by_id(source)
     source_unusable = c.source is None or c.source.dataset != c.dataset
     if source_unusable:
         abort(404, _("There is no source '%s'") % source)

     c.run = Run.by_id(id)
     run_unusable = c.run is None or c.run.source != c.source
     if run_unusable:
         abort(404, _("There is no run '%s'") % id)
Exemple #2
0
def get_run(dataset, source, id):
    """Look up a run and check it hangs off the given source and dataset.

    Returns a ``(dataset, source, run)`` tuple of the resolved objects.
    Raises ``BadRequest`` when the source is not attached to the dataset,
    or when the run is not attached to the source.
    """
    dataset_obj = get_dataset(dataset)

    source_obj = obj_or_404(Source.by_id(source))
    if source_obj.dataset != dataset_obj:
        raise BadRequest("There was no source")

    run_obj = obj_or_404(Run.by_id(id))
    if run_obj.source != source_obj:
        raise BadRequest("There is no run %s" % str(id))

    return dataset_obj, source_obj, run_obj
Exemple #3
0
 def _get_run(self, dataset, source, id):
     """Look up the requested source and run, keeping both on ``c``.

     A 404 is raised through ``abort`` if the source is missing or does not
     belong to ``c.dataset``, or if the run is missing or does not belong
     to that source.
     """
     self._get_dataset(dataset)
     require.dataset.update(c.dataset)

     # Store each lookup on ``c`` before validating it, so ``c`` carries
     # the value even when the check below aborts.
     found_source = Source.by_id(source)
     c.source = found_source
     if found_source is None or found_source.dataset != c.dataset:
         abort(404, _("There is no source '%s'") % source)

     found_run = Run.by_id(id)
     c.run = found_run
     if found_run is None or found_run.source != found_source:
         abort(404, _("There is no run '%s'") % id)
Exemple #4
0
def get_run(dataset, source, id):
    """Resolve a dataset, source and run, checking they belong together.

    The dataset is looked up with ``get_dataset`` and passed to
    ``require.dataset.update`` before the source and run are fetched.

    Returns a ``(dataset, source, run)`` tuple of the resolved objects.
    Raises ``BadRequest`` when the source is not attached to the dataset,
    or when the run is not attached to the source.
    """
    dataset = get_dataset(dataset)
    require.dataset.update(dataset)
    source = obj_or_404(Source.by_id(source))
    if source.dataset != dataset:
        raise BadRequest("There was no source")
    run = obj_or_404(Run.by_id(id))
    if run.source != source:
        # The closing quote must itself be a complete string literal;
        # the message reads: There is no run '<id>'
        raise BadRequest("There is no run '" + str(id) + "'")
    return dataset, source, run
Exemple #5
0
    def run(self, dry_run=False, max_lines=None, raise_errors=False, **kwargs):
        """Process the rows of ``self.lines`` and record the outcome in a Run.

        dry_run -- when true the run is recorded as a sample operation
            instead of an import, and ``self.unique_check`` is reset to an
            empty dict.
        max_lines -- optional limit; iteration stops as soon as the
            1-based row number reaches this value, so that row itself is
            not processed.
        raise_errors -- when true, an exception raised while processing
            marks the run as failed, commits, and is re-raised; otherwise
            the exception is only passed to ``self.log_exception``.
        kwargs -- accepted but not used in this method.
        """
        self.dry_run = dry_run
        self.raise_errors = raise_errors

        # Unique key for this dataset.
        self.key = self._get_unique_key()
        # A dry run starts with an empty dictionary for its uniqueness check.
        if dry_run:
            self.unique_check = {}

        entries_before = len(self.dataset)
        self.row_number = 0

        # A dry run is recorded as a sample, anything else as an import.
        if dry_run:
            operation = Run.OPERATION_SAMPLE
        else:
            operation = Run.OPERATION_IMPORT
        self._run = Run(operation, Run.STATUS_RUNNING, self.dataset,
                        self.source)
        db.session.add(self._run)
        db.session.commit()
        log.info("Run reference: #%s", self._run.id)

        try:
            for current_row, record in enumerate(self.lines, start=1):
                if max_lines and current_row >= max_lines:
                    break

                self.row_number = current_row
                self.process_line(record)
        except Exception as exc:
            self.log_exception(exc)
            if self.raise_errors:
                # Persist the failure before handing the exception back.
                self._run.status = Run.STATUS_FAILED
                self._run.time_end = datetime.utcnow()
                db.session.commit()
                raise

        if self.row_number == 0:
            self.log_exception(ValueError("Didn't read any lines of data"),
                               error='')

        num_loaded = len(self.dataset) - entries_before
        # Only checked for real imports that have logged no errors so far.
        too_few_entries = (not (dry_run or self.errors)
                           and num_loaded < (self.row_number - 1))
        if too_few_entries:
            shortfall = ValueError(
                "The number of entries loaded is "
                "smaller than the number of source rows read.")
            detail = (
                "%s rows were read, but only %s entries created. "
                "Check the unique key criteria, entries seem to overlap." %
                (self.row_number, num_loaded))
            self.log_exception(shortfall, error=detail)

        if not self.errors:
            self._run.status = Run.STATUS_COMPLETE
            log.info("Finished import with no errors!")
        else:
            self._run.status = Run.STATUS_FAILED
        self._run.time_end = datetime.utcnow()
        # The dataset's update time is taken from the run's end time.
        self.dataset.updated_at = self._run.time_end
        db.session.commit()